In [2]:
import pandas as pd
import numpy as np

In [3]:
## Helper functions used to aggregate the minute-level data into hourly/daily bars
def get_first_element(x):
    """Return the element at the first position of ``x``.

    ``x`` must support positional ``.iloc`` indexing (for example a pandas
    Series holding one group's values). An empty ``x`` raises ``IndexError``.
    """
    head = x.iloc[0]
    return head

def get_last_element(x):
    """Return the element at the last position of ``x``.

    ``x`` must support positional ``.iloc`` indexing (for example a pandas
    Series holding one group's values). An empty ``x`` raises ``IndexError``.
    """
    # Negative positional indexing replaces the manual ``len(x)-1`` arithmetic.
    return x.iloc[-1]

# Long strategy: set the indicator to 1 when the next period's high reaches at least
# `threshold` times the current open, otherwise 0.
def infer_profit_long_indicator(x,threshold=1.02):
    """Return the long-side label for one row.

    The label is 1 when ``x['next_1high']`` is greater than or equal to
    ``threshold * x['Open']`` and 0 otherwise. A comparison that evaluates
    false because a value is NaN therefore also yields 0.

    Only the next period's high is consulted; ``x`` just needs to support
    lookup by the keys ``'next_1high'`` and ``'Open'``.
    """
    next_high = x['next_1high']
    target_price = threshold*x['Open']
    return 1 if next_high >= target_price else 0

def infer_profit_short_indicator(x,threshold=0.98):
    """Return the short-side label for one row.

    The label is 1 when ``x['next_1low']`` is less than or equal to
    ``threshold * x['Open']`` and 0 otherwise. A comparison that evaluates
    false because a value is NaN therefore also yields 0.

    ``x`` just needs to support lookup by the keys ``'next_1low'`` and ``'Open'``.
    """
    next_low = x['next_1low']
    target_price = threshold*x['Open']
    return 1 if next_low <= target_price else 0
    
# Normalize a series with a rolling z-score (rolling mean and standard deviation over `window_size` points)
def zscore_func_improved(x,window_size=50):
    """Rolling z-score of ``x``.

    Each value has the rolling mean subtracted and is divided by the rolling
    standard deviation, both computed over ``window_size`` observations.
    The leading positions, where the window is not yet full, are back-filled
    with the first available rolling statistics. If ``x`` is shorter than
    ``window_size`` there is nothing to back-fill from and the result is all NaN.
    """
    windowed = x.rolling(window=window_size)
    centre = windowed.mean().bfill()
    spread = windowed.std().bfill()
    return (x - centre) / spread

## Preparing Dalian Palm oil data

In [4]:
# Read the 1-minute palm olein sheet (skipping its first three rows), then discard
# the row labelled 0 of the resulting frame.
palmoil_data=pd.read_excel('data/Dalian Palm Olein 1 Mins_updated.xlsx',skiprows=3).drop(0)

# Grouping keys taken from the 'Dates' timestamps: calendar date and hour of day
palmoil_data=palmoil_data.assign(Date=palmoil_data['Dates'].dt.date,
                                 Hour=palmoil_data['Dates'].dt.hour)

# One aggregation recipe shared by the daily and hourly roll-ups:
# first Open, last Close, highest High, lowest Low and summed Volume per group
palmoil_bar_agg={'Open':get_first_element,'Close':get_last_element,
                 'High':lambda x:np.max(x),'Low':lambda x:np.min(x),'Volume':'sum'}
palmoil_data_daily=palmoil_data.groupby(['Date'],as_index=False).agg(palmoil_bar_agg)
palmoil_data_hourly=palmoil_data.groupby(['Date','Hour'],as_index=False).agg(palmoil_bar_agg)

# Index the daily frame by its dates converted to timestamps
# (the 'Date' column itself stays in the frame)
palmoil_data_daily=palmoil_data_daily.set_index(
    pd.to_datetime(palmoil_data_daily['Date'])
)

## Preparing FCPO Data

In [6]:
fcpo_data=pd.read_csv('data/FCPO_2014_2018_0719.csv')

# Remove the placeholder rows stamped with Time == 1805. An explicit copy is taken so
# the column assignments below act on an independent frame rather than on a slice of
# the raw read (this avoids pandas' SettingWithCopyWarning).
fcpo_data=fcpo_data[fcpo_data['Time']!=1805].copy()

# Convert the date values to strings and derive the two-character hour field.
# NOTE(review): assumes Time is an integer-like HHMM value (it is compared with 1805
# above) -- confirm. Zero-padding to four digits keeps a time such as 930 in hour '09'
# instead of the '93' that slicing the unpadded string would give.
fcpo_data['Date']=fcpo_data['Date'].apply(str)
fcpo_data['Hour']=fcpo_data['Time'].apply(lambda x:str(int(x)).zfill(4)[0:2])

# Compute daily and hourly data frames with one shared aggregation recipe:
# first Open, last Close, highest High, lowest Low and summed Volume per group
fcpo_bar_agg={'Open':get_first_element,'Close':get_last_element,
              'High':lambda x:np.max(x),'Low':lambda x:np.min(x),'Volume':'sum'}
fcpo_data_daily=fcpo_data.groupby(['Date'],as_index=False).agg(fcpo_bar_agg)
fcpo_data_hourly=fcpo_data.groupby(['Date','Hour'],as_index=False).agg(fcpo_bar_agg)

# Index both frames by timestamps parsed from their grouping keys
# (the key columns themselves stay in the frames)
fcpo_data_daily=fcpo_data_daily.set_index(pd.to_datetime(fcpo_data_daily['Date']))
fcpo_data_hourly=fcpo_data_hourly.set_index(pd.to_datetime(fcpo_data_hourly['Date']+'-'+fcpo_data_hourly['Hour']))

In [7]:
## Infer the profit indicators from the next day's high/low prices
# Bring the next row's prices (shift(-1)) and the previous row's close (shift(1)) onto
# each daily row. The last row has no successor, so its next_* values are NaN; the
# first row has no predecessor, so its prev_1close is NaN.
fcpo_data_daily=fcpo_data_daily.assign(next_1close=fcpo_data_daily['Close'].shift(-1),
                      next_1high=fcpo_data_daily['High'].shift(-1),
                      next_1low=fcpo_data_daily['Low'].shift(-1),
                      next_1open=fcpo_data_daily['Open'].shift(-1),
                      prev_1close=fcpo_data_daily['Close'].shift(1),
                      )

# next_open_change_pct: percentage change of this row's Open over the previous row's Close.
# lprofit_ind / sprofit_ind: 0/1 labels computed row by row by the indicator helpers
# (rows whose next_* values are NaN get 0 because the comparison is false).
fcpo_data_daily=fcpo_data_daily.assign(next_open_change_pct=(fcpo_data_daily['Open']/fcpo_data_daily['prev_1close'])*100-100,
                            lprofit_ind=fcpo_data_daily.apply(infer_profit_long_indicator,axis=1),
                            sprofit_ind=fcpo_data_daily.apply(infer_profit_short_indicator,axis=1))

# The first row has no previous close; back-fill it with the following row's value
fcpo_data_daily['next_open_change_pct']=fcpo_data_daily['next_open_change_pct'].bfill()
# errors='ignore' keeps this cell re-runnable once 'Date' has already been dropped
fcpo_data_daily=fcpo_data_daily.drop(columns=['Date'],errors='ignore')

In [8]:
## Infer the profit indicators from the next period's high/low prices
# Bring the next row's prices (shift(-1)) onto each hourly row. The shift runs over the
# whole frame, so the last bucket of a day is paired with the first bucket of the
# following day; the very last row has no successor and gets NaN.
fcpo_data_hourly=fcpo_data_hourly.assign(next_1close=fcpo_data_hourly['Close'].shift(-1),
                      next_1high=fcpo_data_hourly['High'].shift(-1),
                      next_1low=fcpo_data_hourly['Low'].shift(-1),
                      next_1open=fcpo_data_hourly['Open'].shift(-1))

# 0/1 labels computed row by row by the indicator helpers
# (rows whose next_* values are NaN get 0 because the comparison is false)
fcpo_data_hourly=fcpo_data_hourly.assign(lprofit_ind=fcpo_data_hourly.apply(infer_profit_long_indicator,axis=1),
                                       sprofit_ind=fcpo_data_hourly.apply(infer_profit_short_indicator,axis=1))

# errors='ignore' keeps this cell re-runnable once the key columns have already been dropped
fcpo_data_hourly=fcpo_data_hourly.drop(columns=['Date','Hour'],errors='ignore')

In [9]:
# Open the HDF5 store that receives the processed frames.
# mode='a' (pandas' default) appends to an existing file or creates it if missing.
hdf_store = pd.HDFStore('processed_dta.h5', mode='a')

In [12]:
# Persist the processed frames under their own keys
hdf_store.put('fcpo_data_daily',fcpo_data_daily)

hdf_store.put('fcpo_data_hourly',fcpo_data_hourly)

hdf_store.put('palmoil_data_daily',palmoil_data_daily)

# Force the buffered writes to disk so the frames are not lost if the kernel stops.
# NOTE(review): the store is left open because later cells may still use it --
# call hdf_store.close() once nothing else needs it.
hdf_store.flush()