### Important note
The Bloomberg terminal provided no close data, so we assume that each day's close equals the next day's open, which is reasonable given the 24-hour nature of the forex market.
However, prices may gap up or down over the weekend.
We have not accounted for that.

In this notebook, we will generate dataframes with features and save them for use in other notebooks.

# Initialisation

In [64]:
# Important packages
import pandas as pd
import numpy as np
import ta

from scipy.stats import linregress

In [65]:
# Read the raw price file; the column names are supplied explicitly via `names`.
df = pd.read_csv(
    'GBPUSD.csv',
    names=['date', 'open', 'high', 'low', 'close'],
)

In [66]:
# Remove, in place, every row that has a missing value in any column.
df.dropna(inplace=True)

In [67]:
# Display (rows, columns) of the frame after dropping incomplete rows.
df.shape

(5179, 5)

# Adding delta (which is a prediction target and NOT A FEATURE)

In [68]:
# Build the prediction target: delta is 1.0 when the NEXT row's high is
# strictly greater than this row's high, otherwise 0.0.
# shift(-1) pairs each row with its successor by position, so the result does
# not depend on the index labels being exactly 0..n-1. The previous
# label-based df.at[i+1, ...] lookup would fail, or append stray rows, as soon
# as the preceding dropna left a gap in the index.
next_high = df['high'].shift(-1)
# The final row has no successor: keep it NaN (as the original loop did) so it
# is not mislabelled as 0.
df['delta'] = (next_high > df['high']).astype(float).where(next_high.notna())

# Adding features

### Be careful not to commit an off-by-one error!

## Technical Analysis features

In [69]:
# Weighted close: the close counted twice plus the high and the low, over 4.
df['wclose'] = (2 * df['close'] + df['high'] + df['low']) / 4

In [70]:
def add_SO(i):
    """Add column ``so_<i>`` to the global ``df``.

    The values come from ``ta.momentum.StochasticOscillator(...).stoch()``,
    built from the high/low/close columns with ``i`` passed as ``n``.
    """
    oscillator = ta.momentum.StochasticOscillator(
        high=df['high'],
        low=df['low'],
        close=df['close'],
        n=i,
    )
    df['so_%s' % (i)] = oscillator.stoch()


for lookback in (3, 4, 5, 8, 9, 10):
    add_SO(lookback)

In [71]:
def add_WR(i):
    """Add column ``wr_<i>`` to the global ``df``.

    The values come from ``ta.momentum.WilliamsRIndicator(...).wr()``,
    built from the high/low/close columns with ``i`` passed as ``lbp``.
    """
    williams_r = ta.momentum.WilliamsRIndicator(
        high=df['high'],
        low=df['low'],
        close=df['close'],
        lbp=i,
    )
    df['wr_%s' % (i)] = williams_r.wr()


for lookback in (6, 7, 8, 9, 10):
    add_WR(lookback)

In [72]:
def add_ROC(i):
    """Add column ``roc_<i>`` to the global ``df``.

    The values come from ``ta.momentum.ROCIndicator(...).roc()``, built from
    the close column with ``i`` passed as ``n``.
    """
    rate_of_change = ta.momentum.ROCIndicator(close=df['close'], n=i)
    df['roc_%s' % (i)] = rate_of_change.roc()


for lookback in (12, 13, 14, 15):
    add_ROC(lookback)

In [73]:
def add_WCP(i):
    """Add column ``wcp_<i>`` to the global ``df``.

    The values come from ``ta.trend.EMAIndicator(...).ema_indicator()``
    applied to the ``wclose`` column, with ``i`` passed as ``n``.
    """
    weighted_close_ema = ta.trend.EMAIndicator(close=df['wclose'], n=i)
    df['wcp_%s' % (i)] = weighted_close_ema.ema_indicator()


add_WCP(15)

In [74]:
def add_MACD(i, j):
    """Add column ``macd_<i>_<j>`` to the global ``df``.

    The values come from ``ta.trend.MACD(...).macd()`` on the close column,
    with ``i`` passed as ``n_fast`` and ``j`` as ``n_slow``.
    """
    macd_indicator = ta.trend.MACD(
        close=df['close'],
        n_fast=i,
        n_slow=j,
    )
    df['macd_%s_%s' % (i, j)] = macd_indicator.macd()


add_MACD(15, 30)

In [75]:
def add_CCI(i):
    """Add column ``cci_<i>`` to the global ``df``.

    The values are whatever ``ta.trend.cci`` returns for the high/low/close
    columns with ``i`` passed as ``n``.
    """
    df['cci_%s' % (i)] = ta.trend.cci(
        high=df['high'],
        low=df['low'],
        close=df['close'],
        n=i,
    )


add_CCI(15)

## Signal Processing features

In [76]:
# Two-row averaging features, computed column-wise instead of row by row.
# shift(1) pairs every row with the row before it by position, so the result
# does not rely on the index labels being exactly 0..n-1 (the label-based
# df.at[i-1, ...] lookup breaks as soon as the index has a gap).
prev_high = df['high'].shift(1)
prev_low = df['low'].shift(1)
# Mean of the previous and the current high / low.
df['hi_avg_2'] = (prev_high + df['high']) / 2
df['lo_avg_2'] = (prev_low + df['low']) / 2
# Midpoint of the two averages above.
df['hilo_avg_2'] = (df['hi_avg_2'] + df['lo_avg_2']) / 2
# Midpoint of the current row's high and low. The original loop started at the
# second row, so the first row is left NaN here as well.
df['hilo_avg'] = ((df['high'] + df['low']) / 2).where(prev_high.notna())

In [78]:
# Remove, in place, every row where any column (feature or delta) is NaN.
# NOTE(review): presumably these are the indicator warm-up rows at the start
# and the last row, which has no delta; verify against the shape shown below.
df.dropna(inplace=True)

In [79]:
# Display (rows, columns) after removing rows with missing values.
df.shape

(5149, 29)

In [80]:
def add_slope(i):
    """Add column ``slope_<i>``: a linear-regression slope over ``i`` highs.

    For every row the window is that row's high together with the ``i - 1``
    highs before it (by position). The first ``i - 1`` rows have no full
    window and are left NaN.

    The previous version sliced the window by position
    (``df['high'][j-(i-1):j+1]``) but stored the result by label
    (``df.at[j, name]``). Once an earlier ``dropna`` has removed leading rows,
    the labels no longer start at 0, so each slope was written to a different
    row than the one its window ended at, and values were written for labels
    that did not exist. ``rolling`` keeps the window and its destination row
    aligned whatever the index looks like.

    Args:
        i: Window length in rows.
    """
    steps = np.arange(i)
    # NOTE(review): as in the original, the highs are passed as x and the step
    # index 0..i-1 as y, so this is the slope of step against price rather
    # than price against step. Confirm the intended feature before swapping.
    df['slope_%s' % (i)] = df['high'].rolling(i).apply(
        lambda window: linregress(window, steps).slope, raw=True
    )


for i in [3, 4, 5, 10, 20, 30]:
    add_slope(i)

In [81]:
# Remove, in place, every row where any column is NaN, which includes rows
# that did not receive a value for every slope_<i> column.
df.dropna(inplace=True)

In [82]:
# Display (rows, columns) of the final feature frame.
df.shape

(5120, 35)

## Run the cell below if you want to save the dataframe for future use

In [84]:
# Write the frame as a gzip-compressed parquet file.
df.to_parquet(
    '../Dataframes/df.parquet.gzip',
    compression='gzip',
)