# Initialisation

In [88]:
# Important packages
import pandas as pd
import numpy as np
import ta
import matplotlib.pyplot as plt
import matplotlib as mpl
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8' and
# later dropped the bare 'seaborn' name; prefer the new name and fall back to
# the old one so the notebook runs on either side of that change.
mpl.style.use('seaborn-v0_8' if 'seaborn-v0_8' in mpl.style.available else 'seaborn')

from scipy.stats import linregress

In [90]:
# Load the raw data
# Column names are supplied explicitly via ``names=``; the trailing expression
# displays the number of rows that were read.
df_long = pd.read_csv(
    '../FX Data/GBPUSD.csv',
    names=['date', 'open', 'high', 'low', 'close'],
)
len(df_long)

5180

## Important!
Run the cell below to import the dataframe with the features already added, which saves computation time.
You can then skip straight to the classifier part.

You need to have the pyarrow package.

In [48]:
# Load the previously saved feature dataframe from its parquet file.
df = pd.read_parquet('../Dataframes/df.parquet.gzip')

If you instead want to create a dataframe with new features, run the cells below, starting from `df = df_long`.

In [49]:
# Preview the first rows of the dataframe currently bound to `df`.
df.head()

Unnamed: 0,date,open,high,low,close,wclose,delta,so_3,so_4,so_5,...,hi_avg_2,lo_avg_2,hilo_avg_2,hilo_avg,slope_3,slope_4,slope_5,slope_10,slope_20,slope_30
50,13/3/2000,1.5773,1.5829,1.5761,1.5786,1.57905,0.0,34.042553,29.078014,47.368421,...,1.58335,1.57575,1.57955,1.5795,-1051.660517,-480.169784,-124.55239,-374.380286,-405.042029,-553.768912
51,14/3/2000,1.5786,1.5795,1.5682,1.5787,1.576275,0.0,67.307692,63.253012,51.470588,...,1.5812,1.57215,1.576675,1.57385,-418.01685,-528.634361,-464.546547,-528.940484,-418.450089,-548.468327
52,15/3/2000,1.5787,1.5787,1.5699,1.5733,1.5738,0.0,34.693878,32.692308,30.722892,...,1.5791,1.56905,1.574075,1.5743,-422.252011,-497.671324,-568.730181,-608.062,-416.686639,-543.624365
53,16/3/2000,1.5733,1.5775,1.5692,1.5765,1.574925,0.0,73.451327,56.462585,53.205128,...,1.5781,1.56955,1.573825,1.57335,-986.842105,-527.622595,-562.098501,-598.90398,-423.601117,-539.882002
54,17/3/2000,1.5765,1.5766,1.5702,1.5753,1.57435,0.0,64.210526,62.831858,48.29932,...,1.57705,1.5697,1.573375,1.5734,-945.945946,-1004.56621,-619.904891,-605.513402,-425.768716,-543.211462


In [91]:
# Work on a copy of the raw frame so the feature cells below never mutate
# df_long, and so re-running this cell restarts feature building from clean data.
# The whole dataset is used here; slice df_long at this point for a quicker test run.
df = df_long.copy()

# Adding features
## Technical Analysis features

In [4]:
# Weighted close: the close counts twice, the high and the low once each,
# and the sum is divided by four.
df['wclose'] = df['close'].mul(2).add(df['high']).add(df['low']).div(4)

In [5]:
# delta is 1.0 where a row's high is strictly above the previous row's high and
# 0.0 otherwise. Computed in one vectorised pass (no per-row .at writes), which
# also removes the dependence on the index being exactly 0..n-1.
# The first row has no predecessor, so it is left as NaN, as the old loop did.
df['delta'] = (
    (df['high'] > df['high'].shift(1))
    .astype(float)
    .mask(np.arange(len(df)) == 0)
)

In [6]:
def add_SO(i):
    """Add a stochastic-oscillator column named ``so_<i>`` to the global ``df``.

    ``i`` is forwarded to ``ta.momentum.StochasticOscillator`` as ``n``; the
    indicator is built from the ``high``, ``low`` and ``close`` columns.
    Returns ``None``.
    """
    oscillator = ta.momentum.StochasticOscillator(
        high=df['high'],
        low=df['low'],
        close=df['close'],
        n=i,
    )
    df['so_%s' % i] = oscillator.stoch()


# One column per look-back setting.
for i in (3, 4, 5, 8, 9, 10):
    add_SO(i)

In [7]:
def add_WR(i):
    """Add a Williams %R column named ``wr_<i>`` to the global ``df``.

    ``i`` is forwarded to ``ta.momentum.WilliamsRIndicator`` as ``lbp``; the
    indicator is built from the ``high``, ``low`` and ``close`` columns.
    Returns ``None``.
    """
    williams = ta.momentum.WilliamsRIndicator(
        high=df['high'],
        low=df['low'],
        close=df['close'],
        lbp=i,
    )
    df['wr_%s' % i] = williams.wr()


# One column per look-back setting.
for i in (6, 7, 8, 9, 10):
    add_WR(i)

In [8]:
def add_ROC(i):
    """Add a rate-of-change column named ``roc_<i>`` to the global ``df``.

    ``i`` is forwarded to ``ta.momentum.ROCIndicator`` as ``n``; the indicator
    is built from the ``close`` column. Returns ``None``.
    """
    rate_of_change = ta.momentum.ROCIndicator(close=df['close'], n=i)
    df['roc_%s' % i] = rate_of_change.roc()


# One column per look-back setting.
for i in (12, 13, 14, 15):
    add_ROC(i)

In [9]:
def add_WCP(i):
    """Add an EMA of the weighted close as column ``wcp_<i>`` on the global ``df``.

    ``i`` is forwarded to ``ta.trend.EMAIndicator`` as ``n``; the indicator is
    built from the ``wclose`` column, which must already exist. Returns ``None``.
    """
    weighted_close_ema = ta.trend.EMAIndicator(close=df['wclose'], n=i)
    df['wcp_%s' % i] = weighted_close_ema.ema_indicator()


add_WCP(15)

In [10]:
def add_MACD(i, j):
    """Add a MACD column named ``macd_<i>_<j>`` to the global ``df``.

    ``i`` and ``j`` are forwarded to ``ta.trend.MACD`` as ``n_fast`` and
    ``n_slow`` respectively; the indicator is built from the ``close`` column.
    Returns ``None``.
    """
    macd_indicator = ta.trend.MACD(close=df['close'], n_fast=i, n_slow=j)
    df['macd_%s_%s' % (i, j)] = macd_indicator.macd()


add_MACD(15, 30)

In [11]:
def add_CCI(i):
    """Add a CCI column named ``cci_<i>`` to the global ``df``.

    ``i`` is forwarded to ``ta.trend.cci`` as ``n``; the function is called with
    the ``high``, ``low`` and ``close`` columns and its result is stored as-is.
    Returns ``None``.
    """
    df['cci_%s' % i] = ta.trend.cci(
        high=df['high'],
        low=df['low'],
        close=df['close'],
        n=i,
    )


add_CCI(15)

## Signal Processing features

In [12]:
# Two-row averages of the highs and lows, computed column-wise instead of with
# per-row .at writes (faster, and independent of the index being 0..n-1).
# shift(1) supplies the previous row, so the first row of the *_2 columns is NaN
# exactly as before.
df['hi_avg_2'] = (df['high'].shift(1) + df['high']) / 2
df['lo_avg_2'] = (df['low'].shift(1) + df['low']) / 2
# Midpoint of the two averages above.
df['hilo_avg_2'] = (df['hi_avg_2'] + df['lo_avg_2']) / 2
# Midpoint of the current row's high and low. Unlike the old loop (which started
# at row 1), the first row is populated too; every other value is unchanged.
df['hilo_avg'] = (df['high'] + df['low']) / 2

In [13]:
def add_slope(i, frame=None):
    """Add a rolling regression-slope column named ``slope_<i>``.

    For every row position ``j >= i - 1`` the last ``i`` values of the ``high``
    column (positions ``j - i + 1 .. j``) are passed to ``linregress`` as ``x``
    and the positions ``0 .. i - 1`` as ``y``; the fitted slope is stored at
    position ``j``. Earlier rows stay NaN.

    Parameters
    ----------
    i : int
        Window length in rows.
    frame : pandas.DataFrame, optional
        Frame to read ``high`` from and write the new column to. Defaults to
        the notebook-level ``df``.

    Returns
    -------
    None
        The column is written onto the frame in place.

    Notes
    -----
    * A window whose highs are all equal has zero variance in ``x``, so no
      slope is defined; NaN is stored without calling ``linregress``.
    * NOTE(review): with highs as ``x`` and bar position as ``y`` the stored
      value is d(position)/d(price), not price change per bar. This looks
      reversed for a "slope" feature, but the order is kept because existing
      saved features were produced this way. Confirm before swapping.
    """
    target = df if frame is None else frame
    highs = target['high'].to_numpy(dtype=float)
    # Loop-invariant pieces, built once instead of once per row.
    positions = list(range(i))
    slopes = [float('nan')] * len(highs)
    for end in range(i - 1, len(highs)):
        window = highs[end - (i - 1):end + 1]
        if window.max() == window.min():
            # Flat window: the regression is undefined, keep NaN.
            continue
        slopes[end] = linregress(window, positions).slope
    # Single positional column write; no dependence on the index labels.
    target['slope_%s' % i] = slopes
    
# Build one slope column per window length.
for i in (3, 4, 5, 10, 20, 30):
    add_slope(i)

  return (a < x) & (x < b)
  return (a < x) & (x < b)
  cond2 = cond0 & (x <= _a)


In [14]:
# Drop the first 50 rows by position (presumably the warm-up rows where the
# longer look-back features are still NaN; confirm the cut-off).
df = df.iloc[50:]

In [20]:
# Show (rows, columns) of the trimmed feature dataframe.
df.shape

(5130, 35)

## Run the cell below if you want to save the dataframe for future use

In [21]:
# Save to the same location that the cached-dataframe load cell near the top of
# this notebook reads from ('../Dataframes/...'). The previous './Dataframes/...'
# target pointed at a different directory, so a saved file was never the one
# picked up on reload.
df.to_parquet('../Dataframes/df.parquet.gzip', compression='gzip')

# Charts (Not using these yet)

In [7]:
#fig_cdls = go.Figure(data=[go.Candlestick(x=df['Date'][0:100],open=df['Open'],high=df['High'],low=df['Low'],close=df['Close'])])
#fig_cdls.show()

In [8]:
#fig = px.line(df[0:500],x='Date',y='Close')
#fig.show()

In [9]:
# ind_EMA = ta.trend.EMAIndicator(close=df['Close'],n=10,fillna=False)

# df['EMA'] = ind_EMA.ema_indicator()

In [10]:
# fig_ind = go.Figure()

# fig_ind.update_layout(xaxis_range=[0,50],
#                       yaxis_range=(1.5,1.7),
#                  title_text = "Close and EMA")

# fig_ind.add_trace(go.Scatter(
#                     x=df['Date'],
#                     y=df['Close'],
#                     name='Close',
#                     line_color='dimgray',
#                     opacity=0.4))

# fig_ind.add_trace(go.Scatter(
#                     x=df['Date'],
#                     y=df['EMA'],
#                     name='EMA',
#                     line_color='deepskyblue',
#                     opacity=0.4))

# fig_ind.show()