# Initialisation

In [6]:
# Important packages
import pandas as pd
import numpy as np
import ta
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.style.use('seaborn')

from scipy.stats import linregress

# Charting stuff
#import plotly
#import plotly.express as px
#import plotly.graph_objects as go
#import cufflinks as cf
#from datetime import datetime

In [7]:
# Load the raw data
df_long = pd.read_csv('./FX Data/GBPUSD.csv', names=['date','open','high','low','close'])
len(df_long)

5180

## Important!
Run the cell below to import the dataframe with features so that you can save computational time.
You can then skip to the classifer part right away as the features have already been added.

You need to have the pyarrow package.

In [9]:
df = pd.read_parquet('./Dataframes/df.parquet.gzip')

Run this if you want to create a dataframe with new features.

In [None]:
# Limit to small case for easy testing, will change back to whole dataframe eventually
df = df_long

# Adding features
## Technical Analysis features

In [4]:
df['wclose'] = (df['close']*2+df['high']+df['low'])/4

In [5]:
for i in range(1,len(df)):
    if df.at[i,'high'] > df.at[i-1,'high']:
        df.at[i,'delta'] = 1
    else:
        df.at[i,'delta'] = 0

In [6]:
def add_SO(i):
    ind_SO = ta.momentum.StochasticOscillator(high=df['high'],low=df['low'],close=df['close'],n=i)
    name = 'so_%s' %(i)
    df[name] = ind_SO.stoch()
    return

for i in [3,4,5,8,9,10]:
    add_SO(i)

In [7]:
def add_WR(i):
    ind_WR = ta.momentum.WilliamsRIndicator(high=df['high'],low=df['low'],close=df['close'],lbp=i)
    name = 'wr_%s' %(i)
    df[name] = ind_WR.wr()
    return

for i in [6,7,8,9,10]:
    add_WR(i)

In [8]:
def add_ROC(i):
    ind_ROC = ta.momentum.ROCIndicator(close=df['close'],n=i)
    name = 'roc_%s' %(i)
    df[name] = ind_ROC.roc()
    return

for i in [12,13,14,15]:
    add_ROC(i)

In [9]:
def add_WCP(i):
    ind_WCP = ta.trend.EMAIndicator(close=df['wclose'],n=i)
    name = 'wcp_%s' %(i)
    df[name] = ind_WCP.ema_indicator()
    return

add_WCP(15)

In [10]:
def add_MACD(i,j):
    ind_MACD = ta.trend.MACD(close=df['close'],n_fast=i,n_slow=j)
    name ='macd_%s_%s' %(i,j)
    df[name] = ind_MACD.macd()
    return

add_MACD(15,30)  

In [11]:
def add_CCI(i):
    ind_CCI = ta.trend.cci(high=df['high'],low=df['low'],close=df['close'],n=i)
    name = 'cci_%s' %(i)
    df[name] = ind_CCI
    return

add_CCI(15)

## Signal Processing features

In [12]:
for i in range(1,len(df)):
    df.at[i,'hi_avg_2'] =  (df.at[i-1,'high'] + df.at[i,'high'])/2
    df.at[i,'lo_avg_2'] =  (df.at[i-1,'low'] + df.at[i,'low'])/2
    df.at[i,'hilo_avg_2'] = (df.at[i,'hi_avg_2'] + df.at[i,'lo_avg_2'])/2
    df.at[i,'hilo_avg'] = (df.at[i,'high'] + df.at[i,'low'])/2

In [13]:
def add_slope(i):
    for j in range(i-1,len(df)):
        a=df['high'][j-(i-1):j+1]
        b=[]
        for k in range(i):
            b.append(k)
        name = 'slope_%s' %(i)
        df.at[j,name] = linregress(a,b).slope
    
for i in [3,4,5,10,20,30]:
    add_slope(i)

  return (a < x) & (x < b)
  return (a < x) & (x < b)
  cond2 = cond0 & (x <= _a)


In [10]:
df = df[50:]

In [11]:
df.shape

(5080, 35)

## Run the cell below if you want to save the dataframe for future use

In [21]:
df.to_parquet('./Dataframes/df.parquet.gzip',compression='gzip')

# Classifiers

<img src="files/Images/algo cheat sheet.png">

In [33]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score

In [24]:
def get_feature_matrix(start_ind, end_ind):
    return df[start_ind:end_ind].drop(columns=['date','open','high','low','close','wclose','delta'])
def get_training_labels(start_ind, end_ind):
    return df[start_ind:end_ind]['delta']

## Random Forest Classifier

In [25]:
# "feature matrix a" and "training labels a"
ft_mtx_a = get_feature_matrix(0,2000)
tr_lbl_a = get_training_labels(0,2000)

In [28]:
# "prediction matrix a" and "real label a"
pdr_mtx_a = get_feature_matrix(2000,2250)
rl_lbl_a = get_training_labels(2000,2250)

In [37]:
clf_frst = RandomForestClassifier(max_depth=2, random_state=0)
clf_frst.fit(ft_mtx_a, tr_lbl_a)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=2, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

In [38]:
accuracy_score(rl_lbl_a,clf_frst.predict(pdr_mtx_a))

0.804

In [39]:
clf_frst.predict(pdr_mtx_a)

array([0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0.,
       1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
       1., 1., 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0., 0., 1., 1., 1.,
       1., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0.,
       0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0.,
       0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1.,
       1., 1., 1., 1., 1., 0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0.,
       0., 0., 1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 0., 0.,
       0., 0., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
       1., 1., 1., 1., 1.

## Support Vector Machine Classifier

# Charts (Not using these yet)

In [7]:
#fig_cdls = go.Figure(data=[go.Candlestick(x=df['Date'][0:100],open=df['Open'],high=df['High'],low=df['Low'],close=df['Close'])])
#fig_cdls.show()

In [8]:
#fig = px.line(df[0:500],x='Date',y='Close')
#fig.show()

In [9]:
# ind_EMA = ta.trend.EMAIndicator(close=df['Close'],n=10,fillna=False)

# df['EMA'] = ind_EMA.ema_indicator()

In [12]:
# fig_ind = go.Figure()

# fig_ind.update_layout(xaxis_range=[0,50],
#                       yaxis_range=(1.5,1.7),
#                  title_text = "Close and EMA")

# fig_ind.add_trace(go.Scatter(
#                     x=df['Date'],
#                     y=df['Close'],
#                     name='Close',
#                     line_color='dimgray',
#                     opacity=0.4))

# fig_ind.add_trace(go.Scatter(
#                     x=df['Date'],
#                     y=df['EMA'],
#                     name='EMA',
#                     line_color='deepskyblue',
#                     opacity=0.4))

# fig_ind.show()

In [14]:
df.head()


Unnamed: 0,date,open,high,low,close,wclose,delta,so_3,so_4,so_5,...,hi_avg_2,lo_avg_2,hilo_avg_2,hilo_avg,slope_3,slope_4,slope_5,slope_10,slope_20,slope_30
100,22/5/2000,1.4863,1.4958,1.4826,1.4913,1.49025,1.0,75.117371,69.565217,50.314465,...,1.4931,1.47895,1.486025,1.4892,-35.169988,-196.008298,-207.748685,-170.076397,-182.915624,-241.4219
101,23/5/2000,1.4913,1.4938,1.4731,1.4768,1.480125,0.0,16.299559,15.744681,14.68254,...,1.4948,1.47785,1.486325,1.48345,228.085868,-65.473592,-267.117314,-184.167125,-184.105253,-236.52517
102,24/5/2000,1.4768,1.4802,1.4686,1.4735,1.47395,0.0,18.014706,18.014706,17.5,...,1.487,1.47085,1.478925,1.4744,-108.253146,-112.982602,-164.621036,-198.948711,-184.13575,-228.69168
103,25/5/2000,1.4735,1.4801,1.4667,1.4708,1.4721,0.0,15.129151,14.089347,14.089347,...,1.48015,1.46765,1.4739,1.4734,-110.288198,-140.102249,-161.988974,-193.175861,-188.123439,-224.019419
104,26/5/2000,1.4708,1.4927,1.4702,1.4895,1.485475,1.0,87.692308,84.132841,78.350515,...,1.4864,1.46845,1.477425,1.48145,119.040061,-9.871095,-83.449352,-197.293609,-195.963969,-223.984239
