In [1]:
import pandas as pd
import numpy as np
import plotly as py
from plotly import tools
import plotly.graph_objs as go
from feature_functions import *
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Parameters used further down in the notebook:
#   future   -> third argument of create_results() when the label column is built
#   averages -> list handed to moving_averages() (one set of MA columns per entry)
future = 15
averages = [5, 10, 15, 20, 30, 50, 70, 100, 200, 300]


# Read the hourly EUR/USD file and give its six columns short lowercase names
price_columns = ['open','high','low','close','volume']
df = pd.read_csv("data/EURUSDhour.csv")
df.columns = ['date'] + price_columns

# Parse the timestamps, use them as the index (the index is named 'date'),
# and keep only the price/volume columns as data
df['date'] = pd.to_datetime(df['date'], format='%d.%m.%Y %H:%M:%S.%f')
df = df.set_index(df['date'])[price_columns]

# Drop rows where there is no movement (market closed), e.g. on weekends.
# keep=False discards *every* row whose open/high/low/close/volume values
# also appear on another row, not just the repeats.
df = df.drop_duplicates(keep=False)
df.head(2)

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-01 22:00:00,1.05236,1.05253,1.05221,1.05227,143.54
2017-01-01 23:00:00,1.05227,1.05426,1.05226,1.05282,253.14
2017-01-02 00:00:00,1.05283,1.05283,1.05268,1.05281,131.4
2017-01-02 01:00:00,1.05282,1.05286,1.05225,1.0524,273.94
2017-01-02 02:00:00,1.05239,1.0524,1.05164,1.0522,258.14


In [2]:
# Build the modelling table: the original price/volume columns, followed by
# the moving-average columns for each entry of `averages` (in that order),
# followed by the label column 'result'.
averages_result = moving_averages(df, averages)

# Join everything column-wise in ONE concat instead of re-concatenating the
# growing frame inside a loop. pd.concat always returns a new object, so the
# assignment of 'result' below can never write into the raw `df`
# (the previous `df_with_averages = df` alias allowed that when `averages`
# was empty).
df_with_averages = pd.concat(
    [df] + [averages_result[average] for average in averages],
    axis=1,
)

# 'result' is appended as the LAST column; the evaluation cell relies on that
# when it selects the features with iloc[:, :-1].
df_with_averages['result'] = create_results(df, 'close', future)

# Remove rows with any missing value (presumably the warm-up rows of the
# longest moving average and rows without a label -- TODO confirm against
# feature_functions).
df_with_averages = df_with_averages.dropna()
df_with_averages.head(2)

Unnamed: 0_level_0,open,high,low,close,volume,MA5 open,MA5 high,MA5 low,MA5 close,MA10 open,...,MA100 close,MA200 open,MA200 high,MA200 low,MA200 close,MA300 open,MA300 high,MA300 low,MA300 close,result
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-18 09:00:00,1.06915,1.06964,1.06849,1.06959,11518.4004,1.068656,1.069580,1.067952,1.068618,1.069300,...,1.064291,1.060193,1.061190,1.059271,1.060271,1.056151,1.057140,1.055246,1.056224,0.0
2017-01-18 10:00:00,1.06960,1.06965,1.06800,1.06860,11601.0996,1.068616,1.069504,1.067924,1.068646,1.069292,...,1.064339,1.060248,1.061242,1.059321,1.060320,1.056209,1.057197,1.055299,1.056278,0.0
2017-01-18 11:00:00,1.06855,1.06886,1.06792,1.06868,9321.4600,1.068632,1.069440,1.067936,1.068766,1.069050,...,1.064396,1.060297,1.061290,1.059370,1.060369,1.056263,1.057246,1.055351,1.056331,0.0
2017-01-18 12:00:00,1.06870,1.06887,1.06651,1.06655,9303.3398,1.068760,1.069500,1.067716,1.068512,1.068930,...,1.064418,1.060347,1.061338,1.059414,1.060412,1.056316,1.057300,1.055397,1.056377,0.0
2017-01-18 13:00:00,1.06659,1.06737,1.06557,1.06685,17562.5703,1.068518,1.068878,1.067298,1.068054,1.068654,...,1.064446,1.060389,1.061383,1.059453,1.060454,1.056362,1.057348,1.055442,1.056425,0.0
2017-01-18 14:00:00,1.06685,1.06893,1.06652,1.06846,16889.8809,1.068058,1.068736,1.066904,1.067828,1.068357,...,1.064492,1.060432,1.061434,1.059498,1.060507,1.056410,1.057403,1.055491,1.056479,0.0
2017-01-18 15:00:00,1.06843,1.07031,1.06807,1.06954,18909.0703,1.067824,1.068868,1.066918,1.068016,1.068220,...,1.064535,1.060485,1.061490,1.059551,1.060562,1.056464,1.057463,1.055545,1.056537,0.0
2017-01-18 16:00:00,1.06955,1.06964,1.06768,1.06827,17272.5195,1.068024,1.069024,1.066870,1.067934,1.068328,...,1.064543,1.060540,1.061539,1.059599,1.060607,1.056522,1.057520,1.055598,1.056591,0.0
2017-01-18 17:00:00,1.06826,1.06920,1.06755,1.06832,10990.3203,1.067936,1.069090,1.067078,1.068288,1.068348,...,1.064574,1.060585,1.061581,1.059643,1.060648,1.056576,1.057578,1.055652,1.056647,0.0
2017-01-18 18:00:00,1.06832,1.06837,1.06638,1.06669,10246.1299,1.068282,1.069290,1.067240,1.068256,1.068400,...,1.064582,1.060626,1.061619,1.059680,1.060678,1.056632,1.057633,1.055703,1.056697,0.0


In [8]:
# Walk-forward evaluation.
# For each of `run_length` steps a fresh classifier is trained on every row
# that precedes the test row and then asked to predict that single row.
# Successive test rows lie `interval` rows apart, moving backwards from the
# end of the table. Note that the very last row of the table is never used:
# the first window already stops one row short of the end.
interval = 20
run_length = 300
correct_predictions = 0

n_rows = df_with_averages.shape[0]
features = df_with_averages.iloc[:, :-1]   # every column except the last one ('result')
y = df_with_averages['result']

# The smallest window (reached on the last iteration) must still contain at
# least one training row plus the test row. Without this check a window end
# of zero or less would be interpreted by iloc as "count from the end" and
# the loop would silently train/test on the wrong rows.
smallest_window = n_rows - (run_length - 1) * interval - 1
if run_length > 0 and smallest_window < 2:
    raise ValueError(
        "Not enough rows (%d) for run_length=%d with interval=%d"
        % (n_rows, run_length, interval)
    )

# NOTE(review): 'result' comes from create_results(df, 'close', future). If
# each label is derived from the close `future` rows ahead (TODO confirm in
# feature_functions), the last `future` training labels depend on prices
# observed after the test row's timestamp, which would inflate the measured
# accuracy. Consider leaving a gap of `future` rows between train and test.

for i in range(run_length):
    # Rows [0, window_end) form this step's window; its last row is the test row.
    window_end = n_rows - i * interval - 1
    X = features.iloc[:window_end]

    # Split the data (explicit positional slicing for features and labels alike)
    X_train = X.iloc[:-1]
    X_test = X.iloc[-1:]
    y_train = y.iloc[:window_end - 1]
    y_test = y.iloc[window_end - 1:window_end]

    clf = GradientBoostingClassifier(random_state=5, learning_rate=0.1, n_estimators=100)
    clf.fit(X_train, y_train)

    # Predictions: with a single test row the accuracy is either 0.0 or 1.0,
    # so summing it counts the correct predictions.
    predicted = clf.predict(X_test)
    score = accuracy_score(y_test, predicted)
    print (str(score) + str(y_test.index))
    correct_predictions += score

1.0DatetimeIndex(['2018-07-27 04:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-07-26 08:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-07-25 12:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-07-24 16:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-07-23 20:00:00'], dtype='datetime64[ns]', name='date', freq=None)
0.0DatetimeIndex(['2018-07-23'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-07-20 04:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-07-19 08:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-07-18 12:00:00'], dtype='datetime64[ns]', name='date', freq=None)
0.0DatetimeIndex(['2018-07-17 16:00:00'], dtype='datetime64[ns]', name='date', freq=None)
0.0DatetimeIndex(['2018-07-16 20:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['

1.0DatetimeIndex(['2018-04-10 16:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-04-09 20:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-04-09'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-04-06 04:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-04-05 08:00:00'], dtype='datetime64[ns]', name='date', freq=None)
0.0DatetimeIndex(['2018-04-04 12:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-04-03 16:00:00'], dtype='datetime64[ns]', name='date', freq=None)
0.0DatetimeIndex(['2018-04-02 20:00:00'], dtype='datetime64[ns]', name='date', freq=None)
0.0DatetimeIndex(['2018-04-02'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-03-30 04:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-03-29 08:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2018-03-2

1.0DatetimeIndex(['2017-12-21 10:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-12-20 14:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-12-19 18:00:00'], dtype='datetime64[ns]', name='date', freq=None)
0.0DatetimeIndex(['2017-12-18 22:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-12-18 02:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-12-15 06:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-12-14 10:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-12-13 14:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-12-12 18:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-12-11 22:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-12-11 02:00:00'], dtype='datetime64[ns]', name='date', freq=None)
0.0Datetim

1.0DatetimeIndex(['2017-09-05 17:00:00'], dtype='datetime64[ns]', name='date', freq=None)
0.0DatetimeIndex(['2017-09-04 21:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-09-04 01:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-09-01 05:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-08-31 09:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-08-30 13:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-08-29 17:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-08-28 21:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-08-28 01:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0DatetimeIndex(['2017-08-25 05:00:00'], dtype='datetime64[ns]', name='date', freq=None)
0.0DatetimeIndex(['2017-08-24 09:00:00'], dtype='datetime64[ns]', name='date', freq=None)
1.0Datetim

In [9]:
# Share of the walk-forward test rows that were predicted correctly
# (sum of the per-step 0.0/1.0 scores divided by the number of steps).
correct_predictions / run_length

0.7333333333333333