In [112]:
import pandas as pd
import numpy as np

from feature_functions import *

from sklearn import preprocessing
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVR
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Experiment parameters
future = 1                              # horizon passed to create_results
averages = [2]                          # periods for moving_averages
momentum_values = [3, 4, 5, 8, 9, 10]   # periods for momentum
heiken = [15]                           # periods for heinkenashi
EMA = [2, 10, 20]                       # periods for exponential averages


# Read the daily EURUSD bars and give the columns short lowercase names
df = pd.read_csv("data/EURUSDday.csv")
df.columns = ['date', 'open', 'high', 'low', 'close', 'volume']
df['date'] = pd.to_datetime(df['date'], format='%d.%m.%Y %H:%M:%S.%f')

# Index by timestamp, keep only the OHLCV columns, then discard rows without
# movement (market closed, e.g. weekends). With keep=False every row that has
# an exact duplicate elsewhere in the frame is removed, including its first
# occurrence.
df = (
    df.set_index('date')
      .loc[:, ['open', 'high', 'low', 'close', 'volume']]
      .drop_duplicates(keep=False)
)
df.head(2)

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2011-01-03 22:00:00,1.33596,1.34298,1.32923,1.33068,204936.7404
2011-01-04 22:00:00,1.33047,1.33248,1.31262,1.3149,191026.63


In [113]:
# Build the modelling frame: the raw OHLCV columns followed, column-wise, by
# each indicator family computed from `df`. Exponential averages (EMA) are
# currently not part of the feature set.

# Moving averages
df_with_averages = pd.concat([df, moving_averages(df, averages)], axis=1)

# Heiken-Ashi
df_with_averages = pd.concat(
    [df_with_averages, heinkenashi(df, heiken)],
    axis=1,
)

# Momentum
df_with_averages = pd.concat(
    [df_with_averages, momentum(df, momentum_values)],
    axis=1,
)

# Attach the target as the last column, then drop every row that still has a
# missing value in any column.
df_with_averages = (
    df_with_averages
    .assign(result=create_results(df, 'close', future))
    .dropna()
)
df_with_averages

Unnamed: 0_level_0,open,high,low,close,volume,MA2 open,MA2 high,MA2 low,MA2 close,HA15 open,...,MomClose 4,MomOpen 5,MomClose 5,MomOpen 8,MomClose 8,MomOpen 9,MomClose 9,MomOpen 10,MomClose 10,result
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-01-17 22:00:00,1.32932,1.34622,1.32527,1.33865,211863.2589,1.333720,1.342345,1.324840,1.333985,1.328957,...,0.02543,0.03424,0.04133,0.01444,0.03841,-0.00115,0.02375,-0.00664,0.00797,1.0
2011-01-18 22:00:00,1.33865,1.35379,1.33680,1.34728,213273.1896,1.333985,1.350005,1.331035,1.342965,1.331911,...,0.01094,0.04133,0.03406,0.03847,0.05668,0.02377,0.04704,0.00818,0.03238,0.0
2011-01-19 22:00:00,1.34711,1.35218,1.33958,1.34717,230471.8378,1.342880,1.352985,1.338190,1.347225,1.338021,...,0.00849,0.03389,0.01083,0.05966,0.05209,0.04693,0.05657,0.03223,0.04693,1.0
2011-01-20 22:00:00,1.34717,1.36247,1.34485,1.36204,200540.3906,1.347140,1.357325,1.342215,1.354605,1.342265,...,0.03272,0.01083,0.02336,0.05209,0.06472,0.05972,0.06696,0.04699,0.07144,1.0
2011-01-23 22:00:00,1.36388,1.36852,1.35407,1.36370,181836.0017,1.355525,1.365495,1.349460,1.362870,1.348199,...,0.02505,0.02576,0.03438,0.06656,0.05048,0.06880,0.06638,0.07643,0.06862,1.0
2011-01-24 22:00:00,1.36372,1.37038,1.35742,1.36808,213526.2391,1.363800,1.369450,1.355745,1.365890,1.355371,...,0.02080,0.03440,0.02943,0.05050,0.03174,0.06640,0.05486,0.06864,0.07076,1.0
2011-01-25 22:00:00,1.36806,1.37212,1.36449,1.37121,183526.8591,1.365890,1.371250,1.360955,1.369645,1.360135,...,0.02404,0.02941,0.02393,0.03172,0.03253,0.05484,0.03487,0.07074,0.05799,1.0
2011-01-26 22:00:00,1.37121,1.37572,1.36385,1.37325,176973.3101,1.369635,1.373920,1.364170,1.372230,1.364553,...,0.01121,0.02410,0.02608,0.03309,0.04393,0.03487,0.03457,0.05799,0.03691,0.0
2011-01-27 22:00:00,1.37325,1.37458,1.35831,1.36099,168266.9429,1.372230,1.375150,1.361080,1.367120,1.367780,...,-0.00271,0.02608,-0.00105,0.04393,0.02234,0.03513,0.03167,0.03691,0.02231,1.0
2011-01-30 22:00:00,1.35866,1.37391,1.35699,1.36929,195457.1189,1.365955,1.374245,1.357650,1.365140,1.367281,...,0.00121,-0.00522,0.00559,0.02001,0.02201,0.02934,0.03064,0.02054,0.03997,1.0


In [117]:
# Walk-forward back-test.
#
# Each iteration refits the model on the first `cutoff - 1` rows of
# `df_with_averages` and scores one out-of-sample row located `future` rows
# past the cutoff. The cutoff moves back by `interval` rows per iteration, so
# iteration 0 is scored on the last row of the frame. Rows between the end of
# the training window and the test row are left unused (presumably to keep the
# training labels from overlapping the test period - confirm against
# create_results).
interval = future + 1
run_length = 300
correct_predictions = 0

# Loop-invariant pieces: the target column and the number of available rows.
y = df_with_averages.result
n_rows = df_with_averages.shape[0]

# Without this check a too-short frame makes the slice stops zero or negative;
# .iloc then silently trims from the end instead of failing, and the model is
# trained and scored on the wrong rows.
smallest_cutoff = n_rows - (run_length - 1) * interval - future - 1
if smallest_cutoff < 2:
    raise ValueError(
        'Not enough rows for the walk-forward run: ' + str(n_rows)
        + ' rows available, run_length=' + str(run_length)
        + ', interval=' + str(interval) + ', future=' + str(future)
    )

for i in range(run_length):
    cutoff = n_rows - i * interval - future - 1
    test_pos = cutoff + future

    # Every column except the last one ('result') is a feature.
    X = df_with_averages.iloc[:cutoff, :-1]

    # Split the data: training stops one row before the cutoff, the test set
    # is the single row at `test_pos`.
    X_train = X.iloc[:cutoff - 1, :]
    X_test = df_with_averages.iloc[test_pos:test_pos + 1, :-1]
    y_train = y.iloc[:cutoff - 1]
    y_test = y.iloc[test_pos:test_pos + 1]

    # Model
    clf = GradientBoostingClassifier(random_state=5, learning_rate=0.1, n_estimators=100)
    clf.fit(X_train, y_train)

    # Predictions: thresholding at 0.5 leaves class labels 0.0/1.0 unchanged
    # and would also binarise a continuous output if a regressor is swapped in.
    predicted = [1.0] if clf.predict(X_test)[0] > 0.5 else [0.0]

    hit = accuracy_score(y_test, predicted)
    correct_predictions += hit

    # Progress report every 20 iterations: last hit, iteration, running accuracy.
    if (i + 1) % 20 == 0:
        print(str(hit) + ' ' + str(i + 1) + ' accuracy: ' + str(correct_predictions / (i + 1)))


accuracy = correct_predictions / run_length
accuracy

1.0 20 accuracy: 0.5
1.0 40 accuracy: 0.475
1.0 60 accuracy: 0.5166666666666667
0.0 80 accuracy: 0.5125
1.0 100 accuracy: 0.53
0.0 120 accuracy: 0.5
0.0 140 accuracy: 0.5142857142857142
1.0 160 accuracy: 0.525
0.0 180 accuracy: 0.5277777777777778
0.0 200 accuracy: 0.53
1.0 220 accuracy: 0.5272727272727272
0.0 240 accuracy: 0.5166666666666667
1.0 260 accuracy: 0.5115384615384615
0.0 280 accuracy: 0.5
1.0 300 accuracy: 0.5066666666666667


0.5066666666666667

In [103]:
# Overall hit rate of the walk-forward run (the same ratio the loop cell stores in `accuracy`).
# NOTE(review): this cell's execution count (In [103]) is lower than the loop cell's (In [117]),
# so the output shown beneath it may come from an earlier run - re-run to confirm.
correct_predictions/run_length

0.57