In [1]:
import pandas as pd
import numpy as np

from feature_functions import *

from sklearn import preprocessing
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVR
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Variables:
# `future` is passed to create_results; each *Keys list is the parameter list
# handed to the feature builder of the same family.
future = 1
averageKeys = [2]
momentumKeys = [3, 4, 5, 8, 9, 10]
heikenKeys = [15]
EMAKeys = [2,10,20]
fourierKeys = [10,20,30]

# Loading the data
df = pd.read_csv("data/EURUSDday.csv")
df.columns = ['date','open','high','low','close','volume']
df['date'] = pd.to_datetime(df['date'], format='%d.%m.%Y %H:%M:%S.%f')

# Index by timestamp and keep only the price/volume columns.
df = df.set_index('date')[['open','high','low','close','volume']]

# Drop rows where there is no movement (market closed, e.g. weekends):
# every row whose values are exactly repeated by another row is removed,
# including the first occurrence.
df = df[~df.duplicated(keep=False)]
df.head(2)

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2011-01-03 22:00:00,1.33596,1.34298,1.32923,1.33068,204936.7404
2011-01-04 22:00:00,1.33047,1.33248,1.31262,1.3149,191026.63


In [2]:
# Start from the raw price frame and append each feature family column-wise.
df_with_features = df

# (builder, keys) pairs, applied in this order: averages, heikenashi,
# momentum, fourier. The exponential averages (exponential_averages with
# EMAKeys) are currently disabled.
feature_steps = (
    (moving_averages, averageKeys),
    (heinkenashi, heikenKeys),
    (momentum, momentumKeys),
    (fourier, fourierKeys),
)
for build_feature, keys in feature_steps:
    df_with_features = pd.concat([df_with_features, build_feature(df, keys)], axis=1)

# Label column computed from 'close' and `future`, then discard every row
# that still contains a missing value.
df_with_features['result'] = create_results(df , 'close', future)
df_with_features = df_with_features.dropna()
df_with_features

Unnamed: 0_level_0,open,high,low,close,volume,MA2 open,MA2 high,MA2 low,MA2 close,HA15 open,...,fourier 10 w,fourier 20 a0,fourier 20 a1,fourier 20 b1,fourier 20 w,fourier 30 a0,fourier 30 a1,fourier 30 b1,fourier 30 w,result
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-02-14 22:00:00,1.34883,1.35504,1.34609,1.34866,191687.1815,1.349715,1.355430,1.344445,1.348745,1.355296,...,1.072820,0.000705,-0.005075,0.004473,1.042928,0.001139,-0.001179,-0.007521,0.952509,1.0
2011-02-15 22:00:00,1.34865,1.35879,1.34619,1.35680,206301.3507,1.348740,1.356915,1.346140,1.352730,1.352476,...,1.038284,0.000056,-0.002316,0.007826,1.082158,0.001780,-0.004977,-0.006148,0.975364,1.0
2011-02-16 22:00:00,1.35666,1.36183,1.35368,1.36087,199447.1311,1.352655,1.360310,1.349935,1.358835,1.352542,...,1.050708,-0.000109,0.004955,0.006639,1.095552,0.002014,-0.006974,-0.001627,0.992115,1.0
2011-02-17 22:00:00,1.36087,1.37144,1.35459,1.36921,200646.7903,1.358765,1.366635,1.354135,1.365040,1.355401,...,0.922829,-0.000083,0.007152,0.001095,1.100631,0.002312,-0.004971,0.004802,0.988240,0.0
2011-02-20 22:00:00,1.36972,1.37147,1.36471,1.36752,120506.6307,1.365295,1.371455,1.359650,1.368365,1.359714,...,0.841563,0.000470,0.003587,-0.006359,1.089273,0.002074,0.000341,0.007325,0.995053,0.0
2011-02-21 22:00:00,1.36749,1.37030,1.35269,1.36497,238283.5475,1.368605,1.370885,1.358700,1.366245,1.364035,...,0.831273,0.000536,-0.003639,-0.006236,1.097550,0.002174,0.006193,0.003588,0.995109,1.0
2011-02-22 22:00:00,1.36495,1.37863,1.36460,1.37479,205282.9413,1.366220,1.374465,1.358645,1.369880,1.363949,...,0.990331,0.000458,-0.007720,0.001604,1.096039,0.002173,0.006501,-0.000923,1.022198,1.0
2011-02-23 22:00:00,1.37482,1.38198,1.37040,1.37992,220566.5610,1.369885,1.380305,1.367500,1.377355,1.367346,...,1.155241,-0.000216,-0.002819,0.008098,1.105099,0.001624,0.001391,-0.005269,1.013877,0.0
2011-02-24 22:00:00,1.37992,1.38377,1.37237,1.37529,180676.2400,1.377370,1.382875,1.371385,1.377605,1.372063,...,1.343538,0.000011,0.006751,0.004680,1.103540,0.001195,0.000181,-0.004974,1.066799,1.0
2011-02-27 22:00:00,1.37529,1.38544,1.37111,1.38057,183774.3495,1.377605,1.384605,1.371740,1.377930,1.374950,...,1.594412,0.000404,0.005877,-0.004375,1.065000,0.001873,0.001996,-0.004946,1.146092,0.0


In [None]:
# Walk-forward evaluation.
# Fold i trains on rows [0, cutoff - 1) and is scored on the single row at
# position cutoff + future, where cutoff moves `interval` rows further into
# the past on every fold (fold 0 is scored on the last row of
# df_with_features). A fresh model is fitted for every fold.
interval = future + 1
run_length = 300
correct_predictions = 0

# Loop-invariant pieces, built once instead of on every fold.
n_rows = df_with_features.shape[0]
features = df_with_features.iloc[:, :-1]  # every column except the trailing 'result'
y = df_with_features.result

# The oldest fold must still leave at least one training row. Without this
# check a negative cutoff makes .iloc[:cutoff] count from the END of the
# frame, so the fold would silently train and test on the wrong window.
min_rows = (run_length - 1) * interval + future + 3
if n_rows < min_rows:
    raise ValueError('run_length=' + str(run_length) + ' needs at least ' + str(min_rows)
                     + ' rows in df_with_features, got ' + str(n_rows))

for i in range(run_length):
    cutoff = n_rows - (i*interval) - future - 1
    test_pos = cutoff + future

    # Split the data (purely positional, hence .iloc on both frame and labels)
    X_train = features.iloc[:cutoff - 1]
    X_test = features.iloc[test_pos:test_pos + 1]
    y_train = y.iloc[:cutoff - 1]
    y_test = y.iloc[test_pos:test_pos + 1]

    # Optional scaling (disabled): fit preprocessing.StandardScaler on X_train
    # and transform both X_train and X_test with it.

    # Model (alternatives tried: SVR(gamma=0.1, C=1.0, epsilon=0.2), SVC(gamma=0.1))
    clf = GradientBoostingClassifier(random_state=5, learning_rate=0.1, n_estimators=100)
    clf.fit(X_train, y_train)

    # Predictions: thresholding at 0.5 keeps the loop usable with a model
    # that returns a continuous value (such as the SVR alternative).
    predicted = [1.0] if clf.predict(X_test)[0] > 0.5 else [0.0]

    # Single-sample accuracy is 1.0 on a hit and 0.0 on a miss.
    fold_score = accuracy_score(y_test, predicted)
    correct_predictions += fold_score

    # Report the latest fold score and the running accuracy every 20 folds.
    if (i + 1) % 20 == 0:
        print (str(fold_score) +' ' + str(i+1) + ' accuracy: '+ str(correct_predictions/(i+1)))


accuracy = correct_predictions/run_length
accuracy

1.0 20 accuracy: 0.5
1.0 40 accuracy: 0.5
1.0 60 accuracy: 0.5333333333333333
0.0 80 accuracy: 0.55
0.0 100 accuracy: 0.55
0.0 120 accuracy: 0.5166666666666667
1.0 140 accuracy: 0.5071428571428571
1.0 160 accuracy: 0.48125
0.0 180 accuracy: 0.4666666666666667
1.0 200 accuracy: 0.48
1.0 220 accuracy: 0.4772727272727273
0.0 240 accuracy: 0.475


In [None]:
# Overall accuracy: the accumulated per-fold accuracy scores divided by the
# number of folds requested (run_length).
# NOTE(review): if the evaluation loop was interrupted before finishing all
# run_length folds, this divides by more folds than were actually scored and
# understates the accuracy — confirm the loop completed.
correct_predictions/run_length