In [1]:
import pandas as pd
import numpy as np 

In [2]:
file = 'dataset/EURUSD_features.csv'
data = pd.read_csv(file)
data.set_index('date', inplace=True, drop=True)

################################################
'''
    - 4 years data
    - 4 y * 356 d * 24 hr = 34,176

'''
amounts = 34176
#amounts = 200
data = data.tail(amounts+24)
data = pd.DataFrame(data=data, dtype=np.float64)
##############################################

features = data.copy(deep=False)
features.drop(features.tail(24).index, inplace=True)
features = features.drop(['open_24', 'close_24'], axis=1)

labels = data[['open_24', 'close_24']].copy(deep=False)
labels = labels.iloc[24:, :]
################################################
'''
    - scale output with 1 pip
'''
labels = labels*10000
################################################


labels.reset_index(drop=True, inplace=True)
labels.index = features.index
print(labels.head())
print(features.head())

                  open_24  close_24
date                               
05/06/2015 02:00  11319.3   11321.2
05/06/2015 03:00  11321.2   11316.1
05/06/2015 04:00  11316.2   11310.5
05/06/2015 05:00  11310.4   11339.1
05/06/2015 06:00  11339.0   11296.0
                     open     high      low    close    MOM_3    MOM_4  \
date                                                                     
05/06/2015 02:00  1.12100  1.12148  1.12081  1.12121  0.00048  0.00094   
05/06/2015 03:00  1.12121  1.12142  1.12064  1.12104  0.00009  0.00031   
05/06/2015 04:00  1.12104  1.12243  1.12102  1.12180  0.00080  0.00085   
05/06/2015 05:00  1.12179  1.12243  1.12111  1.12200  0.00079  0.00100   
05/06/2015 06:00  1.12202  1.12495  1.12181  1.12471  0.00367  0.00350   

                    MOM_5    MOM_8    MOM_9   MOM_10  ...  bb_bbl_15  \
date                                                  ...              
05/06/2015 02:00 -0.00114 -0.00329 -0.00288 -0.00755  ...   1.118060   
05/06/2015 03

In [3]:
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.svm import LinearSVR
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler

from numpy import mean
from numpy import std
from numpy import absolute
from sklearn.metrics import r2_score,mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [4]:
sc_X = StandardScaler()
sc_y = StandardScaler()
x = sc_X.fit_transform(features.values)
y = sc_y.fit_transform(labels.values)

input_train,input_test,output_train,output_test = train_test_split(x,y,test_size=0.05)
print("train shape : {:.0f}".format(input_train.shape[0]/24),"days || test shape : {:.0f}".format(input_test.shape[0]/24),"days")

train shape : 1353 days || test shape : 71 days


In [68]:

import joblib # save model
filename = 'model/EURUSD_SVR.joblib'

model = SVR(kernel='rbf',gamma='auto',C=25,epsilon=0.1)

best_svr = MultiOutputRegressor(model)
cv = KFold(n_splits=10,shuffle=False)
scores = []
i = 1
for train_index, test_index in cv.split(input_train):
        print("K-folds at : ",i)
        X_train, X_test, y_train, y_test = input_train[train_index], input_train[test_index], output_train[train_index], output_train[test_index]
        best_svr.fit(X_train, y_train)
        
        '''
                - Cross validate 
        '''
        scores.append(best_svr.score(X_test, y_test))
        print("scores : ",best_svr.score(X_test, y_test))

        '''
                - MAE
        '''
        yhat = best_svr.predict(X_test)
        print("MAE : ",mean_absolute_error(y_test, yhat, multioutput='raw_values'))
        joblib.dump(best_svr, filename)
        i+=1



K-folds at :  1


In [62]:
yhat = best_svr.predict(input_test)
yhat = sc_y.inverse_transform(yhat)
y_test = sc_y.inverse_transform(output_test)
mse = mean_squared_error(y_test,yhat)
sum_err = []

for i in range(len(y_test)):
    err = abs(y_test[i]-yhat[i])*10e4
    sum_err.append(err)
    #print(i,"-> Pre ",yhat[i]," vs Acc",y_test[i]," err = ",err)
print("Crossvalidation score :",np.mean(scores))
print("Abs_err = ",r2_score(yhat,y_test))
print("mse = ",mse)
print("sqrt(mse) = ",np.sqrt(mse))
print("Pips err = ",mean(sum_err),"\n")

Crossvalidation score : 0.9472659736667867
Abs_err =  0.965997070136337
mse =  65.73965558603018
sqrt(mse) =  8.107999481131593
Pips err =  655873.1633099525 



In [63]:
loaded_model = joblib.load(filename)
#result = loaded_model.score(input_test, output_test)
result = loaded_model.predict(input_test)
print(result)

[[-1.34222863 -1.22887601]
 [ 0.21183523  0.12484683]
 [ 1.63719022  1.64864249]
 [-0.02908245  0.12576594]
 [-1.47561627 -1.07919824]
 [ 0.89113658  1.11919729]
 [ 0.7065338   0.75264423]
 [ 0.06408291 -0.01350575]
 [-1.41872562 -1.81003548]
 [ 0.21570724 -0.36686984]]
