In [13]:
import pandas as pd
import numpy as np 

In [14]:
file = 'dataset/EURUSD_features.csv'
data = pd.read_csv(file)
data.set_index('date', inplace=True, drop=True)

################################################
'''
    - 4 years data
    - 4 y * 356 d * 24 hr = 34,176

'''
amounts = 34176
#amounts = 5000
data = data.tail(amounts+24)
data = pd.DataFrame(data=data, dtype=np.float64)
##############################################

features = data.copy(deep=False)
features.drop(features.tail(24).index, inplace=True)
features = features.drop(['open_24', 'close_24'], axis=1)

labels = data[['open_24', 'close_24']].copy(deep=False)
labels = labels.iloc[24:, :]
################################################
'''
    - scale output with 1 pip
'''
labels = labels*10000
################################################


labels.reset_index(drop=True, inplace=True)
labels.index = features.index
print(labels.head())
print(features.head())

                  open_24  close_24
date                               
11/02/2020 05:00  10868.9   10871.5
11/02/2020 06:00  10871.5   10871.4
11/02/2020 07:00  10871.4   10879.2
11/02/2020 08:00  10879.2   10886.9
11/02/2020 09:00  10886.9   10882.8
                     open     high      low    close    MOM_3    MOM_4  \
date                                                                     
11/02/2020 05:00  1.09093  1.09128  1.09078  1.09122 -0.00017 -0.00017   
11/02/2020 06:00  1.09122  1.09122  1.09082  1.09111 -0.00024 -0.00028   
11/02/2020 07:00  1.09112  1.09143  1.09056  1.09142  0.00049  0.00007   
11/02/2020 08:00  1.09144  1.09152  1.09056  1.09107 -0.00015  0.00014   
11/02/2020 09:00  1.09107  1.09158  1.09080  1.09136  0.00025  0.00014   

                    MOM_5    MOM_8    MOM_9   MOM_10  ...  bb_bbl_15  \
date                                                  ...              
11/02/2020 05:00  0.00022  0.00017 -0.00001 -0.00016  ...   1.090592   
11/02/2020 06

In [15]:
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.svm import LinearSVR
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler

from numpy import mean
from numpy import std
from numpy import absolute
from sklearn.metrics import r2_score,mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [16]:
sc_X = StandardScaler()
sc_y = StandardScaler()
x = sc_X.fit_transform(features.values)
y = sc_y.fit_transform(labels.values)

input_train,input_test,output_train,output_test = train_test_split(x,y,test_size=0.05)
print("train shape : {:.0f}".format(input_train.shape[0]/24),"days || test shape : {:.0f}".format(input_test.shape[0]/24),"days")

train shape : 198 days || test shape : 10 days


In [17]:

import joblib # save model
filename = 'model/EURUSD_SVR.joblib'

model = SVR(kernel='rbf',gamma='auto',C=25,epsilon=0.0001)

best_svr = MultiOutputRegressor(model)
cv = KFold(n_splits=10,shuffle=False)
scores = []
i = 1
for train_index, test_index in cv.split(input_train):
        print("K-folds at : ",i)
        X_train, X_test, y_train, y_test = input_train[train_index], input_train[test_index], output_train[train_index], output_train[test_index]
        best_svr.fit(X_train, y_train)
        
        '''
                - Cross validate 
        '''
        scores.append(best_svr.score(X_test, y_test))
        print("scores : ",best_svr.score(X_test, y_test))

        '''
                - MAE
        '''
        yhat = best_svr.predict(X_test)
        yhat = sc_y.inverse_transform(yhat)
        y_test = sc_y.inverse_transform(y_test)
        print("MAE : ",mean_absolute_error(y_test, yhat, multioutput='raw_values'))
        joblib.dump(best_svr, filename)
        i+=1



K-folds at :  1
scores :  0.9747699294489574
MAE :  [49.41819499 50.19816489]
K-folds at :  2


KeyboardInterrupt: 

In [11]:
yhat = best_svr.predict(input_test)
yhat = sc_y.inverse_transform(yhat)
y_test = sc_y.inverse_transform(output_test)
mse = mean_squared_error(y_test,yhat)
sum_err = []

for i in range(len(y_test)):
    err = abs(y_test[i]-yhat[i])
    sum_err.append(err)
    #print(i,"-> Pre ",yhat[i]," vs Acc",y_test[i]," err = ",err)
print("Crossvalidation score :",np.mean(scores))
print("Abs_err = ",r2_score(yhat,y_test))
print("mse = ",mse/10000)
print("sqrt(mse) = ",np.sqrt(mse))
print("Pips err = ",mean(sum_err),"\n")

Crossvalidation score : 0.6378515368198225
Abs_err =  -0.9137122381874064
mse =  5.972274090343993
sqrt(mse) =  244.38236618757895
Pips err =  219.77892228115374 



In [7]:
loaded_model = joblib.load(filename)
#result = loaded_model.score(input_test, output_test)
result = loaded_model.predict(input_test)
print(result)

[[-4.24586314e-01 -3.96555916e-01]
 [-1.10342552e+00 -1.09416299e+00]
 [ 1.11158891e+00  1.09566505e+00]
 [-6.52795051e-02 -8.23970996e-02]
 [ 6.71269712e-01  6.76198691e-01]
 [ 1.16993440e+00  1.19862584e+00]
 [-2.10190809e-01 -2.16932016e-01]
 [ 8.14954919e-01  8.03190356e-01]
 [ 1.23168695e+00  1.23764702e+00]
 [-1.06485943e+00 -1.07327627e+00]
 [-1.10229334e+00 -1.05268266e+00]
 [-1.33035911e+00 -1.35777142e+00]
 [ 8.45994815e-01  8.25706464e-01]
 [ 1.23471843e+00  1.22081570e+00]
 [ 9.45764687e-01  9.50139387e-01]
 [ 1.01021365e+00  1.02758680e+00]
 [ 9.62523537e-01  9.62920576e-01]
 [-1.34204551e+00 -1.31329482e+00]
 [-1.67867329e+00 -1.76668295e+00]
 [ 8.64498724e-01  8.59472898e-01]
 [ 2.45825684e-01  2.25444938e-01]
 [-1.39191428e+00 -1.40136489e+00]
 [-1.13950569e+00 -1.13748796e+00]
 [ 9.96879800e-01  1.00601511e+00]
 [-1.56955239e-01 -1.60785462e-01]
 [ 1.06444769e+00  1.06180773e+00]
 [ 1.24020707e+00  1.23180658e+00]
 [-8.08988141e-02 -8.19060853e-02]
 [ 1.17979099e+00  1