In [9]:
import pandas as pd
import numpy as np 
from datetime import datetime

import talib

from ta.volatility import BollingerBands
from ta.trend import MACD

import plotly as py
from plotly import tools
import plotly.graph_objects as go


In [10]:
def create_candlestick(df) :

    trace_0 = go.Ohlc(x=df.index,
                    open=df['Open'],
                    high=df['High'],
                    low=df['Low'],
                    close=df['Close'],
                    name='Currency Quote')

    fig = tools.make_subplots(rows=1,cols=1,shared_xaxes=True)
    fig.append_trace(trace_0,1,1)

    py.offline.plot(fig,filename='Candlestick_chart')

In [11]:
def preprocessing(file,s=0):

    data = pd.read_csv(file)
    data.set_index('date', inplace=True, drop=True)
    data = data.iloc[s:,:]
    data = pd.DataFrame(data=data, dtype=np.float64) 

    df = data.copy(deep=False)
    df.drop(df.tail(24).index,inplace=True)
  
    label = data[['open','close']].copy(deep=False)
    label = label.iloc[24:,:]

    label.reset_index(drop=True,inplace=True)
    label.index = df.index

    def Heiken_Ashi(prices):
 
        #################################################

        #prices  : dataframe of prices
        #periods : periods of which to create the candles

        #return  : Heiken_Ashi OHLC candles

        #################################################


        HA_close = prices[['open','high','low','close']].sum(axis = 1)/4

        HA_open = HA_close.copy()

        HA_open.iloc[0] = HA_close.iloc[0]

        HA_high = HA_close.copy()

        HA_low = HA_close.copy()

        for i in range(1,len(prices)):
            
            HA_open.iloc[i] = (HA_open.iloc[i-1] + HA_close.iloc[i-1])/2

            HA_high.iloc[i] = np.array([prices.high.iloc[i], HA_open.iloc[i], HA_close.iloc[i]]).max()

            HA_low.iloc[i] = np.array([prices.low.iloc[i], HA_open.iloc[i], HA_close.iloc[i]]).min()

        return HA_open,HA_high,HA_low,HA_close

    #------------------------------------------------------------#
    # Momentum (MOM) :
    #------------------------------------------------------------#

    periods = [3,4,5,8,9,10]

    for i in range(0,len(periods)):
        df['MOM_{i}'.format(i=periods[i])] = talib.MOM(df.close.values,timeperiod = periods[i])

    print("--------- Mometum Successful ---------")

    #------------------------------------------------------------#
    # Stochastic oscillator (STOCH):
    #------------------------------------------------------------#

    periods = [3,4,5,8,9,10]
    for i in range(0,len(periods)):
        K,D = talib.STOCH(
            close = df['close'],
            high = df['high'],
            low = df['low'],
            fastk_period=12
            )
        df['K_{i}'.format(i=periods[i])] = K
        df['D_{i}'.format(i=periods[i])] = D

    print("--------- Stochastic oscillator Successful ---------")

    #------------------------------------------------------------#
    # Williams %R (WILLR) :
    #------------------------------------------------------------#
    
    periods = [6,7,8,9, 10]
    for i in range(len(periods)):
        df['WILLR_{i}'.format(i=periods[i])] = talib.WILLR(
            high = df['high'],
            low = df['low'],
            close = df['close'],
            timeperiod = periods[i]
            )

    print("--------- Williams %R Successful ---------")

    #------------------------------------------------------------#
    #  Rate of change (PROCP) :
    #------------------------------------------------------------#

    periods = [12,13,14,15]
    for i in range(len(periods)):
        df['ROCP_{i}'.format(i=periods[i])] = talib.ROCP(
            df['close'],
            timeperiod = periods[i]
        )

    print("--------- Rate of change Successful ---------")

    #------------------------------------------------------------#
    # Weighted Closing Price (WPC) :
    #------------------------------------------------------------#

    df['WPC'] = talib.WCLPRICE(
            high = df['high'],
            low = df['low'],
            close = df['close']
        )

    print("--------- Weighted Closing Price Successful ---------")

    #------------------------------------------------------------#
    # Accumulation Distribution Line (ADL) :
    #------------------------------------------------------------#

    df['ADL'] = talib.AD(
        high = df['high'],
        low = df['low'],
        close = df['close'],
        volume = df['volume']
    )

    print("--------- Accumulation Distribution Line Successful ---------")

    #------------------------------------------------------------#
    # Accumulation Distribution Oscillator (ADOSC) :
    #------------------------------------------------------------#

    periods_fast = [2,3,4,5]
    periods_slow = [10,12,14,16]
    for i in range(len(periods_fast)):
        df['ADOSC_{i},{j}'.format(i=periods_fast[i],j=periods_slow[i])] = talib.ADOSC(
            high = df['high'],
            low = df['low'],
            close = df['close'],
            volume = df['volume'],
            fastperiod = periods_fast[i],
            slowperiod = periods_slow[i]
        )
    
    print("--------- Accumulation Distribution Line Successful ---------")

    #------------------------------------------------------------#
    # Moving Average Convergence/Divergence (MACD) :
    #------------------------------------------------------------#
    
    indicator_MACD = MACD(
        close = df['close'],
        n_fast=12,
        n_slow=26,
        n_sign=9,
        fillna=True
    )

    df['MACD_12,26'] = indicator_MACD.macd()
    df['MACD_his_12,26'] = indicator_MACD.macd_diff()
    df['MACD_signal_12,26'] = indicator_MACD.macd_signal()

    print("--------- Moving Average Convergence/Divergence Successful ---------")
    
    #------------------------------------------------------------#
    # Commodity Channel Index (CCI) :
    #------------------------------------------------------------#

    df['CCI_15'] = talib.CCI(
        high = df['high'],
        low = df['low'],
        close = df['close'],
        timeperiod = 15        
    )

    print("--------- Commodity Channel Index Successful ---------")

    #------------------------------------------------------------#
    # Bollinger Bands (BBANDS) :
    #------------------------------------------------------------#

    indicator_bb = BollingerBands(close=df["close"], n=15, ndev=2)
    df['bb_bbm_15'] = indicator_bb.bollinger_mavg()
    df['bb_bbh_15'] = indicator_bb.bollinger_hband()
    df['bb_bbl_15'] = indicator_bb.bollinger_lband()

    print("--------- Bollinger Bands Successful ---------")

    #------------------------------------------------------------#
    # Heikin Ashi :
    #------------------------------------------------------------#

    Open,High,Low,Close = Heiken_Ashi(df)
    df['HA_open'] = Open
    df['HA_high'] = High
    df['HA_low'] = Low
    df['HA_close'] = Close

    print("--------- Heikin Ashi Successful ---------")

    #------------------------------------------------------------#
    # Relative Strange index (RSI) :
    #------------------------------------------------------------#

    periods = [6,8,10,12]
    for i in range(len(periods)):
        df['RSI_{i}'.format(i=periods[i])] = talib.RSI(
            df['close'],
            timeperiod = periods[i]
        )
    
    print("--------- Relative Strange index Successful ---------")

    #------------------------------------------------------------#
    # Slope :
    #------------------------------------------------------------#

    df['Slope_4'] = talib.LINEARREG_SLOPE(df['close'], timeperiod=4)
    df = df.fillna(method='bfill')

    print("--------- Slope Successful ---------")

    df = df.drop(['open','high','low','close','volume'],axis=1)
   # print(df.head())
    return df,label

In [12]:

file = 'dataset/EURUSD_H1.csv'
X,Y = preprocessing(file,s=60000)


--------- Mometum Successful ---------
--------- Stochastic oscillator Successful ---------
--------- Williams %R Successful ---------
--------- Rate of change Successful ---------
--------- Weighted Closing Price Successful ---------
--------- Accumulation Distribution Line Successful ---------
--------- Accumulation Distribution Line Successful ---------
--------- Moving Average Convergence/Divergence Successful ---------
--------- Commodity Channel Index Successful ---------
--------- Bollinger Bands Successful ---------
--------- Heikin Ashi Successful ---------
--------- Relative Strange index Successful ---------
--------- Slope Successful ---------


In [13]:
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.svm import LinearSVR
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler

from numpy import mean
from numpy import std
from numpy import absolute
from sklearn.metrics import r2_score,mean_squared_error
from sklearn.model_selection import train_test_split

In [14]:
sc_X = StandardScaler()
sc_y = StandardScaler()
x = sc_X.fit_transform(X.values)
y = sc_y.fit_transform(Y.values)

# sc_X = MinMaxScaler()
# sc_y = MinMaxScaler()
# sc_X.fit(X.values)
# x = sc_X.transform(X.values)
# sc_y.fit(Y.values)
# y = sc_y.transform(Y.values)

input_train,input_test,output_train,output_test = train_test_split(x,y,test_size=0.05)

In [15]:

#model = SVR(kernel='rbf',gamma='auto')
#model = SVR(kernel='poly', degree=3,coef0=0.1,gamma='auto')
kernel_ = ['rbf','poly']
degree_ = [2,3,4,5,6]
gamma_ = ['auto','scale',10,1,0.1,0.01]
C_ = [10,0.1,10e-2,10e-3]


In [16]:
for d in degree_:
    for g in gamma_:
        for c in C_:
            print("kernel : poly"," degree :",d," gamma :",g," C :",c)
            model = SVR(kernel='poly', degree=d,gamma=g,C=c) 
            best_svr = MultiOutputRegressor(model)
            cv = KFold(n_splits=10,shuffle=False)
            scores = []
            for train_index, test_index in cv.split(input_train):
                #print("Train Index: ", train_index)
                #print("Test Index: ", test_index, "\n")
                X_train, X_test, y_train, y_test = input_train[train_index], input_train[test_index], output_train[train_index], output_train[test_index]
                best_svr.fit(X_train, y_train)
                scores.append(best_svr.score(X_test, y_test))
            print("Crossvalidation score :",np.mean(scores))
            yhat = best_svr.predict(input_test)
            yhat = sc_y.inverse_transform(yhat)
            y_test = sc_y.inverse_transform(output_test)
            mse = mean_squared_error(y_test,yhat)
            sum_err = []
            for i in range(len(y_test)):
                err = abs(y_test[i]-yhat[i])*10e4
                sum_err.append(err)
                #print(i,"-> Pre ",yhat[i]," vs Acc",y_test[i]," err = ",err)
            print("Abs_err = ",r2_score(yhat,y_test))
            print("mse = ",mse)
            print("sqrt(mse) = ",np.sqrt(mse))
            print("Pips err = ",mean(sum_err),"\n")

kernel : poly  degree : 2  gamma : auto  C : 10


In [None]:
for g in gamma_:
    for c in C_:
        print("kernel : rbf"," degree :",d," gamma :",g," C :",c)
        model = SVR(kernel='rbf',gamma=g,C=c) 
        best_svr = MultiOutputRegressor(model)
        cv = KFold(n_splits=10,shuffle=False)
        scores = []
        for train_index, test_index in cv.split(input_train):
            print("Train Index: ", train_index)
            print("Test Index: ", test_index, "\n")
            X_train, X_test, y_train, y_test = input_train[train_index], input_train[test_index], output_train[train_index], output_train[test_index]
            best_svr.fit(X_train, y_train)
            scores.append(best_svr.score(X_test, y_test))
        print("Crossvalidation score :",np.mean(scores))
        yhat = best_svr.predict(input_test)
        yhat = sc_y.inverse_transform(yhat)
        y_test = sc_y.inverse_transform(output_test)
        mse = mean_squared_error(y_test,yhat)
        sum_err = []
        for i in range(len(y_test)):
            err = abs(y_test[i]-yhat[i])*10e4
            sum_err.append(err)
                #print(i,"-> Pre ",yhat[i]," vs Acc",y_test[i]," err = ",err)
        print("Abs_err = ",r2_score(yhat,y_test))
        print("mse = ",mse)
        print("sqrt(mse) = ",np.sqrt(mse))
        print("Pips err = ",mean(sum_err),"\n")

In [8]:
#print(np.mean(scores))

0.7656582812343495


In [9]:
yhat = best_svr.predict(input_test)

yhat = sc_y.inverse_transform(yhat)
y_test = sc_y.inverse_transform(output_test)


mse = mean_squared_error(y_test,yhat)

sum_err = []
for i in range(len(y_test)):
    err = abs(y_test[i]-yhat[i])*10e4
    sum_err.append(err)
    print(i,"-> Pre ",yhat[i]," vs Acc",y_test[i]," err = ",err)
        #print("Acc ",y_test[:5,:])

85]
1067 -> Pre  [1.10212389 1.10204119]  vs Acc [1.10562 1.10657]  err =  [349.6105076  452.88144041]
1068 -> Pre  [1.06507809 1.06481469]  vs Acc [1.06461 1.0626 ]  err =  [ 46.80896128 221.4687472 ]
1069 -> Pre  [1.1363547  1.13635702]  vs Acc [1.18669 1.18913]  err =  [5033.53044791 5277.29812929]
1070 -> Pre  [1.13742444 1.13740352]  vs Acc [1.15628 1.1565 ]  err =  [1885.55610972 1909.64830363]
1071 -> Pre  [1.12486502 1.12490894]  vs Acc [1.113   1.11282]  err =  [1186.50225372 1208.89446273]
1072 -> Pre  [1.12989277 1.12987289]  vs Acc [1.12989 1.1298 ]  err =  [0.27720744 7.28853377]
1073 -> Pre  [1.21012906 1.21025539]  vs Acc [1.18788 1.18719]  err =  [2224.9058939  2306.53919986]
1074 -> Pre  [1.15298641 1.15299569]  vs Acc [1.18079 1.18033]  err =  [2780.35918555 2733.43086495]
1075 -> Pre  [1.12974541 1.12975478]  vs Acc [1.13273 1.13269]  err =  [298.45890937 293.52190352]
1076 -> Pre  [1.15114546 1.15117257]  vs Acc [1.17063 1.1709 ]  err =  [1948.45385216 1972.74285254

In [10]:
print(r2_score(yhat,y_test))
print("mse = ",mse)
print("sqrt(mse) = ",np.sqrt(mse))
print(mean(sum_err))
    

0.6099149497157751
mse =  0.0005078862127825704
sqrt(mse) =  0.022536330952099774
1710.5050036114164
