In [21]:
import pandas as pd
import numpy as np 
from datetime import datetime

import talib

from ta.volatility import BollingerBands
from ta.trend import MACD

import plotly as py
from plotly import tools
import plotly.graph_objects as go


In [22]:
def create_candlestick(df) :

    trace_0 = go.Ohlc(x=df.index,
                    open=df['Open'],
                    high=df['High'],
                    low=df['Low'],
                    close=df['Close'],
                    name='Currency Quote')

    fig = tools.make_subplots(rows=1,cols=1,shared_xaxes=True)
    fig.append_trace(trace_0,1,1)

    py.offline.plot(fig,filename='Candlestick_chart')

In [23]:
def preprocessing(file,s=0):
    data = pd.read_csv(file)
    data.set_index('date', inplace=True, drop=True)
    data = data.iloc[s:,:]
    data = pd.DataFrame(data=data, dtype=np.float64) 

    df = data.copy(deep=False)
    df.drop(df.tail(24).index,inplace=True)
  
    label = data[['open','close']].copy(deep=False)
    label = label.iloc[24:,:]

    label.reset_index(drop=True,inplace=True)
    label.index = df.index

    def Heiken_Ashi(prices):
 
        #################################################

        #prices  : dataframe of prices
        #periods : periods of which to create the candles

        #return  : Heiken_Ashi OHLC candles

        #################################################


        HA_close = prices[['open','high','low','close']].sum(axis = 1)/4

        HA_open = HA_close.copy()

        HA_open.iloc[0] = HA_close.iloc[0]

        HA_high = HA_close.copy()

        HA_low = HA_close.copy()

        for i in range(1,len(prices)):
            
            HA_open.iloc[i] = (HA_open.iloc[i-1] + HA_close.iloc[i-1])/2

            HA_high.iloc[i] = np.array([prices.high.iloc[i], HA_open.iloc[i], HA_close.iloc[i]]).max()

            HA_low.iloc[i] = np.array([prices.low.iloc[i], HA_open.iloc[i], HA_close.iloc[i]]).min()

        return HA_open,HA_high,HA_low,HA_close

    #------------------------------------------------------------#
    # Momentum (MOM) :
    #------------------------------------------------------------#

    periods = [3,4,5,8,9,10]

    for i in range(0,len(periods)):
        df['MOM_{i}'.format(i=periods[i])] = talib.MOM(df.close.values,timeperiod = periods[i])

    print("--------- Mometum Successful ---------")

    #------------------------------------------------------------#
    # Stochastic oscillator (STOCH):
    #------------------------------------------------------------#

    periods = [3,4,5,8,9,10]
    for i in range(0,len(periods)):
        K,D = talib.STOCH(
            close = df['close'],
            high = df['high'],
            low = df['low'],
            fastk_period=12
            )
        df['K_{i}'.format(i=periods[i])] = K
        df['D_{i}'.format(i=periods[i])] = D

    print("--------- Stochastic oscillator Successful ---------")

    #------------------------------------------------------------#
    # Williams %R (WILLR) :
    #------------------------------------------------------------#
    
    periods = [6,7,8,9, 10]
    for i in range(len(periods)):
        df['WILLR_{i}'.format(i=periods[i])] = talib.WILLR(
            high = df['high'],
            low = df['low'],
            close = df['close'],
            timeperiod = periods[i]
            )

    print("--------- Williams %R Successful ---------")

    #------------------------------------------------------------#
    #  Rate of change (PROCP) :
    #------------------------------------------------------------#

    periods = [12,13,14,15]
    for i in range(len(periods)):
        df['ROCP_{i}'.format(i=periods[i])] = talib.ROCP(
            df['close'],
            timeperiod = periods[i]
        )

    print("--------- Rate of change Successful ---------")

    #------------------------------------------------------------#
    # Weighted Closing Price (WPC) :
    #------------------------------------------------------------#

    df['WPC'] = talib.WCLPRICE(
            high = df['high'],
            low = df['low'],
            close = df['close']
        )

    print("--------- Weighted Closing Price Successful ---------")

    #------------------------------------------------------------#
    # Accumulation Distribution Line (ADL) :
    #------------------------------------------------------------#

    df['ADL'] = talib.AD(
        high = df['high'],
        low = df['low'],
        close = df['close'],
        volume = df['volume']
    )

    print("--------- Accumulation Distribution Line Successful ---------")

    #------------------------------------------------------------#
    # Accumulation Distribution Oscillator (ADOSC) :
    #------------------------------------------------------------#

    periods_fast = [2,3,4,5]
    periods_slow = [10,12,14,16]
    for i in range(len(periods_fast)):
        df['ADOSC_{i},{j}'.format(i=periods_fast[i],j=periods_slow[i])] = talib.ADOSC(
            high = df['high'],
            low = df['low'],
            close = df['close'],
            volume = df['volume'],
            fastperiod = periods_fast[i],
            slowperiod = periods_slow[i]
        )
    
    print("--------- Accumulation Distribution Line Successful ---------")

    #------------------------------------------------------------#
    # Moving Average Convergence/Divergence (MACD) :
    #------------------------------------------------------------#
    
    indicator_MACD = MACD(
        close = df['close'],
        n_fast=12,
        n_slow=26,
        n_sign=9,
        fillna=True
    )

    df['MACD_12,26'] = indicator_MACD.macd()
    df['MACD_his_12,26'] = indicator_MACD.macd_diff()
    df['MACD_signal_12,26'] = indicator_MACD.macd_signal()

    print("--------- Moving Average Convergence/Divergence Successful ---------")
    
    #------------------------------------------------------------#
    # Commodity Channel Index (CCI) :
    #------------------------------------------------------------#

    df['CCI_15'] = talib.CCI(
        high = df['high'],
        low = df['low'],
        close = df['close'],
        timeperiod = 15        
    )

    print("--------- Commodity Channel Index Successful ---------")

    #------------------------------------------------------------#
    # Bollinger Bands (BBANDS) :
    #------------------------------------------------------------#

    indicator_bb = BollingerBands(close=df["close"], n=15, ndev=2)
    df['bb_bbm_15'] = indicator_bb.bollinger_mavg()
    df['bb_bbh_15'] = indicator_bb.bollinger_hband()
    df['bb_bbl_15'] = indicator_bb.bollinger_lband()

    print("--------- Bollinger Bands Successful ---------")

    #------------------------------------------------------------#
    # Heikin Ashi :
    #------------------------------------------------------------#

    Open,High,Low,Close = Heiken_Ashi(df)
    df['HA_open'] = Open
    df['HA_high'] = High
    df['HA_low'] = Low
    df['HA_close'] = Close

    print("--------- Heikin Ashi Successful ---------")

    #------------------------------------------------------------#
    # Relative Strange index (RSI) :
    #------------------------------------------------------------#

    periods = [6,8,10,12]
    for i in range(len(periods)):
        df['RSI_{i}'.format(i=periods[i])] = talib.RSI(
            df['close'],
            timeperiod = periods[i]
        )
    
    print("--------- Relative Strange index Successful ---------")

    #------------------------------------------------------------#
    # Slope :
    #------------------------------------------------------------#

    df['Slope_4'] = talib.LINEARREG_SLOPE(df['close'], timeperiod=4)
    df = df.fillna(method='bfill')

    print("--------- Slope Successful ---------")

    return df,label

In [24]:

file = 'dataset/EURUSD_H1.csv'
X,Y = preprocessing(file,s=60000)


--------- Mometum Successful ---------
--------- Stochastic oscillator Successful ---------
--------- Williams %R Successful ---------
--------- Rate of change Successful ---------
--------- Weighted Closing Price Successful ---------
--------- Accumulation Distribution Line Successful ---------
--------- Accumulation Distribution Line Successful ---------
--------- Moving Average Convergence/Divergence Successful ---------
--------- Commodity Channel Index Successful ---------
--------- Bollinger Bands Successful ---------
--------- Heikin Ashi Successful ---------
--------- Relative Strange index Successful ---------
--------- Slope Successful ---------


In [25]:
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.svm import LinearSVR
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import KFold

from numpy import mean
from numpy import std
from numpy import absolute
from sklearn.metrics import r2_score,mean_squared_error
from sklearn.model_selection import train_test_split

In [26]:
sc_X = StandardScaler()
sc_y = StandardScaler()
x = sc_X.fit_transform(X.values)
y = sc_y.fit_transform(Y.values)
input_train,input_test,output_train,output_test = train_test_split(x,y,test_size=0.05)

In [27]:
scores = []
model = SVR(kernel='poly', degree=3,coef0=0.1)
best_svr = MultiOutputRegressor(model)
best_svr.fit(x,y)
cv = KFold(n_splits=10,shuffle=False)
for train_index, test_index in cv.split(input_train):
    print("Train Index: ", train_index)
    print("Test Index: ", test_index, "\n")
    X_train, X_test, y_train, y_test = input_train[train_index], input_train[test_index], output_train[train_index], output_train[test_index]
    best_svr.fit(X_train, y_train)
    scores.append(best_svr.score(X_test, y_test))

Train Index:  [ 2407  2408  2409 ... 24061 24062 24063] 

Test Index:  [   0    1    2 ... 2404 2405 2406]
Train Index:  [    0     1     2 ... 24061 24062 24063] 

Test Index:  [2407 2408 2409 ... 4811 4812 4813]
Train Index:  [    0     1     2 ... 24061 24062 24063] 

Test Index:  [4814 4815 4816 ... 7218 7219 7220]
Train Index:  [    0     1     2 ... 24061 24062 24063] 

Test Index:  [7221 7222 7223 ... 9625 9626 9627]
Train Index:  [    0     1     2 ... 24061 24062 24063] 

Test Index:  [ 9628  9629  9630 ... 12031 12032 12033]
Train Index:  [    0     1     2 ... 24061 24062 24063] 

Test Index:  [12034 12035 12036 ... 14437 14438 14439]
Train Index:  [    0     1     2 ... 24061 24062 24063] 

Test Index:  [14440 14441 14442 ... 16843 16844 16845]
Train Index:  [    0     1     2 ... 24061 24062 24063] 

Test Index:  [16846 16847 16848 ... 19249 19250 19251]
Train Index:  [    0     1     2 ... 24061 24062 24063] 

Test Index:  [19252 19253 19254 ... 21655 21656 21657]
Train I

In [28]:
print(np.mean(scores))

-1.96425803420012


In [29]:
yhat = best_svr.predict(input_test)

yhat = sc_y.inverse_transform(yhat)
y_test = sc_y.inverse_transform(output_test)

# yhat = best_svr.predict(x_test)

# yhat = sc_y.inverse_transform(yhat)
# y_test = sc_y.inverse_transform(y_test)

mse = mean_squared_error(y_test,yhat)

sum_err = []
for i in range(len(y_test)):
    err = abs(y_test[i]-yhat[i])*10e4
    sum_err.append(err)
    print(i,"-> Pre ",yhat[i]," vs Acc",y_test[i]," err = ",err)
        #print("Acc ",y_test[:5,:])

 err =  [ 36.43387549 556.61872734]
1065 -> Pre  [1.13660939 1.13637922]  vs Acc [1.13812 1.13688]  err =  [151.06139785  50.07809712]
1066 -> Pre  [1.15734995 1.15742292]  vs Acc [1.1639  1.16303]  err =  [655.0052333  560.70799907]
1067 -> Pre  [1.12125733 1.12095261]  vs Acc [1.1235 1.123 ]  err =  [224.26651731 204.73883621]
1068 -> Pre  [1.09119778 1.09135128]  vs Acc [1.09695 1.09631]  err =  [575.22210989 495.87172442]
1069 -> Pre  [1.05302241 1.0527952 ]  vs Acc [1.06207 1.06122]  err =  [904.75930903 842.47997372]
1070 -> Pre  [1.12459682 1.12488789]  vs Acc [1.12219 1.12249]  err =  [240.68191617 239.7885533 ]
1071 -> Pre  [1.12580961 1.12578959]  vs Acc [1.12145 1.1218 ]  err =  [435.96068442 398.95912612]
1072 -> Pre  [1.1679298  1.16837902]  vs Acc [1.17105 1.17109]  err =  [312.01968839 271.09824527]
1073 -> Pre  [1.16913991 1.16955352]  vs Acc [1.1722  1.17179]  err =  [306.00924312 223.64753891]
1074 -> Pre  [1.23884365 1.23871207]  vs Acc [1.2377  1.23766]  err =  [114

In [30]:
print(r2_score(yhat,y_test))
print("mse = ",mse)
print("sqrt(mse) = ",np.sqrt(mse))
print(mean(sum_err))
    

0.9677147630396765
mse =  6.668631954051768e-05
sqrt(mse) =  0.008166169208418209
396.50763593417656
