In [16]:
import pandas as pd
import numpy as np 
from datetime import datetime

import talib

from ta.volatility import BollingerBands
from ta.trend import MACD

import plotly as py
from plotly import tools
import plotly.graph_objects as go


In [17]:
def create_candlestick(df) :

    trace_0 = go.Ohlc(x=df.index,
                    open=df['Open'],
                    high=df['High'],
                    low=df['Low'],
                    close=df['Close'],
                    name='Currency Quote')

    fig = tools.make_subplots(rows=1,cols=1,shared_xaxes=True)
    fig.append_trace(trace_0,1,1)

    py.offline.plot(fig,filename='Candlestick_chart')

In [18]:
def preprocessing(file,s=0):

    #################################################

    #file  : name of data_set
    #s : start sample at 's' 

    #return  : Heiken_Ashi OHLC candles

    ################################################# 

    data = pd.read_csv(file)
    data.set_index('date', inplace=True, drop=True)
    data = data.iloc[s:,:]
    data = pd.DataFrame(data=data, dtype=np.float64) 

    df = data.copy(deep=False)
    df.drop(df.tail(24).index,inplace=True)
  
    label = data[['open','close']].copy(deep=False)
    label = label.iloc[24:,:]
    #print(label.head())
    #print(df.head())

    label.reset_index(drop=True,inplace=True)
    label.index = df.index
    #print(label.head())
    #print(df.head())

    def Heiken_Ashi(prices):
 
        #################################################

        #prices  : dataframe of prices
        #periods : periods of which to create the candles

        #return  : Heiken_Ashi OHLC candles

        #################################################


        HA_close = prices[['open','high','low','close']].sum(axis = 1)/4

        HA_open = HA_close.copy()

        HA_open.iloc[0] = HA_close.iloc[0]

        HA_high = HA_close.copy()

        HA_low = HA_close.copy()

        for i in range(1,len(prices)):
            
            HA_open.iloc[i] = (HA_open.iloc[i-1] + HA_close.iloc[i-1])/2

            HA_high.iloc[i] = np.array([prices.high.iloc[i], HA_open.iloc[i], HA_close.iloc[i]]).max()

            HA_low.iloc[i] = np.array([prices.low.iloc[i], HA_open.iloc[i], HA_close.iloc[i]]).min()

        return HA_open,HA_high,HA_low,HA_close

    #------------------------------------------------------------#
    # Momentum (MOM) :
    #------------------------------------------------------------#

    periods = [3,4,5,8,9,10]
    #periods = [x+20 for x in periods]

    for i in range(0,len(periods)):
        df['MOM_{i}'.format(i=periods[i])] = talib.MOM(df.close.values,timeperiod = periods[i])
    print(periods)
    print("--------- Mometum Successful ---------")

    #------------------------------------------------------------#
    # Stochastic oscillator (STOCH):
    #------------------------------------------------------------#

    periods = [3,4,5,8,9,10]
    #periods = [x+20 for x in periods]
    for i in range(0,len(periods)):
        K,D = talib.STOCH(
            close = df['close'],
            high = df['high'],
            low = df['low'],
            fastk_period=12
            )
        df['K_{i}'.format(i=periods[i])] = K
        df['D_{i}'.format(i=periods[i])] = D

    print(periods)
    print("--------- Stochastic oscillator Successful ---------")

    #------------------------------------------------------------#
    # Williams %R (WILLR) :
    #------------------------------------------------------------#
    
    periods = [6,7,8,9, 10]
    #periods = [x+20 for x in periods]
    for i in range(len(periods)):
        df['WILLR_{i}'.format(i=periods[i])] = talib.WILLR(
            high = df['high'],
            low = df['low'],
            close = df['close'],
            timeperiod = periods[i]
            )
    print(periods)
    print("--------- Williams %R Successful ---------")

    #------------------------------------------------------------#
    #  Rate of change (PROCP) :
    #------------------------------------------------------------#

    periods = [12,13,14,15]
    #periods = [x+20 for x in periods]
    for i in range(len(periods)):
        df['ROCP_{i}'.format(i=periods[i])] = talib.ROCP(
            df['close'],
            timeperiod = periods[i]
        )

    print(periods)
    print("--------- Rate of change Successful ---------")

    #------------------------------------------------------------#
    # Weighted Closing Price (WPC) :
    #------------------------------------------------------------#

    df['WPC'] = talib.WCLPRICE(
            high = df['high'],
            low = df['low'],
            close = df['close']
        )

    print("--------- Weighted Closing Price Successful ---------")

    #------------------------------------------------------------#
    # Accumulation Distribution Line (ADL) :
    #------------------------------------------------------------#

    df['ADL'] = talib.AD(
        high = df['high'],
        low = df['low'],
        close = df['close'],
        volume = df['volume']
    )

    print("--------- Accumulation Distribution Line Successful ---------")

    #------------------------------------------------------------#
    # Accumulation Distribution Oscillator (ADOSC) :
    #------------------------------------------------------------#

    periods_fast = [2,3,4,5]
    #periods_fast = [x+20 for x in periods_fast]
    periods_slow = [10,12,14,16]
    #periods_slow = [x+20 for x in periods_slow]
    for i in range(len(periods_fast)):
        df['ADOSC_{i},{j}'.format(i=periods_fast[i],j=periods_slow[i])] = talib.ADOSC(
            high = df['high'],
            low = df['low'],
            close = df['close'],
            volume = df['volume'],
            fastperiod = periods_fast[i],
            slowperiod = periods_slow[i]
        )
    
    print("--------- Accumulation Distribution Line Successful ---------")

    #------------------------------------------------------------#
    # Moving Average Convergence/Divergence (MACD) :
    #------------------------------------------------------------#
    
    indicator_MACD = MACD(
        close = df['close'],
        n_fast=12,
        n_slow=26,
        n_sign=9,
        fillna=True
    )

    df['MACD_12,26'] = indicator_MACD.macd()
    df['MACD_his_12,26'] = indicator_MACD.macd_diff()
    df['MACD_signal_12,26'] = indicator_MACD.macd_signal()

    print("--------- Moving Average Convergence/Divergence Successful ---------")
    
    #------------------------------------------------------------#
    # Commodity Channel Index (CCI) :
    #------------------------------------------------------------#

    df['CCI_15'] = talib.CCI(
        high = df['high'],
        low = df['low'],
        close = df['close'],
        timeperiod = 15        
    )

    print("--------- Commodity Channel Index Successful ---------")

    #------------------------------------------------------------#
    # Bollinger Bands (BBANDS) :
    #------------------------------------------------------------#

    indicator_bb = BollingerBands(close=df["close"], n=15, ndev=2)
    df['bb_bbm_15'] = indicator_bb.bollinger_mavg()
    df['bb_bbh_15'] = indicator_bb.bollinger_hband()
    df['bb_bbl_15'] = indicator_bb.bollinger_lband()

    print("--------- Bollinger Bands Successful ---------")

    #------------------------------------------------------------#
    # Heikin Ashi :
    #------------------------------------------------------------#

    Open,High,Low,Close = Heiken_Ashi(df)
    df['HA_open'] = Open
    df['HA_high'] = High
    df['HA_low'] = Low
    df['HA_close'] = Close

    print("--------- Heikin Ashi Successful ---------")

    #------------------------------------------------------------#
    # Relative Strange index (RSI) :
    #------------------------------------------------------------#

    periods = [6,8,10,12]
    #periods = [x+20 for x in periods]
    for i in range(len(periods)):
        df['RSI_{i}'.format(i=periods[i])] = talib.RSI(
            df['close'],
            timeperiod = periods[i]
        )
    
    print(periods)
    print("--------- Relative Strange index Successful ---------")

    #------------------------------------------------------------#
    # Slope :
    #------------------------------------------------------------#

    df['Slope_4'] = talib.LINEARREG_SLOPE(df['close'], timeperiod=6)
    df = df.fillna(method='bfill')

    print("--------- Slope Successful ---------")

    #df = df.drop(['open','high','low','close','volume'],axis=1)
    df = df.drop(['volume'],axis=1)

    _csv = pd.concat([df,label],axis = 1)
    _csv.to_csv(r'dataset/pre_data.csv')

    return df,label

In [19]:

file = 'dataset/EURUSD_H1.csv'
X,Y = preprocessing(file,s=60000)


                     open    close
date                              
2016-08-09 18:00  1.11088  1.11081
2016-08-09 19:00  1.11081  1.11119
2016-08-09 20:00  1.11119  1.11163
2016-08-09 21:00  1.11164  1.11153
2016-08-09 22:00  1.11153  1.11150
                     open     high      low    close   volume
date                                                         
2016-08-08 18:00  1.10809  1.10853  1.10806  1.10814  23053.0
2016-08-08 19:00  1.10814  1.10861  1.10813  1.10828  19118.0
2016-08-08 20:00  1.10828  1.10885  1.10810  1.10877  12560.0
2016-08-08 21:00  1.10879  1.10879  1.10816  1.10850   6382.0
2016-08-08 22:00  1.10852  1.10858  1.10819  1.10844   5872.0
                     open    close
date                              
2016-08-08 18:00  1.11088  1.11081
2016-08-08 19:00  1.11081  1.11119
2016-08-08 20:00  1.11119  1.11163
2016-08-08 21:00  1.11164  1.11153
2016-08-08 22:00  1.11153  1.11150
                     open     high      low    close   volume
date          

AttributeError: module 'pandas' has no attribute 'conat'

In [5]:
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.svm import LinearSVR
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler

from numpy import mean
from numpy import std
from numpy import absolute
from sklearn.metrics import r2_score,mean_squared_error
from sklearn.model_selection import train_test_split

In [6]:
sc_X = StandardScaler()
sc_y = StandardScaler()
x = sc_X.fit_transform(X.values)
y = sc_y.fit_transform(Y.values)
# sc_X = MinMaxScaler()
# sc_y = MinMaxScaler()
# sc_X.fit(X.values)
# x = sc_X.transform(X.values)
# sc_y.fit(Y.values)
# y = sc_y.transform(Y.values)

input_train,input_test,output_train,output_test = train_test_split(x,y,test_size=0.05)

In [7]:

model = SVR(kernel='rbf',gamma='auto',coef0=0.1,C=50,epsilon=0.00001)
#model = SVR(kernel='rbf',gamma='auto',coef0=0.1,C=15,epsilon=0.0001) #pip err ~= 303
#model = SVR(kernel='rbf',gamma='auto',coef0=0.1)
#model = SVR(kernel='poly', degree=3,coef0=0.1,gamma='auto') #pip err ~= 330+

# kernel_ = ['rbf','poly']
# degree_ = [3,4,5,6]
# gamma_ = ['auto','scale',1,0.1,0.01]
# C_ = [0.1,0.01,0.001]

best_svr = MultiOutputRegressor(model)
cv = KFold(n_splits=10,shuffle=False)
scores = []
i = 1
for train_index, test_index in cv.split(input_train):
        print("=================== ",i," ===================")
        print("Train Index: ", train_index)
        print("Test Index: ", test_index, "\n")
        X_train, X_test, y_train, y_test = input_train[train_index], input_train[test_index], output_train[train_index], output_train[test_index]
        best_svr.fit(X_train, y_train)
        scores.append(best_svr.score(X_test, y_test))
        i+=1



Train Index:  [ 2407  2408  2409 ... 24061 24062 24063]
Test Index:  [   0    1    2 ... 2404 2405 2406] 

Train Index:  [    0     1     2 ... 24061 24062 24063]
Test Index:  [2407 2408 2409 ... 4811 4812 4813] 

Train Index:  [    0     1     2 ... 24061 24062 24063]
Test Index:  [4814 4815 4816 ... 7218 7219 7220] 

Train Index:  [    0     1     2 ... 24061 24062 24063]
Test Index:  [7221 7222 7223 ... 9625 9626 9627] 

Train Index:  [    0     1     2 ... 24061 24062 24063]
Test Index:  [ 9628  9629  9630 ... 12031 12032 12033] 

Train Index:  [    0     1     2 ... 24061 24062 24063]
Test Index:  [12034 12035 12036 ... 14437 14438 14439] 

Train Index:  [    0     1     2 ... 24061 24062 24063]
Test Index:  [14440 14441 14442 ... 16843 16844 16845] 

Train Index:  [    0     1     2 ... 24061 24062 24063]
Test Index:  [16846 16847 16848 ... 19249 19250 19251] 

Train Index:  [    0     1     2 ... 24061 24062 24063]
Test Index:  [19252 19253 19254 ... 21655 21656 21657] 

Train I

In [8]:
yhat = best_svr.predict(input_test)
yhat = sc_y.inverse_transform(yhat)
y_test = sc_y.inverse_transform(output_test)
mse = mean_squared_error(y_test,yhat)
sum_err = []

for i in range(len(y_test)):
    err = abs(y_test[i]-yhat[i])*10e4
    sum_err.append(err)
    #print(i,"-> Pre ",yhat[i]," vs Acc",y_test[i]," err = ",err)
print("Crossvalidation score :",np.mean(scores))
print("Abs_err = ",r2_score(yhat,y_test))
print("mse = ",mse)
print("sqrt(mse) = ",np.sqrt(mse))
print("Pips err = ",mean(sum_err),"\n")

Crossvalidation score : 0.9893578875887288
Abs_err =  0.9884610662184803
mse =  2.3067661716221796e-05
sqrt(mse) =  0.00480288056443441
Pips err =  353.5051565789188 



In [9]:
class_predict = []
class_test = []
for i in range(len(y_test)):
    if y_test[i][0] >= y_test[i][1]:
        class_test.append(0)
    else:
        class_test.append(1)
    if yhat[i][0] >= yhat[i][1]:
        class_predict.append(0)
    else:
        class_predict.append(1)

In [10]:
err_class = 0
for i in range(len(class_predict)):
    print(class_predict[i],"vs",class_test[i])
    if class_predict[i] == class_test[i]:
       err_class += 1

print(err_class/len(class_predict))


1 vs 1
0 vs 1
1 vs 0
1 vs 1
0 vs 0
1 vs 0
1 vs 1
0 vs 0
1 vs 1
0 vs 1
0 vs 0
0 vs 1
1 vs 0
0 vs 1
0 vs 0
1 vs 1
1 vs 1
1 vs 0
1 vs 1
0 vs 0
0 vs 0
1 vs 0
1 vs 0
1 vs 0
1 vs 1
1 vs 1
1 vs 1
1 vs 1
1 vs 0
0 vs 0
1 vs 0
1 vs 0
1 vs 1
0 vs 1
1 vs 1
1 vs 0
0 vs 0
0 vs 1
1 vs 0
0 vs 0
0 vs 0
0 vs 0
1 vs 0
1 vs 1
0 vs 0
0 vs 1
1 vs 0
0 vs 0
1 vs 1
1 vs 0
0 vs 0
0 vs 0
0 vs 1
0 vs 0
0 vs 0
0 vs 1
1 vs 1
0 vs 1
1 vs 1
1 vs 0
1 vs 0
0 vs 0
1 vs 1
0 vs 0
1 vs 1
1 vs 0
0 vs 1
1 vs 0
1 vs 1
1 vs 0
1 vs 1
0 vs 0
0 vs 1
1 vs 0
1 vs 1
1 vs 1
1 vs 1
1 vs 0
1 vs 0
1 vs 0
1 vs 1
1 vs 1
1 vs 0
1 vs 1
1 vs 0
1 vs 0
1 vs 1
1 vs 1
0 vs 0
1 vs 0
1 vs 0
1 vs 0
1 vs 1
0 vs 1
0 vs 1
1 vs 0
0 vs 1
0 vs 1
1 vs 0
0 vs 1
0 vs 1
0 vs 1
0 vs 0
0 vs 0
1 vs 0
1 vs 0
1 vs 0
0 vs 0
1 vs 1
1 vs 1
1 vs 1
0 vs 1
0 vs 1
0 vs 1
1 vs 0
1 vs 0
1 vs 0
1 vs 0
1 vs 0
0 vs 1
0 vs 1
0 vs 0
1 vs 1
1 vs 0
1 vs 0
1 vs 1
1 vs 0
1 vs 1
1 vs 0
1 vs 1
1 vs 1
0 vs 1
1 vs 0
1 vs 0
0 vs 1
1 vs 0
0 vs 1
0 vs 0
1 vs 1
1 vs 1
0 vs 1
1 vs 0
1 vs 1