In [8]:
import pandas as pd
import numpy as np
from datetime import datetime

import talib

from ta.volatility import BollingerBands
from ta.trend import MACD

import plotly as py
from plotly import tools
import plotly.graph_objects as go

In [9]:
def Heiken_Ashi(prices):
    """Build Heikin-Ashi candles from a frame of regular OHLC candles.

    Parameters
    ----------
    prices : pandas.DataFrame
        Must contain the columns 'open', 'high', 'low' and 'close'.
        Extra columns are ignored.

    Returns
    -------
    tuple of pandas.Series
        ``(HA_open, HA_high, HA_low, HA_close)``, each indexed like ``prices``.
        An empty ``prices`` frame yields four empty series.

    Notes
    -----
    * HA_close is the mean of the bar's open, high, low and close.
    * HA_open is seeded with the first HA_close; afterwards it is the
      midpoint of the previous bar's HA_open and HA_close.
    * HA_high / HA_low are the max / min of the bar's raw high / low and its
      HA_open and HA_close. This is applied to every bar, including the
      first one (previously the first bar's high and low were left equal to
      its HA_close).
    """
    HA_close = prices[['open', 'high', 'low', 'close']].sum(axis=1) / 4

    # Run the recurrence on plain arrays: element-wise ``Series.iloc`` writes
    # are far slower and give the same numbers.
    close_values = HA_close.to_numpy(dtype=np.float64)
    open_values = close_values.copy()
    for i in range(1, len(open_values)):
        open_values[i] = (open_values[i - 1] + close_values[i - 1]) / 2

    # np.maximum / np.minimum propagate NaN exactly like ndarray.max()/min().
    high_values = np.maximum.reduce(
        [prices['high'].to_numpy(dtype=np.float64), open_values, close_values])
    low_values = np.minimum.reduce(
        [prices['low'].to_numpy(dtype=np.float64), open_values, close_values])

    index = HA_close.index
    HA_open = pd.Series(open_values, index=index)
    HA_high = pd.Series(high_values, index=index)
    HA_low = pd.Series(low_values, index=index)

    return HA_open, HA_high, HA_low, HA_close


In [10]:

def preprocessing(file, s=0, create_file=False,
                  output_path=r'dataset/features/EURUSD_4.csv'):
    """Load a price CSV and derive the technical-indicator feature matrix.

    Parameters
    ----------
    file : str or path-like
        CSV file with the columns 'date', 'open', 'high', 'low', 'close',
        'volume', 'dayOfweek', 'open_24' and 'close_24'.
    s : int, optional
        Not used by this function; kept so existing callers keep working.
    create_file : bool, optional
        When true, features and targets are concatenated column-wise and
        written to ``output_path`` as CSV.
    output_path : str or path-like, optional
        Destination of the CSV written when ``create_file`` is true.
        Defaults to the location that used to be hard-coded.

    Returns
    -------
    (features, targets) : tuple of pandas.DataFrame
        Both indexed by 'date'. ``features`` holds the raw OHLCV columns,
        the isFriday / isMonday flags and every indicator column;
        ``targets`` holds 'open_24' and 'close_24' as float64.

    Notes
    -----
    Missing indicator values are back-filled at the end, i.e. each NaN is
    replaced by the next valid value further down the same column.
    """
    data = pd.read_csv(file).set_index('date')

    features = data[['open', 'high', 'low', 'close', 'volume', 'dayOfweek']].copy()
    targets = data[['open_24', 'close_24']].astype(np.float64)

    # Look-back windows shared by all period-based indicators below.
    periods = [4, 8, 16, 32]

    #------------------------------------------------------------#
    # Day-of-week flags :
    #------------------------------------------------------------#

    features['isFriday'] = np.where(features['dayOfweek'] == "Friday", 1, 0)
    features['isMonday'] = np.where(features['dayOfweek'] == "Monday", 1, 0)
    features = features.drop(columns=['dayOfweek']).astype(np.float64)

    #------------------------------------------------------------#
    # Momentum (MOM) :
    #------------------------------------------------------------#

    for period in periods:
        features['MOM_{i}'.format(i=period)] = talib.MOM(
            features.close.values, timeperiod=period)

    print("--------- Mometum Successful ---------")

    #------------------------------------------------------------#
    # Stochastic oscillator (STOCH):
    #------------------------------------------------------------#

    for period in periods:
        K, D = talib.STOCH(
            close=features['close'],
            high=features['high'],
            low=features['low'],
            fastk_period=period
        )
        features['K_{i}'.format(i=period)] = K
        features['D_{i}'.format(i=period)] = D

    print("--------- Stochastic oscillator Successful ---------")

    #------------------------------------------------------------#
    # Williams %R (WILLR) :
    #------------------------------------------------------------#

    for period in periods:
        features['WILLR_{i}'.format(i=period)] = talib.WILLR(
            high=features['high'],
            low=features['low'],
            close=features['close'],
            timeperiod=period
        )

    print("--------- Williams %R Successful ---------")

    #------------------------------------------------------------#
    # Rate of change (ROCP) :
    #------------------------------------------------------------#

    for period in periods:
        features['ROCP_{i}'.format(i=period)] = talib.ROCP(
            features['close'],
            timeperiod=period
        )

    print("--------- Rate of change Successful ---------")

    #------------------------------------------------------------#
    # Weighted Closing Price (WCLPRICE, stored as 'WPC') :
    #------------------------------------------------------------#

    features['WPC'] = talib.WCLPRICE(
        high=features['high'],
        low=features['low'],
        close=features['close']
    )

    print("--------- Weighted Closing Price Successful ---------")

    #------------------------------------------------------------#
    # Accumulation Distribution Line (ADL) :
    #------------------------------------------------------------#

    features['ADL'] = talib.AD(
        high=features['high'],
        low=features['low'],
        close=features['close'],
        volume=features['volume']
    )

    print("--------- Accumulation Distribution Line Successful ---------")

    #------------------------------------------------------------#
    # Accumulation Distribution Oscillator (ADOSC) :
    #------------------------------------------------------------#

    for period in periods:
        # One variable feeds both the column label and the computation.
        # Previously the label advertised period+10 while the indicator was
        # computed with slowperiod=period+1, so the label did not describe
        # the data. NOTE(review): confirm period+10 is the intended window.
        slow_period = period + 10
        features['ADOSC_{i},{j}'.format(i=period, j=slow_period)] = talib.ADOSC(
            high=features['high'],
            low=features['low'],
            close=features['close'],
            volume=features['volume'],
            fastperiod=period,
            slowperiod=slow_period
        )

    print("--------- Accumulation Distribution Oscillator Successful ---------")

    #------------------------------------------------------------#
    # Commodity Channel Index (CCI) :
    #------------------------------------------------------------#

    for period in periods:
        features['CCI_{}'.format(period)] = talib.CCI(
            high=features['high'],
            low=features['low'],
            close=features['close'],
            timeperiod=period
        )

    print("--------- Commodity Channel Index Successful ---------")

    #------------------------------------------------------------#
    # Heikin Ashi :
    #------------------------------------------------------------#

    Open, High, Low, Close = Heiken_Ashi(features)
    features['HA_open'] = Open
    features['HA_high'] = High
    features['HA_low'] = Low
    features['HA_close'] = Close

    print("--------- Heikin Ashi Successful ---------")

    #------------------------------------------------------------#
    # Moving Average Convergence/Divergence (MACD) :
    #------------------------------------------------------------#

    for period in periods:
        # The fast and signal windows are derived from the slow window and
        # are passed on as (possibly fractional) floats; only the column
        # labels truncate them to integers.
        fast_window = (period / 2) - 1
        signal_window = period / 2.8
        indicator_MACD = MACD(
            close=features['close'],
            n_fast=fast_window,
            n_slow=period,
            n_sign=signal_window,
            fillna=True
        )
        features['MACD_{}'.format(int(fast_window))] = indicator_MACD.macd()
        features['MACDsignal_{}'.format(int(signal_window))] = indicator_MACD.macd_signal()
        features['MACDhist_{}'.format(period)] = indicator_MACD.macd_diff()

    print("--------- Moving Average Convergence/Divergence Successful ---------")

    #------------------------------------------------------------#
    # Exponential Moving Average (EMA) :
    #------------------------------------------------------------#

    for period in periods:
        features['EMA_{i}'.format(i=period)] = talib.EMA(
            features['close'],
            timeperiod=period)

    print("--------- Exponential Moving Average Successful ---------")

    #------------------------------------------------------------#
    # Bollinger Bands (BBANDS) :
    #------------------------------------------------------------#

    for period in periods:
        indicator_bb = BollingerBands(close=features["close"], n=period, ndev=2)
        features['bb_bbm_{}'.format(period)] = indicator_bb.bollinger_mavg()
        features['bb_bbh_{}'.format(period)] = indicator_bb.bollinger_hband()
        features['bb_bbl_{}'.format(period)] = indicator_bb.bollinger_lband()

    print("--------- Bollinger Bands Successful ---------")

    #------------------------------------------------------------#
    # Relative Strength Index (RSI) :
    #------------------------------------------------------------#

    for period in periods:
        features['RSI_{i}'.format(i=period)] = talib.RSI(
            features['close'],
            timeperiod=period
        )

    print("--------- Relative Strange index Successful ---------")

    # Same result as fillna(method='bfill'), without the deprecated keyword.
    features = features.bfill()

    # ----------- Create File .csv------------
    if create_file:
        pd.concat([features, targets], axis=1).to_csv(output_path)

    return features, targets


In [11]:
# Forward slashes are accepted on Windows as well as Linux/macOS, whereas
# the backslash-separated form only resolves on Windows.
features, targets = preprocessing(
    file='dataset/data/finish/EURUSD/dataset_H1_EURUSD.csv',
    create_file=True,
)

--------- Mometum Successful ---------
--------- Stochastic oscillator Successful ---------
--------- Williams %R Successful ---------
--------- Rate of change Successful ---------
--------- Weighted Closing Price Successful ---------
--------- Accumulation Distribution Line Successful ---------
--------- Accumulation Distribution Line Successful ---------
--------- Commodity Channel Index Successful ---------
--------- Heikin Ashi Successful ---------
--------- Moving Average Convergence/Divergence Successful ---------
--------- Exponential Moving Average Successful ---------
--------- Bollinger Bands Successful ---------
--------- Relative Strange index Successful ---------
