## Import Libraries

In [1]:
# Single seed shared by every random number generator configured in this cell.
seed_value = 0
import os
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# setting it here presumably only affects child processes - confirm.
os.environ['PYTHONHASHSEED'] = str(seed_value)

import random
# Seed Python's built-in `random` module.
random.seed(seed_value)

import numpy as np
# Seed NumPy's legacy global random state.
np.random.seed(seed_value)


import tensorflow as tf
# Seed TensorFlow through the TF1 compatibility API.
tf.compat.v1.set_random_seed(seed_value)

from keras import backend as K
# Build a session limited to one intra-op and one inter-op thread and register
# it as the Keras backend session.
session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)

import pandas as pd
import matplotlib.pyplot as plt
from itertools import chain 

from tensorflow.keras import datasets, layers, models
from tensorflow.keras import regularizers

import talib

## Read Data

In [2]:
# Load one data split into `df`. Exactly one of the three read_csv lines should
# be active; the other two splits are kept commented out for manual switching.
df = pd.read_csv("Data/Original/training.csv")
# df = pd.read_csv("Data/Original/validation.csv")
# df = pd.read_csv("Data/Original/testing.csv")
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,04-01-2010,545.549988,545.549988,510.924988,537.750000,457.306488,35040012.0
1,05-01-2010,574.950012,574.950012,532.700012,535.349976,455.265503,9780080.0
2,06-01-2010,539.950012,547.250000,535.325012,544.000000,462.621460,10831076.0
3,07-01-2010,544.000000,557.500000,539.000000,553.025024,470.296417,11976808.0
4,08-01-2010,554.000000,556.900024,548.000000,551.575012,469.063354,6907852.0
...,...,...,...,...,...,...,...
1727,26-12-2016,527.174988,533.000000,522.325012,523.724976,512.049377,7004598.0
1728,27-12-2016,523.924988,533.549988,523.500000,532.349976,520.482178,7974284.0
1729,28-12-2016,533.950012,534.775024,523.549988,524.849976,513.149353,4405854.0
1730,29-12-2016,525.000000,534.400024,524.750000,532.724976,520.848816,4010412.0


In [3]:
df.dtypes

Date          object
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume       float64
dtype: object

## Labelling

In [4]:
df['result'] = 0   # 0 for hold

In [5]:
def labelling(data, windowsize=14):
    """Label rows as hold (0), buy (1) or sell (2) using sliding price windows.

    A window of ``windowsize`` consecutive rows is slid over the 'Adj Close'
    column one row at a time. In every window the row holding the lowest
    price is labelled 1 (buy) and the row holding the highest price is
    labelled 2 (sell). When both fall on the same row the sell label wins
    because it is written last. Ties inside a window resolve to the earliest
    row. Labels written by earlier windows are kept unless a later window
    overwrites them.

    NaN prices are ignored when searching for the extremes, and a window that
    contains only NaN leaves the labels untouched (previously this either
    raised ``UnboundLocalError`` or silently reused the previous window's
    indices). The running minimum is no longer seeded with the global maximum
    and the running maximum is no longer seeded with 0, so constant windows
    and non-positive prices are labelled too.

    NOTE(review): windows start at rows ``0 .. len(data) - windowsize - 1``,
    exactly as in the original loop, so the final row is never part of any
    window - confirm this is intended.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an 'Adj Close' column (prices) and a 'result' column
        (initial labels, one per row).
    windowsize : int, optional
        Number of rows per window. Defaults to 14, the value that used to be
        hard-coded.

    Returns
    -------
    numpy.ndarray
        A copy of ``data['result']`` with the buy/sell labels applied.

    Raises
    ------
    ValueError
        If ``windowsize`` is smaller than 1.
    """
    if windowsize < 1:
        raise ValueError("windowsize must be a positive integer")

    # Pull the prices out once instead of calling .iloc for every element.
    prices = data['Adj Close'].to_numpy(dtype=float)
    result = np.array(data['result'])
    numberofdays = prices.shape[0]

    # range() is empty when there are not more rows than one window holds.
    for windowbegin in range(numberofdays - windowsize):
        window = prices[windowbegin:windowbegin + windowsize]
        if np.isnan(window).all():
            # Nothing comparable in this window; leave the labels unchanged.
            continue
        # nanargmin/nanargmax return the first occurrence, matching the strict
        # comparisons of the original hand-written scan.
        minindex = windowbegin + int(np.nanargmin(window))
        maxindex = windowbegin + int(np.nanargmax(window))

        result[minindex] = 1    # 1 for buy
        result[maxindex] = 2    # 2 for sell
    return result

In [6]:
df["result"] = labelling(df)

In [7]:
df.isna().sum().sum()

42

In [8]:
# Replace every missing value in the frame with the sentinel -12345.
# NOTE(review): the indicator cell below reads 'Adj Close', 'High' and 'Low'
# from this filled frame, so the sentinel presumably flows into the TA-Lib
# features for the affected rows - confirm that this is intended.
df = df.fillna(-12345)

In [9]:
df.isna().sum().sum()

0

In [10]:
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,result
0,04-01-2010,545.549988,545.549988,510.924988,537.750000,457.306488,35040012.0,0
1,05-01-2010,574.950012,574.950012,532.700012,535.349976,455.265503,9780080.0,0
2,06-01-2010,539.950012,547.250000,535.325012,544.000000,462.621460,10831076.0,0
3,07-01-2010,544.000000,557.500000,539.000000,553.025024,470.296417,11976808.0,0
4,08-01-2010,554.000000,556.900024,548.000000,551.575012,469.063354,6907852.0,0
...,...,...,...,...,...,...,...,...
1727,26-12-2016,527.174988,533.000000,522.325012,523.724976,512.049377,7004598.0,0
1728,27-12-2016,523.924988,533.549988,523.500000,532.349976,520.482178,7974284.0,2
1729,28-12-2016,533.950012,534.775024,523.549988,524.849976,513.149353,4405854.0,0
1730,29-12-2016,525.000000,534.400024,524.750000,532.724976,520.848816,4010412.0,2


## Creating Features 

In [11]:
# Price series used by every indicator; extracted once instead of per call.
adj_close_values = df['Adj Close'].values
high_values = df['High'].values
low_values = df['Low'].values

# MACD, PPO and SAR do not take the loop period `n`, so they are computed once
# here and copied into the fifteen per-period columns below (previously the
# identical result was recomputed on every iteration).
macd_line, macd_signal_line, macd_histogram = talib.MACD(
    adj_close_values, fastperiod=12, slowperiod=26, signalperiod=9)
ppo_line = talib.PPO(adj_close_values, fastperiod=12, slowperiod=26, matype=0)
# NOTE(review): acceleration=0 and maximum=0 are kept exactly as written, but a
# zero acceleration factor presumably stops the SAR from ever moving towards
# the price - confirm whether TA-Lib's defaults were intended instead.
sar_line = talib.SAR(high_values, low_values, acceleration=0, maximum=0)

# One column per indicator and look-back period n = 6 .. 20. Column creation
# order is unchanged: rsi, roc, sma, ema, wma, tema, william, cci, cmo,
# macd, macdSignal, macdHist, ppo, parabolicsar for each n.
for n in range(6,21):
    suffix = str(n)
    df['rsi' + suffix] = talib.RSI(adj_close_values, timeperiod=n)
    df['roc' + suffix] = talib.ROC(adj_close_values, timeperiod=n)
    df['sma' + suffix] = talib.SMA(adj_close_values, timeperiod=n)
    df['ema' + suffix] = talib.EMA(adj_close_values, timeperiod=n)
    df['wma' + suffix] = talib.WMA(adj_close_values, timeperiod=n)
    df['tema' + suffix] = talib.TEMA(adj_close_values, timeperiod=n)
    df['william' + suffix] = talib.WILLR(high_values, low_values, adj_close_values, timeperiod=n)
    df['cci' + suffix] = talib.CCI(high_values, low_values, adj_close_values, timeperiod=n)
    df['cmo' + suffix] = talib.CMO(adj_close_values, timeperiod=n)
    # Explicit copies keep the per-period columns independent of each other.
    df['macd' + suffix] = macd_line.copy()
    df['macdSignal' + suffix] = macd_signal_line.copy()
    df['macdHist' + suffix] = macd_histogram.copy()
    df['ppo' + suffix] = ppo_line.copy()
    df['parabolicsar' + suffix] = sar_line.copy()

In [12]:
def _indicator_matrix(prefix):
    """Return the `<prefix>6` .. `<prefix>20` columns of `df` as a 2-D array."""
    period_columns = [prefix + str(period) for period in range(6, 21)]
    return df[period_columns].to_numpy()


# One (rows x 15) matrix per indicator family, columns ordered by period.
rsi = _indicator_matrix('rsi')
roc = _indicator_matrix('roc')
sma = _indicator_matrix('sma')
ema = _indicator_matrix('ema')
wma = _indicator_matrix('wma')
tema = _indicator_matrix('tema')
william = _indicator_matrix('william')
cci = _indicator_matrix('cci')
cmo = _indicator_matrix('cmo')
macd = _indicator_matrix('macd')
ppo = _indicator_matrix('ppo')
parabolicsar = _indicator_matrix('parabolicsar')

In [13]:
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,result,rsi6,roc6,...,wma20,tema20,william20,cci20,cmo20,macd20,macdSignal20,macdHist20,ppo20,parabolicsar20
0,04-01-2010,545.549988,545.549988,510.924988,537.750000,457.306488,35040012.0,0,,,...,,,,,,,,,,
1,05-01-2010,574.950012,574.950012,532.700012,535.349976,455.265503,9780080.0,0,,,...,,,,,,,,,,510.924988
2,06-01-2010,539.950012,547.250000,535.325012,544.000000,462.621460,10831076.0,0,,,...,,,,,,,,,,510.924988
3,07-01-2010,544.000000,557.500000,539.000000,553.025024,470.296417,11976808.0,0,,,...,,,,,,,,,,510.924988
4,08-01-2010,554.000000,556.900024,548.000000,551.575012,469.063354,6907852.0,0,,,...,,,,,,,,,,510.924988
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1727,26-12-2016,527.174988,533.000000,522.325012,523.724976,512.049377,7004598.0,0,49.617414,-0.917574,...,510.306222,520.908854,-54.398089,59.876594,9.356047,5.724714,4.279333,1.445382,2.786118,574.950012
1728,27-12-2016,523.924988,533.549988,523.500000,532.349976,520.482178,7974284.0,2,65.329535,0.202330,...,511.908149,522.407026,-33.942526,71.373811,18.113323,6.012883,4.626043,1.386840,2.794445,574.950012
1729,28-12-2016,533.950012,534.775024,523.549988,524.849976,513.149353,4405854.0,0,49.289941,-0.775122,...,512.655744,521.617328,-52.078648,54.950556,8.517021,5.585179,4.817870,0.767309,2.791268,574.950012
1730,29-12-2016,525.000000,534.400024,524.750000,532.724976,520.848816,4010412.0,2,61.270900,0.206917,...,513.997763,522.857986,-33.536905,66.317352,16.055166,5.800637,5.014424,0.786213,2.782108,574.950012


In [14]:
# For every row, concatenate that row's slice of each indicator matrix into one
# flat list. The family order below fixes the layout of each flattened row.
indicator_blocks = (rsi, roc, sma, ema, william, cci, cmo, macd, ppo, tema, wma, parabolicsar)
train_images = [
    list(chain.from_iterable(block[i] for block in indicator_blocks))
    for i in df.index
]


In [15]:
train_images

[[nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,

In [17]:
# Turn the per-row lists into an array and flatten it to one dimension.
# 180 is the number of values per row: 12 indicator families x 15 periods
# (n = 6 .. 20), as assembled when `train_images` was built.
train_images = np.array(train_images)
train_images = train_images.reshape(df.shape[0]*180)

In [18]:
# np.sort returns a sorted copy of the flattened indicator values; sorting the
# reversed view in place then rearranges `temp` itself, since the view shares
# its memory. `temp` is later passed to normalization() as the pool of values.
temp = np.sort(train_images)
temp[::-1].sort()

In [19]:
# Replace the remaining missing values (the indicator columns display NaN in
# their first rows) with 0.
# NOTE(review): `temp` was built from the indicator values before this fill,
# so normalization() presumably only succeeds if 0 also occurs among those
# values - confirm.
df = df.fillna(0)

In [20]:
df.isna().sum().sum()

0

## Normalization

In [21]:
def normalization(data,x):
    """Rank-normalise every indicator column of ``data`` in place.

    Each value in the columns ``<indicator><n>`` (n = 6 .. 20) is replaced by
    ``position / a``, where ``position`` is the index of the first equal entry
    in ``np.unique(x, axis=0)`` and ``a`` is the number of entries in it.

    Fixes compared with the previous version:

    * the ``return`` was nested inside the period loop, so only the ``*6``
      columns were normalised; all periods 6 .. 20 are processed now;
    * the row count came from the global ``df`` instead of ``data``;
    * every lookup was a linear ``list.index`` scan; a dictionary built once
      gives the same first-match result in constant time per value.

    The 'macdSignal*' and 'macdHist*' columns are left untouched, as before.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the indicator columns; it is modified in place.
    x : array-like
        Pool of values that defines the ranking. It is flattened before use.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, after modification.

    Raises
    ------
    ValueError
        If a value in an indicator column has no equal entry in ``x`` (the
        same exception type ``list.index`` raised before).
    KeyError
        If an expected indicator column is missing from ``data``.
    """
    indicator_prefixes = ('rsi', 'roc', 'sma', 'ema', 'wma', 'tema',
                          'william', 'cci', 'cmo', 'macd', 'ppo', 'parabolicsar')

    unique = np.unique(np.ravel(x), axis=0).tolist()  # array of distinct values
    a = len(unique)

    # value -> index of its first occurrence, mirroring list.index semantics.
    position = {}
    for idx, value in enumerate(unique):
        position.setdefault(value, idx)

    for n in range(6,21):
        for prefix in indicator_prefixes:
            column = prefix + str(n)
            scaled = []
            for value in data[column].tolist():
                if value not in position:
                    raise ValueError(
                        "%r from column %r is not present in x" % (value, column))
                scaled.append(position[value] / a)
            # Whole-column assignment works for any index, not just 0..n-1.
            data[column] = np.asarray(scaled, dtype=float)
    return data

In [22]:
df = normalization(df,temp)

## Saving files

In [23]:
# Write the processed frame to disk. Keep the active output path in step with
# the split that was loaded in the "Read Data" cell; the other two paths are
# left commented out for manual switching.
df.to_csv('Data/Pre_Processed/training.csv')
# df.to_csv('Data/Pre_Processed/testing.csv')
# df.to_csv('Data/Pre_Processed/validation.csv')