In [2]:
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

In [3]:
# Read the exported TSE price table and preview the rows of the symbol under study.
# NOTE(review): the path is absolute and machine-specific; point it at your own
# copy of the CSV before running.
tse_csv_path = "/Users/omid/tmp/stockMarketPrediction_DL/tse_en.csv"
df = pd.read_csv(tse_csv_path)
is_target_symbol = df["Symbol"] == "كاما"
df.loc[is_target_symbol].head()

Unnamed: 0,Date,Symbol,Name,Yesterday,Open,Last,Close,Close Changes,Low,High,Number Traded,Volume,Value Traded
1,2008-12-06,كاما,باما,2580,2657,2657,2605,25,2656.0,2657.0,14,21000,55793000
91,2008-12-07,كاما,باما,2605,2683,2565,2641,36,2565.0,2683.0,18,35139,93799687
271,2008-12-08,كاما,باما,2641,2571,2591,2596,-45,2569.0,2621.0,25,42835,110269609
356,2008-12-10,كاما,باما,2596,2571,2565,2586,-10,2519.0,2571.0,7,10557,26763973
467,2008-12-13,كاما,باما,2586,2509,2557,2573,-13,2509.0,2557.0,8,20845,53055381


In [4]:
# Keep only the "كاما" rows, index them by parsed trading date, drop the
# descriptive text columns and any incomplete rows, and cap the series at the
# first 2500 rows.
bama = (
    df.loc[df["Symbol"] == "كاما"]
    .assign(Date=lambda rows: pd.to_datetime(rows["Date"]))
    .set_index("Date")
    .drop(columns=["Symbol", "Name"])
    .dropna()
    .head(2500)
)

In [4]:
import ta
#https://technical-analysis-library-in-python.readthedocs.io/en/latest/ta.html#volatility-indicators
#bama = ta.add_all_ta_features(bama, open="Open", high="High", low="Low", close="Close", volume="Volume")

import pandas_ta
#https://github.com/twopirllc/pandas-ta

#import talib
#https://github.com/mrjbq7/ta-lib
#https://ta-lib.org/function.html


# Technical-indicator features.
#
# Every indicator below is evaluated on the `bama` price series once per
# look-back period in PERIODS and stored in its own DataFrame, one column per
# period, named "<LABEL>_<period>d" (e.g. "RSI_6d" ... "RSI_20d").
PERIODS = range(6, 21)


def _indicator_frame(label, compute):
    """Build the per-period feature frame for one indicator.

    Parameters
    ----------
    label : str
        Prefix of the generated column names.
    compute : callable
        Called as ``compute(period)`` for each period in ``PERIODS``; must
        return the indicator values for that period.

    Returns
    -------
    pandas.DataFrame
        One column ``"<label>_<period>d"`` per period, in ascending period
        order.
    """
    frame = pd.DataFrame()
    for period in PERIODS:
        frame[label + "_" + str(period) + "d"] = compute(period)
    return frame


# 1 - RSI - Relative Strength Index
rsi = _indicator_frame(
    "RSI",
    lambda n: ta.momentum.RSIIndicator(close=bama.Last, window=n).rsi(),
)


# 2 - Williams %R
williamsR = _indicator_frame(
    "Williams %R",
    lambda n: ta.momentum.WilliamsRIndicator(
        high=bama.High, low=bama.Low, close=bama.Last, lbp=n
    ).williams_r(),
)


# 3 - WMA - Weighted Moving Average
wma = _indicator_frame(
    "WMA",
    lambda n: ta.trend.WMAIndicator(close=bama.Last, window=n).wma(),
)


# 4 - EMA - Exponential Moving Average
ema = _indicator_frame(
    "EMA",
    lambda n: ta.trend.EMAIndicator(close=bama.Last, window=n).ema_indicator(),
)


# 5 - SMA - Simple Moving Average
sma = _indicator_frame(
    "SMA",
    lambda n: ta.trend.SMAIndicator(close=bama.Last, window=n).sma_indicator(),
)


# 6 - HMA - Hull Exponential Moving Average
hma = _indicator_frame(
    "HMA",
    lambda n: pandas_ta.hma(close=bama.Last, length=n),
)


# 7 - Triple EMA - Triple Exponential Moving Average
tema = _indicator_frame(
    "Triple EMA",
    lambda n: pandas_ta.tema(close=bama.Last, length=n),
)


# 8 - CCI - Commodity Channel Index
cci = _indicator_frame(
    "CCI",
    lambda n: ta.trend.CCIIndicator(
        high=bama.High, low=bama.Low, close=bama.Last, window=n
    ).cci(),
)


# 9 - CMO - Chande Momentum Oscillator
cmo = _indicator_frame(
    "CMO",
    lambda n: pandas_ta.cmo(close=bama.Last, length=n),
)


# 10 - MACD - Moving Average Convergence Divergence
# NOTE(review): the 26/12/9 windows do not depend on the period, so all 15
# "MACD_<n>d" columns hold the same values. The line is therefore computed
# once and reused; decide whether the windows should scale with the period.
macd_line = ta.trend.MACD(
    close=bama.Last, window_slow=26, window_fast=12, window_sign=9
).macd()
macd = _indicator_frame("MACD", lambda n: macd_line)


# 11 - PPO - The Percentage Price Oscillator
# NOTE(review): same situation as MACD - fixed 26/12/9 windows, so the 15
# columns are identical; computed once and reused.
ppo_line = ta.momentum.PercentagePriceOscillator(
    close=bama.Last, window_slow=26, window_fast=12, window_sign=9
).ppo()
ppo = _indicator_frame("PPO", lambda n: ppo_line)


# 12 - ROC - Rate of Change
roc = _indicator_frame(
    "ROC",
    lambda n: ta.momentum.ROCIndicator(close=bama.Last, window=n).roc(),
)


# 13 - CMF - Chaikin Money Flow
cmf = _indicator_frame(
    "CMF",
    lambda n: pandas_ta.cmf(
        high=bama.High, low=bama.Low, close=bama.Last,
        volume=bama.Volume, open=bama.Open, length=n,
    ),
)


# 14 - ADX - Average Directional Movement Index (ADX)
# pandas_ta.adx returns several columns; only the "ADX_<n>" one is kept.
adx = _indicator_frame(
    "ADX",
    lambda n: pandas_ta.adx(
        high=bama.High, low=bama.Low, close=bama.Last, length=n
    )["ADX_" + str(n)],
)


# 15 - PSI - Parabolic Stop and Reverse (Parabolic SAR)
# NOTE(review): the indicator is built without any period argument, so the 15
# "PSI_<n>d" columns are identical; computed once and reused.
psar_line = ta.trend.PSARIndicator(
    high=bama.High, low=bama.Low, close=bama.Last
).psar()
psi = _indicator_frame("PSI", lambda n: psar_line)

In [5]:
# Drop the first 60 rows (by position) from every indicator frame.
# Presumably this skips the indicators' warm-up period - TODO confirm.
# NOTE(review): the frames are rebound to their own slices, so running this
# cell a second time removes another 60 rows.
warmup_rows = 60
(
    rsi, williamsR, wma, ema, sma,
    hma, tema, cci, cmo, macd,
    ppo, roc, cmf, adx, psi,
) = [
    frame.iloc[warmup_rows:]
    for frame in (
        rsi, williamsR, wma, ema, sma,
        hma, tema, cci, cmo, macd,
        ppo, roc, cmf, adx, psi,
    )
]

In [6]:
# Turn every row of the indicator frames into one "picture": a DataFrame with
# one row per indicator and one column per look-back period ('6d' ... '20d').
period_labels = [str(days) + "d" for days in range(6, 21)]

# Row order of each picture; the names become the picture's index.
indicator_frames = [
    ("RSI", rsi),
    ("Williams %R", williamsR),
    ("WMA", wma),
    ("EMA", ema),
    ("SMA", sma),
    ("HMA", hma),
    ("Triple EMA", tema),
    ("CCI", cci),
    ("CMO", cmo),
    ("MACD", macd),
    ("PPO", ppo),
    ("ROC", roc),
    ("CMF", cmf),
    ("ADX", adx),
    ("PSI", psi),
]

pictures = []
for row in range(len(rsi)):
    pic = pd.DataFrame(index=period_labels)
    for indicator_name, frame in indicator_frames:
        # The 15 per-period values of this indicator at position `row`.
        pic[indicator_name] = np.asarray(frame.iloc[row])
    pictures.append(pic.transpose())

In [7]:
# Number of pictures built: one per row of the trimmed indicator frames.
len(pictures)

2440

In [5]:
# labeling (Buy, Sell, Hold)
#
# An 11-row window slides over the "Last" price. The row in the middle of the
# window is labelled
#   "SELL" if it holds the window's maximum,
#   "BUY"  if it holds the window's minimum,
#   "HOLD" otherwise.
# The first window covers rows 1..11 (middle row 6) and the last one ends at
# the second-to-last row, so bsh_labels[k] belongs to row k + 6 and the series
# ends up with 6 unlabeled rows at the head and 6 at the tail.

window_size = 11
bsh_labels = []

# Work on a plain NumPy array: every lookup below is positional, and integer
# keys on a Series with a DatetimeIndex are not a reliable way to get that.
last_prices = bama.Last.to_numpy()

for counter in range(window_size + 1, len(last_prices)):
    window_begin_idx = counter - window_size
    window_end_idx = window_begin_idx + window_size - 1
    window_middle_idx = (window_begin_idx + window_end_idx) // 2
    window = last_prices[window_begin_idx:window_end_idx + 1]

    # Locate the extremes INSIDE the window (first occurrence on ties).
    # Searching the whole series for the extreme value would return the
    # position of an earlier day with the same price and miss the middle row.
    min_idx = window_begin_idx + int(np.argmin(window))
    max_idx = window_begin_idx + int(np.argmax(window))

    if max_idx == window_middle_idx:
        bsh_labels.append("SELL")
    elif min_idx == window_middle_idx:
        bsh_labels.append("BUY")
    else:
        bsh_labels.append("HOLD")

In [9]:
# Number of Buy/Sell/Hold labels produced by the labeling loop.
len(bsh_labels)

2488

In [10]:
# Number of price rows the labels were derived from.
len(bama.Last)

2500

In [11]:
# How many price rows have no Buy/Sell/Hold label.
len(bama.Last) - len(bsh_labels)
# You should remove 6 rows from the head and 6 rows from the tail of the data (or rather, of the pictures).
# After checking, it was found that this difference arises because 6 rows at the top and 6 rows at the bottom are dropped.
# The first six and the last six rows are dropped when the labels are built.

12

In [12]:
# labeling Up or Down
#
# ud_labels[k] compares row k + 1 of "Last" with row k:
#   0 = Down (strictly lower than the previous row)
#   1 = Up   (equal to or higher than the previous row)

# Positional comparison on a NumPy array; integer keys on a Series with a
# DatetimeIndex are not a reliable way to address rows by position.
last_prices = bama.Last.to_numpy()
went_down = last_prices[1:] < last_prices[:-1]
ud_labels = np.where(went_down, 0, 1).tolist()

# The first row has no previous row and therefore no label: drop the first row
# of the data before training against these labels.

In [13]:
# making sliding window

# Stack the per-row pictures into one array and take the index values of
# `bama` from position 60 onward alongside them.
series = np.array(pictures)
time = np.array(bama.index[60:])

# Ordered split: the first `split_time` samples are used for training, the
# remainder for validation.
split_time = 2000
time_train, time_valid = time[:split_time], time[split_time:]
x_train, x_valid = series[:split_time], series[split_time:]

# Windowing / batching hyper-parameters.
window_size, batch_size, shuffle_buffer_size = 30, 32, 1000

def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    """Build a shuffled, batched dataset of overlapping windows over `series`.

    Consecutive runs of ``window_size + 1`` elements are taken with a stride
    of one (incomplete trailing runs are dropped). Each run is split into an
    (input, target) pair: the input is the run without its last element, the
    target is the run without its first element.

    Parameters
    ----------
    series : array-like
        Samples to slice along the first axis.
    window_size : int
        Number of elements in each input/target sequence.
    batch_size : int
        Number of (input, target) pairs per batch.
    shuffle_buffer : int
        Buffer size handed to ``Dataset.shuffle``.

    Returns
    -------
    tf.data.Dataset
        Batches of (input, target) pairs, prefetching one batch ahead.
    """
    span = window_size + 1
    dataset = (
        tf.data.Dataset.from_tensor_slices(series)
        .window(span, shift=1, drop_remainder=True)
        .flat_map(lambda run: run.batch(span))
        .shuffle(shuffle_buffer)
        .map(lambda run: (run[:-1], run[1:]))
        .batch(batch_size)
        .prefetch(1)
    )
    return dataset

# Windowed training dataset built from the first `split_time` pictures.
train_set = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)
# Each batch is an (input, target) pair; with the settings above each of the two has
# shape = 32 x 30 x 15 x 15  (batch x window x indicators x periods; a final batch may be smaller)

In [14]:
# Peek at the first Buy/Sell/Hold label.
bsh_labels[0]

'HOLD'

In [15]:
# Peek at the first Up/Down label (0 = Down, 1 = Up).
ud_labels[0]

1

In [16]:
# Peek at the first picture: rows are indicators, columns are look-back periods.
pictures[0]

Unnamed: 0,6d,7d,8d,9d,10d,11d,12d,13d,14d,15d,16d,17d,18d,19d,20d
RSI,63.58804,56.82258,51.96575,48.44301,45.85263,43.92351,42.47161,41.36981,40.52863,39.88395,39.38908,39.00952,38.71946,38.49938,38.33435
Williams %R,-2.5,-1.388889,-0.7518797,-0.5208333,-0.4016064,-0.3278689,-0.2785515,-0.2777778,-0.2770083,-0.234192,-55.85492,-56.92619,-60.3352,-61.13139,-61.13139
WMA,2125.524,2120.571,2115.556,2109.533,2102.291,2093.864,2084.385,2074.011,2063.362,2052.792,2049.5,2051.81,2058.158,2067.753,2079.614
EMA,2114.079,2109.701,2108.481,2110.31,2114.85,2121.666,2130.309,2140.358,2151.445,2163.254,2175.528,2188.055,2200.669,2213.239,2225.664
SMA,2110.833,2105.714,2098.0,2085.444,2069.7,2051.727,2032.25,2011.769,1994.143,1978.8,2024.812,2070.294,2112.111,2154.105,2192.3
HMA,2150.952,2155.524,2148.991,2149.644,2147.364,2156.784,2157.622,2168.409,2168.147,2173.465,2149.503,2141.722,2115.429,2102.016,2069.21
Triple EMA,2180.152,2183.988,2181.196,2171.283,2155.254,2134.718,2111.342,2086.623,2061.905,2038.343,2016.971,2000.004,1990.459,1989.165,1994.849
CCI,116.347,121.4876,118.5185,103.1128,92.36178,86.48147,85.20392,85.11055,86.65294,89.19989,54.21648,31.39534,14.90169,2.920993,-5.2112
CMO,27.17553,13.64222,3.922598,-3.133361,-8.326751,-12.19917,-15.10817,-17.3097,-18.97847,-20.22784,-21.15442,-21.81961,-22.27946,-22.58485,-22.76628
MACD,-163.7613,-163.7613,-163.7613,-163.7613,-163.7613,-163.7613,-163.7613,-163.7613,-163.7613,-163.7613,-163.7613,-163.7613,-163.7613,-163.7613,-163.7613
