In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import datetime
import numpy as np
import seaborn as sns
import tensorflow as tf
import pathlib
from tensorflow import keras
from keras import Sequential,activations
from keras.layers import Dense,LSTM,Reshape,Dropout,InputLayer,Flatten,Input,BatchNormalization
from keras.callbacks import TensorBoard , ModelCheckpoint

mpl.rcParams['figure.figsize'] = (12, 8)
mpl.rcParams['axes.grid'] = False

In [25]:
df = pd.read_csv('data/sp500_with_indicators.csv')
df.pop('Unnamed: 0')
date_time = pd.to_datetime(df.pop('Date'))
timestamp_s = date_time.map(pd.Timestamp.timestamp)

#Removing unnecessary columns

CLOSE = df.pop('Close')
OPEN = df.pop('Open') 
HIGH = df.pop('High') 
LOW = df.pop('Low') 
VOLUME = df.pop('Volume') 
SPREAD = df.pop('Spread')
TICKVOL = df.pop('TickVol') 
BUY_OR_SELL = df.pop('Class')
BUY_OR_SELL_NUMBER = df.pop('Class_Number')

In [36]:
# df.describe().transpose()


Unnamed: 0,RSI,CCI,STOCH,STOCH_SIGNAL,Boll_Percent,WILL_R,DON_CHIAN_pband,ADX_Pos,ADX_Neg,Aroon,Candel_Body,Candel_Upper_Shadow,Candel_Lower_Shadow
0,-39.255563,-599.457308,-42.727273,-62.077649,-0.911196,-42.727273,-0.385246,-46.186287,-46.113689,-124.0,-35.3,-16.8,-45.7
1,-40.954541,-587.894290,-51.818182,-52.027972,-0.987866,-51.818182,-0.467213,-46.900703,-46.823264,-124.0,-38.8,-16.8,-45.4
2,-43.481674,-629.146824,-65.454545,-53.333333,-1.121890,-65.454545,-0.590164,-47.916803,-44.623164,-124.0,-39.3,-16.6,-45.3
3,-47.153907,-693.717531,-84.210526,-67.161085,-1.343912,-84.210526,-0.778689,-48.863007,-42.858173,-120.0,-40.1,-16.6,-46.2
4,-40.752328,-625.351300,-43.010753,-64.225275,-0.939909,-43.010753,-0.467213,-47.472429,-45.007243,-116.0,-34.0,-16.6,-45.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...
91516,-53.019307,-713.104466,-79.113924,-84.910448,-1.390251,-79.113924,-0.791139,-51.634283,-44.919257,-132.0,-37.8,-14.8,-45.9
91517,-49.306621,-694.271072,-71.518987,-76.582278,-1.287984,-71.518987,-0.715190,-52.583513,-46.287148,-132.0,-36.6,-15.8,-45.7
91518,-51.570574,-696.677727,-77.848101,-76.160338,-1.329628,-77.848101,-0.778481,-53.257408,-47.258265,-132.0,-38.8,-16.5,-45.5
91519,-53.126705,-698.856547,-81.699346,-77.022145,-1.346415,-81.699346,-0.822785,-53.913217,-46.954347,-132.0,-38.5,-16.0,-45.7


In [75]:
def normalize(data:pd.core.frame.DataFrame,data_std=None,data_mean=None):
    if(data_std is None):
        data_std = data.std()
    if(data_mean is None):
        data_mean = data.mean()


    data_normalized = (data-data_mean)/data_std

    return data_normalized, data_std,data_mean
    
def min_max_scaler(data:pd.core.frame.DataFrame,minimum:float,maximum:float):
    data_max = data.describe().transpose()['max']
    data_min = data.describe().transpose()['min']
    data_std = (data-data_min)/(data_max-data_min)
    data_scaled = data_std*(abs(minimum)+maximum)+minimum
    return data_scaled


In [59]:
data_normalized , data_std , data_mean = normalize(df) 

data_scaled = min_max_scaler(data_normalized,-1,1)

In [60]:
data_scaled.describe().transpose()



Unnamed: 0,count,mean,std,min,25%,50%,75%,max
RSI,91521.0,0.042346,0.276058,-1.0,-0.142144,0.045682,0.23114,1.0
CCI,91521.0,0.0116,0.183201,-1.0,-0.126138,0.024106,0.150153,1.0
STOCH,91521.0,0.072877,0.599633,-1.0,-0.454545,0.111111,0.617391,1.0
STOCH_SIGNAL,91521.0,0.072876,0.55583,-1.0,-0.425926,0.103566,0.586667,1.0
Boll_Percent,91521.0,0.012309,0.314085,-1.0,-0.234456,0.032681,0.262322,1.0
WILL_R,91521.0,0.072877,0.599633,-1.0,-0.454545,0.111111,0.617391,1.0
DON_CHIAN_pband,91521.0,0.083743,0.596811,-1.0,-0.439716,0.126214,0.625806,1.0
ADX_Pos,91521.0,-0.383765,0.235053,-1.0,-0.553026,-0.402243,-0.237762,1.0
ADX_Neg,91521.0,-0.379483,0.239576,-1.0,-0.549745,-0.402972,-0.234512,1.0
Aroon,91521.0,0.049111,0.621997,-1.0,-0.541667,0.208333,0.625,1.0


In [73]:
class dataProcessing():

    def __init__(self ,data:pd.core.frame.DataFrame , output:pd.core.series.Series , input_width:int, stockname:str,
                    min_max :bool =False,minimum:float=1.0 , maximum:float=1.0):

        self.stockname :str  = stockname 
        self.input_width : int = input_width
        self.data :pd.core.frame.DataFrame = data
        self.output : pd.core.series.Series = output
        self.column_indices : list[str] = {name: i for i, name in enumerate(data.columns)}
        self.num_features : int = data.shape[1]
        
        #slit into test and train data
        n = len(data)

        self.input_train_dataset = data[:int(0.90 * n)]
        self.output_train_dataset = output[:int(0.90 * n)]


        self.input_test_dataset = data[int(0.90 * n):]
        self.output_test_dataset = output[int(0.90 * n):]


        #reset indecies
        self.input_test_dataset = self.input_test_dataset.reset_index()
        self.input_test_dataset.pop('index')
        
        self.output_test_dataset = self.output_test_dataset.reset_index()
        self.output_test_dataset = self.output_test_dataset['Class_Number']

        #Normalizing The Data

        self.input_train_dataset, self.input_train_std ,self.input_train_mean = normalize(self.input_train_dataset)
        self.input_test_dataset ,_,_ = normalize(self.input_test_dataset,data_std=self.input_train_std,data_mean=self.input_train_mean)


        # self.input_train_std = self.input_train_dataset.std()  
        # self.input_train_mean = self.input_train_dataset.mean()
        
        # self.input_train_dataset = (self.input_train_dataset - self.input_train_mean) / self.input_train_std
        # self.input_test_dataset = (self.input_test_dataset - self.input_train_mean) / self.input_train_std

        #Min max scaling 

        if(min_max):
            self.input_train_dataset = min_max_scaler(self.input_train_dataset,minimum,maximum)
            self.input_test_dataset = min_max_scaler(self.input_test_dataset,minimum,maximum)
            

    def plot_normalized_data(self):
        data_std = (self.data - self.input_train_mean) / self.input_train_std
        data_std = data_std.melt(var_name='Column', value_name='Normalized')
        plt.figure(figsize=(40, 12))
        ax = sns.violinplot(x='Column', y='Normalized', data=data_std)
        _ = ax.set_xticklabels(self.data.keys(), rotation=90)

    def make_windows(self,input_data:pd.core.frame.DataFrame , output_data:pd.core.series.Series):
        
        window_input = []
        window_output=[]

        for i in range(self.input_width,len(input_data)):

            window_input.append(input_data[i-self.input_width:i].reset_index())
            
            window_output.append(output_data[i])

            window_input[-1].pop('index')
            
            #convert pd.DataFrame to numpy
            window_input[-1]= window_input[-1].to_numpy() 

        #convert list to numpy
        window_input = np.asarray(window_input)
        window_output = np.asarray(window_output)

        window_output = tf.one_hot(window_output,depth=2)

        return window_input,window_output  



In [76]:
data = dataProcessing(  data=df,
                        output=BUY_OR_SELL_NUMBER,
                        input_width=256,
                        stockname='sp500_with_Indicator',
                        min_max=True,
                        minimum=-1.0,
                        maximum=1.0
                        )


In [77]:
input_window , output_window = data.make_windows(   
                                                    input_data=data.input_train_dataset,
                                                    output_data=data.output_train_dataset,
                                                )

In [82]:
def compile_and_fit(model,modelname,data:dataProcessing,input_window,output_window):
    MAX_EPOCHS = 25

    #check if path is available
    path = f'models/{modelname}/{data.stockname}/tensorboard/logs/fit'
    pathlib.Path(path).mkdir(parents=True,exist_ok=True)

    
    log_dir =f'models/{modelname}/{data.stockname}/tensorboard/logs/fit/{datetime.datetime.now().strftime("(%Y-%m-%d)-(%H-%M-%S)")}'
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)


    model.compile(loss='binary_crossentropy',
                    optimizer='adam',
                    metrics=[tf.metrics.BinaryAccuracy()])

    history = model.fit(x=input_window,y= output_window ,validation_split=0.1, epochs=MAX_EPOCHS,verbose=2,callbacks=[tensorboard_callback])
    
    return history

In [83]:
input_layer = keras.layers.Input(shape=(256,13),name = 'Input_Layer')

reshape = Reshape(target_shape=(256,13,1),name='Reshape_Layer')(input_layer)

locally_connected_1 = keras.layers.Conv2D(filters=64 ,kernel_size=(5,13),activation='tanh')(reshape)

cnn_1 = keras.layers.Conv2D(filters=64,kernel_size=(8,13),activation='relu')(reshape)



dense = keras.layers.Dense(4096,activation='relu',name='Dense_1')(locally_connected_1)
output = keras.layers.Dense(2,activation= 'softmax',name='Output_Layer')(dense)



In [84]:
model = keras.Model(
    inputs = input_layer,
    outputs = output
)
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 13)]         0         
                                                                 
 Reshape_Layer (Reshape)     (None, 256, 13, 1)        0         
                                                                 
 conv2d (Conv2D)             (None, 252, 1, 64)        4224      
                                                                 
 Dense_1 (Dense)             (None, 252, 1, 4096)      266240    
                                                                 
 Output_Layer (Dense)        (None, 252, 1, 2)         8194      
                                                                 
Total params: 278,658
Trainable params: 278,658
Non-trainable params: 0
_________________________________________________________________


In [10]:
# keras.utils.plot_model(model)


In [None]:

#name of the model 
name = "4_(CNN)_(22_03_17)"

compile_and_fit(model=model , modelname=name,
                data = data,input_window=input_window,output_window=output_window)

In [60]:
# model.save('models/secondcnn(22-03-11)/model.h5')
# model = keras.models.load_model('models/firstcnn/model.h5')

<bound method Layer.count_params of <keras.engine.functional.Functional object at 0x0000023521E5A8E0>>

In [17]:
input_test , output_test = data.make_windows(data.input_test_dataset,data.output_test_dataset)

In [18]:
predicted=model.predict(input_test)

In [21]:
predicted_list = predicted.tolist()
output_list = output_test.numpy().tolist()

In [15]:
counter = 0
for i in range(len(predicted_list)):
    if(predicted_list[i][0]>0.9 or predicted_list[i][1]>0.9):
        counter+=1

counter

6397

In [23]:
real = []
pred= []

for index in range(len(predicted_list)):
    
    if(predicted_list[index][0]>0.9):
        pred.append(predicted_list[index])
        real.append(output_list[index])
    
    elif(predicted_list[index][1]>0.9):
        pred.append(predicted_list[index])
        real.append(output_list[index])

len(real) 

6397

In [25]:
m = tf.keras.metrics.BinaryAccuracy()
m.update_state(y_true = real,y_pred = pred)
m.result().numpy()

0.5049242