In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import datetime
import numpy as np
import seaborn as sns
import tensorflow as tf
import pathlib
from tensorflow import keras
from keras import Sequential,activations
from keras.layers import Dense,LSTM,Reshape,Dropout,InputLayer,Flatten,Input,BatchNormalization
from keras.callbacks import TensorBoard , ModelCheckpoint

# Notebook-wide matplotlib defaults: 12x8 figures with grid lines switched off.
mpl.rcParams.update({
    'figure.figsize': (12, 8),
    'axes.grid': False,
})

In [2]:
# Read the indicator table and discard its 'Unnamed: 0' column.
df = pd.read_csv('data/sp500_with_indicators.csv')
del df['Unnamed: 0']

# Take the date column out of the frame and also express it as POSIX seconds.
date_time = pd.to_datetime(df.pop('Date'))
timestamp_s = date_time.map(pd.Timestamp.timestamp)

# Detach the raw price/volume columns and both label columns; the columns
# still left in `df` afterwards are what gets passed on as model features.
CLOSE, OPEN, HIGH, LOW, VOLUME, SPREAD, TICKVOL = (
    df.pop(column_name)
    for column_name in ('Close', 'Open', 'High', 'Low', 'Volume', 'Spread', 'TickVol')
)
BUY_OR_SELL = df.pop('Class')
BUY_OR_SELL_NUMBER = df.pop('Class_Number')

In [37]:
class dataProcessing():
    """Split, normalise and window a feature table for a sequence classifier.

    The rows of ``data`` are split in their given order (no shuffling) into a
    leading training part and a trailing test part. Both parts are z-scored
    with the mean and standard deviation of the training part only.

    Parameters
    ----------
    data : pd.DataFrame
        Feature table, one row per time step.
    output : pd.Series
        One label per row of ``data``.
    input_width : int
        Number of consecutive rows that form one model input window.
    stockname : str
        Identifier stored on the instance; ``compile_and_fit`` uses it to
        build its log directory.
    train_fraction : float, default 0.90
        Fraction of the leading rows used for training; must lie strictly
        between 0 and 1.

    Raises
    ------
    ValueError
        If ``train_fraction`` is not strictly between 0 and 1.
    """

    def __init__(self, data: pd.core.frame.DataFrame, output: pd.core.series.Series,
                 input_width: int, stockname: str, train_fraction: float = 0.90):

        if not 0.0 < train_fraction < 1.0:
            raise ValueError(f'train_fraction must be strictly between 0 and 1, got {train_fraction!r}')

        self.stockname: str = stockname
        self.input_width: int = input_width
        self.data: pd.core.frame.DataFrame = data
        self.output: pd.core.series.Series = output
        # Maps column name -> column position (a dict, not a list).
        self.column_indices: dict[str, int] = {name: i for i, name in enumerate(data.columns)}
        self.num_features: int = data.shape[1]

        # Split into train and test data by position, keeping row order.
        split_at = int(train_fraction * len(data))

        self.input_train_dataset = data[:split_at]
        self.output_train_dataset = output[:split_at]

        # drop=True restarts the test index at 0 without creating a helper
        # 'index' column, and keeps the label Series usable whatever its name.
        self.input_test_dataset = data[split_at:].reset_index(drop=True)
        self.output_test_dataset = output[split_at:].reset_index(drop=True)

        # Normalise with training statistics only, so the test rows do not
        # influence the scaling.
        self.input_train_mean = self.input_train_dataset.mean()
        # A constant training column has zero spread; dividing by 0 would fill
        # it with NaN/inf, so such columns are scaled by 1 instead.
        self.input_train_std = self.input_train_dataset.std().replace(0, 1)

        self.input_train_dataset = (self.input_train_dataset - self.input_train_mean) / self.input_train_std
        self.input_test_dataset = (self.input_test_dataset - self.input_train_mean) / self.input_train_std

    def plot_normalized_data(self):
        """Draw one violin per feature column of the full table, z-scored with the training statistics."""
        data_std = (self.data - self.input_train_mean) / self.input_train_std
        # Long format: one (Column, Normalized) row per cell, as seaborn expects.
        data_std = data_std.melt(var_name='Column', value_name='Normalized')
        plt.figure(figsize=(40, 12))
        ax = sns.violinplot(x='Column', y='Normalized', data=data_std)
        _ = ax.set_xticklabels(self.data.keys(), rotation=90)

    def make_windows(self, input_data: pd.core.frame.DataFrame, output_data: pd.core.series.Series,
                     num_classes: int = 2):
        """Cut ``input_data`` into overlapping windows paired with the next row's label.

        Window ``k`` holds rows ``k .. k + input_width - 1`` of ``input_data``
        and is paired with the label at row ``k + input_width``. Rows and
        labels are both addressed by position, so the index of either
        argument does not matter.

        Parameters
        ----------
        input_data : pd.DataFrame
            Feature rows to window.
        output_data : pd.Series
            Integer class labels, at least as many as ``input_data`` has rows.
        num_classes : int, default 2
            Depth passed to ``tf.one_hot``.

        Returns
        -------
        (window_input, window_output)
            ``window_input`` is a numpy array of shape
            ``(n_windows, input_width, n_columns)``; ``window_output`` is the
            result of ``tf.one_hot`` on the matching labels.

        Raises
        ------
        ValueError
            If ``output_data`` has fewer entries than ``input_data`` has rows.
        """
        # Convert once up front instead of once per window.
        features = input_data.to_numpy()
        labels = np.asarray(output_data)

        if len(labels) < len(features):
            raise ValueError(
                f'output_data has {len(labels)} entries but input_data has {len(features)} rows'
            )

        width = self.input_width
        window_ends = range(width, len(features))

        if len(window_ends) == 0:
            # Not enough rows for a single window: keep a well-formed, empty batch.
            window_input = np.empty((0, width, features.shape[1]), dtype=features.dtype)
        else:
            window_input = np.asarray([features[end - width:end] for end in window_ends])

        # The label of a window is the row right after the window's last row.
        window_output = labels[width:len(features)]
        window_output = tf.one_hot(window_output, depth=num_classes)

        return window_input, window_output



In [36]:
# Wrap the remaining indicator columns and the numeric class labels,
# using windows of 256 consecutive rows.
data = dataProcessing(
    data=df,
    output=BUY_OR_SELL_NUMBER,
    input_width=256,
    stockname='sp500_with_Indicator',
)


In [34]:
# Cut the normalised training split into model inputs and one-hot targets.
input_window, output_window = data.make_windows(
    input_data=data.input_train_dataset,
    output_data=data.output_train_dataset,
)

In [42]:
def compile_and_fit(model, modelname, data: 'dataProcessing', input_window, output_window,
                    epochs: int = 25, validation_split: float = 0.1):
    """Compile ``model`` and train it, logging each run to its own TensorBoard directory.

    Parameters
    ----------
    model
        Object exposing Keras-style ``compile`` and ``fit`` methods.
    modelname : str
        Name used as the first folder level below ``models/``.
    data : dataProcessing
        Only ``data.stockname`` is read; it forms the second folder level.
    input_window, output_window
        Training inputs and targets, forwarded to ``model.fit`` as ``x`` / ``y``.
    epochs : int, default 25
        Number of training epochs forwarded to ``model.fit``.
    validation_split : float, default 0.1
        Validation fraction forwarded to ``model.fit``.

    Returns
    -------
    Whatever ``model.fit`` returns (the training history for a Keras model).
    """
    # Make sure the log root exists before TensorBoard writes into it.
    fit_root = f'models/{modelname}/{data.stockname}/tensorboard/logs/fit'
    pathlib.Path(fit_root).mkdir(parents=True, exist_ok=True)

    # One sub-folder per run, named after the start time, so runs never overwrite each other.
    run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = f'{fit_root}/{run_stamp}'
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=[tf.metrics.BinaryAccuracy(), tf.metrics.BinaryCrossentropy()],
    )

    history = model.fit(
        x=input_window,
        y=output_window,
        validation_split=validation_split,
        epochs=epochs,
        verbose=2,
        callbacks=[tensorboard_callback],
    )

    return history

In [38]:


# The network consumes exactly what `data.make_windows` produces:
# `data.input_width` consecutive rows of `data.num_features` columns.
window_length = data.input_width
num_features = data.num_features

inputs = keras.layers.Input(shape=(window_length, num_features))
# Add a trailing channel axis so Conv2D can treat each window as a one-channel image.
reshape = Reshape((window_length, num_features, 1))(inputs)

# Kernel heights 1, 2, 4, ... up to the window length (nine heights for a
# 256-row window). Every kernel spans all feature columns, so each branch
# looks at a different number of consecutive rows.
kernel_heights = []
kernel_height = 1
while kernel_height <= window_length:
    kernel_heights.append(kernel_height)
    kernel_height *= 2

# One Conv2D -> Flatten branch per kernel height, all reading the same reshaped input.
flat_branches = []
for kernel_height in kernel_heights:
    conv_branch = keras.layers.Conv2D(
        filters=20,
        kernel_size=(kernel_height, num_features),
        activation='relu',
    )(reshape)
    flat_branches.append(keras.layers.Flatten()(conv_branch))

# Merge all branches, then classify into the two one-hot classes.
concatinate = keras.layers.Concatenate()(flat_branches)
dense = keras.layers.Dense(256, activation='relu')(concatinate)
output = keras.layers.Dense(2, activation='softmax')(dense)



In [39]:
# Tie the input tensor and the softmax head together into a trainable model.
model = keras.Model(inputs=inputs, outputs=output)

In [43]:
# Train on the training windows; logs are written under the given model name.
compile_and_fit(
    model=model,
    modelname='secondcnn(22-03-11)',
    data=data,
    input_window=input_window,
    output_window=output_window,
)

Epoch 1/25
2310/2310 - 40s - loss: 0.6911 - binary_accuracy: 0.5311 - binary_crossentropy: 0.6911 - val_loss: 0.6918 - val_binary_accuracy: 0.5118 - val_binary_crossentropy: 0.6918 - 40s/epoch - 17ms/step
Epoch 2/25
2310/2310 - 37s - loss: 0.6912 - binary_accuracy: 0.5311 - binary_crossentropy: 0.6912 - val_loss: 0.6934 - val_binary_accuracy: 0.5118 - val_binary_crossentropy: 0.6934 - 37s/epoch - 16ms/step
Epoch 3/25
2310/2310 - 37s - loss: 0.6912 - binary_accuracy: 0.5311 - binary_crossentropy: 0.6912 - val_loss: 0.6941 - val_binary_accuracy: 0.5118 - val_binary_crossentropy: 0.6941 - 37s/epoch - 16ms/step
Epoch 4/25
2310/2310 - 36s - loss: 0.6912 - binary_accuracy: 0.5311 - binary_crossentropy: 0.6912 - val_loss: 0.6936 - val_binary_accuracy: 0.5118 - val_binary_crossentropy: 0.6936 - 36s/epoch - 16ms/step
Epoch 5/25
2310/2310 - 37s - loss: 0.6912 - binary_accuracy: 0.5311 - binary_crossentropy: 0.6912 - val_loss: 0.6935 - val_binary_accuracy: 0.5118 - val_binary_crossentropy: 0.6935

KeyboardInterrupt: 

In [10]:
# Write the current network to disk.
model.save(filepath='models/secondcnn(22-03-11)/model.h5')
# Alternative: load a previously saved network instead.
# model = keras.models.load_model('models/firstcnn/model.h5')

In [12]:
# Window the held-out test split. The labels come from `data.output_test_dataset`,
# the label series that belongs to `data.input_test_dataset`; the previous
# `tds` name is not defined anywhere in the visible notebook, so it would fail on a fresh kernel.
iw , ow = data.make_windows(input_data=data.input_test_dataset , output_data=data.output_test_dataset)

In [13]:
# Class probabilities for every test window.
predicted = model.predict(x=iw)

In [14]:
# Plain nested lists of the predictions and of the one-hot targets, for the loops below.
predicted_list, output_list = predicted.tolist(), ow.numpy().tolist()

In [15]:
# Count the predictions where either class probability exceeds 0.9.
counter = sum(
    1
    for probabilities in predicted_list
    if probabilities[0] > 0.9 or probabilities[1] > 0.9
)

counter

6397

In [16]:
# Keep only the predictions where either class probability exceeds 0.9,
# together with the matching one-hot targets.
real = []
pred = []

for position, probabilities in enumerate(predicted_list):
    if probabilities[0] > 0.9 or probabilities[1] > 0.9:
        pred.append(probabilities)
        real.append(output_list[position])

len(real)

6397

In [26]:
# Binary accuracy of the retained predictions against their targets.
m = tf.keras.metrics.BinaryAccuracy()
m.update_state(real, pred)
m.result().numpy()

0.5049242