# STOCK MARKET SHORT PREDICTION MODEL

@author: Matheus José Oliveira dos Santos

## 1. Imports

In [33]:
import sys
import os
from datetime import date, datetime

import pandas as pd
import tensorflow as tf
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score

# Make the sibling `src` directory importable so the project-local
# `db_interface` module can be loaded. The path is assembled with
# os.path.join instead of hard-coded '\\' separators, so it resolves on
# Windows (identical result to before) as well as on POSIX systems.
sys.path.append(os.path.join(os.getcwd(), '..', 'src'))
import db_interface

## 2. Get data

In [3]:
# Build the SELECT statement up front: every row of the `financials` table
# whose `date` lies between the two literal bounds below.
table_name = "financials"
query = "SELECT * FROM {0} WHERE date BETWEEN '{1}' AND '{2}'".format(
    table_name,
    '2003-01-01',
    '2023-08-10',
)

# The interface object is used as a context manager around the read;
# the query result is kept in `df_raw`.
my_db = db_interface.DB_interface("FINANCE_DB")
with my_db:
    df_raw = my_db.read_by_command(query)

connecting in: FINANCE_DB


  return pd.read_sql_query(command, self.db)


DB Closed


## 3. Preprocessing

In [4]:
# Describe raw data: summary statistics (count, mean, std, min, quartiles, max)
# for the columns of the frame returned by the query, before any filtering.
df_raw.describe()

Unnamed: 0,PX_LAST,EQY_SH_OUT,CURR_ENTP_VAL,market_cap,net_income,EBITDA,EBIT,BS_TOT_ASSET,BS_TOT_LIAB2,equity,...,EV/EBITDA_std_1y,EV/EBITDA_std_3y,EV/EBITDA_std_5y,EV/EBIT_std_6m,EV/EBIT_std_1y,EV/EBIT_std_3y,EV/EBIT_std_5y,return_3m,return_6m,return_1y
count,308290.0,308290.0,308290.0,308290.0,308290.0,308290.0,308290.0,308290.0,308290.0,308290.0,...,308290.0,308290.0,308290.0,308290.0,308290.0,308290.0,308290.0,308290.0,308290.0,308290.0
mean,36.287864,1146.681379,29080.755882,18039.16575,338.681589,914.024794,677.79213,27260.7,16293.794918,10966.908232,...,2.869103,4.124008,4.135563,10.686458,15.579769,22.024584,21.527782,0.023009,0.052037,0.126177
std,214.761366,2005.126975,72559.526422,42963.301242,2558.577792,4575.272361,3692.827056,92509.51,57263.195415,37516.705739,...,10.840442,11.916046,11.810097,151.157049,196.658228,206.126792,169.693442,0.263221,0.360445,0.586478
min,0.0,0.0,-1007.31,0.0,-48523.0,-28243.0,-44001.0,0.0,0.0,-23960.05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0
25%,5.512,161.228,2601.264525,1609.44,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0921,-0.1212,-0.1561
50%,11.68,531.62,8483.7862,4959.73,25.376,93.251,71.789,4263.171,2312.635,1458.29,...,0.72,0.87,0.0,0.81,1.2,1.85,1.71,0.0007,0.0,0.0
75%,21.325,1017.915,24777.0216,16119.365,171.956,519.0,324.142,18326.51,11122.379,7225.34,...,2.01,3.14,3.25,2.19,3.29,5.84,6.41,0.1374,0.2022,0.3169
max,5100.124,15753.833,785926.7002,627372.87,58644.36,113484.0,96488.0,1015142.0,769421.0,437061.0,...,279.22,391.57,491.45,10514.27,12481.88,9250.57,7401.52,4.0999,5.6091,12.4839


In [24]:
# Get the data we can work with.
#
# Keep only the rows where both the 5-year P/E moving average and the 3-month
# return are non-zero (presumably zeros stand in for missing values -- TODO
# confirm against the data pipeline). Both conditions are evaluated on
# `df_raw` and combined into one mask, so the filter no longer relies on
# implicit index alignment between two different frames.
usable_rows = (df_raw['P/E_ma_5y'] != 0) & (df_raw['return_3m'] != 0)

# `.copy()` gives an independent frame, so the column assignment below cannot
# raise SettingWithCopyWarning or write into `df_raw`.
df_pre_train = df_raw.loc[usable_rows].copy()

# Normalise the date column AFTER the frame exists. Previously the conversion
# ran first, which raises NameError on a fresh kernel and, with stale kernel
# state, was thrown away when `df_pre_train` was rebound on the next line.
# The values are stored as plain `datetime.date` objects because the training
# cell filters this column against `datetime.date` bounds.
df_pre_train['date'] = pd.to_datetime(df_pre_train['date']).dt.date

In [25]:
# Split the data between prediction y and data X

# Features: every column except the first two and the last three.
X = df_pre_train.iloc[:, 2:-3]

# Targets: the last three columns, taken by position in their stored order
# (return_3m, return_6m, return_1y).
y_3m, y_6m, y_1y = (df_pre_train.iloc[:, offset] for offset in (-3, -2, -1))

In [26]:
# Describe the prediction y: summary statistics of the 3-month return target
# (`y_3m`, the third-from-last column of the filtered frame).

y_3m.describe()

count    158987.000000
mean          0.023367
std           0.266076
min          -1.000000
25%          -0.103100
50%           0.016600
75%           0.141500
max           3.670100
Name: return_3m, dtype: float64

## 4. Parameters

In [27]:
# Date intervals
# Eleven rolling ten-year windows: window k runs from 1 January of (2003 + k)
# to 1 January of (2013 + k).

lista_date_start = [f'{year}-01-01' for year in range(2003, 2014)]
lista_date_end = [f'{year}-01-01' for year in range(2013, 2024)]

# file names
# One scaler file and one model name per window, tagged with the window's end year.
nomes_scalers = [f'sc{year}.pkl' for year in range(2013, 2024)]
nomes_modelos = [f'ANN{year}' for year in range(2013, 2024)]

In [28]:
# Return threshold for the "short" label: the training cell labels a row 1
# when its 3-month return target is <= this value, and 0 otherwise.
inferior_short_limit = -0.15

In [40]:
# Output directories for the trained models and the fitted scalers, located
# next to the notebook's working directory. Built with os.path.join instead
# of hard-coded '\\' separators so the paths are valid on any OS (the result
# on Windows is unchanged). The trailing empty segment keeps a trailing
# separator, because the training cell appends file names by plain string
# concatenation.
path_to_save_model = os.path.join(os.getcwd(), '..', 'models', '')
path_to_save_scalers = os.path.join(os.getcwd(), '..', 'scalers', '')

## 5. Training

In [41]:
# Train one binary "short" classifier per rolling date window and persist it
# together with the StandardScaler fitted on that window's features.
#
# For every window:
#   1. select the rows of `df_pre_train` dated strictly inside the window;
#   2. use the columns [2:-3] as features and label a row 1 when its
#      third-from-last column (the 3-month return) is <= inferior_short_limit;
#   3. fit and save the scaler, then train and save a small dense network.

# Seed for TensorFlow's global RNG so weight initialisation is repeatable
# between runs (bit-exact training is still not guaranteed on every backend).
training_seed = 42

if not (len(lista_date_start) == len(lista_date_end) == len(nomes_scalers) == len(nomes_modelos)):
    raise ValueError("Window start/end lists and scaler/model name lists must have the same length")

# joblib.dump does not create missing directories, so make sure the target exists.
os.makedirs(path_to_save_scalers, exist_ok=True)

# Parse the date column once (loop-invariant). pd.to_datetime accepts strings,
# datetime.date objects and datetime64 values alike, so the window filter below
# works whatever type the column currently holds.
row_dates = pd.to_datetime(df_pre_train['date'])

for date_start, date_end, scaler_name, model_name in zip(
        lista_date_start, lista_date_end, nomes_scalers, nomes_modelos):
    # The bounds are ISO 'YYYY-MM-DD' strings; the previous "%Y-%d-%m" format
    # swapped day and month and only worked because every bound is 01-01.
    window_start = pd.Timestamp(datetime.strptime(date_start, "%Y-%m-%d"))
    window_end = pd.Timestamp(datetime.strptime(date_end, "%Y-%m-%d"))

    # NOTE(review): both bounds are exclusive (as before), so rows dated
    # exactly on a boundary day fall into no window -- confirm this is intended.
    X_selected = df_pre_train.loc[(row_dates > window_start) & (row_dates < window_end)]
    print(date_start)
    print(date_end)

    if X_selected.empty:
        raise ValueError(
            "No rows found between {0} and {1}; cannot fit a scaler or a model".format(date_start, date_end)
        )

    X = X_selected.iloc[:, 2:-3]
    # Vectorised label: 1 where the 3-month return is at or below the short
    # threshold, 0 otherwise (NaN compares False and therefore maps to 0).
    y = (X_selected.iloc[:, -3] <= inferior_short_limit).astype('int64')

    sc = StandardScaler()
    X_train = sc.fit_transform(X)
    joblib.dump(sc, path_to_save_scalers + scaler_name)

    tf.random.set_seed(training_seed)
    ANN = tf.keras.models.Sequential()
    ANN.add(tf.keras.layers.Dense(units = 63, activation = 'relu'))
    ANN.add(tf.keras.layers.Dense(units = 63, activation = 'relu'))
    ANN.add(tf.keras.layers.Dense(units = 1, activation = 'sigmoid'))
    ANN.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['binary_crossentropy','accuracy'])
    ANN.fit(X_train, y, batch_size = 32, epochs = 30)
    # NOTE(review): saving to a suffix-less path depends on the installed
    # Keras version accepting it -- confirm before upgrading TensorFlow.
    ANN.save(path_to_save_model + model_name)

2003-01-01
2013-01-01
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2013\assets


INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2013\assets


2004-01-01
2014-01-01
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2014\assets


INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2014\assets


2005-01-01
2015-01-01
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2015\assets


INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2015\assets


2006-01-01
2016-01-01
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2016\assets


INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2016\assets


2007-01-01
2017-01-01
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2017\assets


INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2017\assets


2008-01-01
2018-01-01
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2018\assets


INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2018\assets


2009-01-01
2019-01-01
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2019\assets


INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2019\assets


2010-01-01
2020-01-01
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2020\assets


INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2020\assets


2011-01-01
2021-01-01
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2021\assets


INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2021\assets


2012-01-01
2022-01-01
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2022\assets


INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2022\assets


2013-01-01
2023-01-01
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2023\assets


INFO:tensorflow:Assets written to: C:\Users\mathe\Coding Projects\quant-finance-strategy\notebooks\..\models\ANN2023\assets
