In [1]:
import pandas as pd
import numpy as np

In [2]:
from google.colab import drive

In [3]:
drive.mount('/content/drive')

Mounted at /content/drive


#### Data Preprocessing

In [4]:
df = pd.read_csv("/content/drive/MyDrive/AI Final Project/CFC_traded_sahres_2019_to_date.csv")

In [5]:
df.drop(columns=['Share Code'], axis=1,inplace=True) #dropping repititve share code

In [6]:
l = []
l_less = []

# threshold to drop rows with 40%+ missing values
threshold = 0.4 * df.shape[0]

for column in df.columns:
    # checking using the threshold
    if df[column].isnull().sum() < threshold:
        l.append(column)
    else:
        l_less.append(column)

# filtering the data set to have columns in the list 'l'
df = df[l]
df

Unnamed: 0,Daily Date,Year High (GH¢),Year Low (GH¢),Previous Closing Price - VWAP (GH¢),Opening Price (GH¢),Last Transaction Price (GH¢),Closing Price - VWAP (GH¢),Price Change (GH¢),Total Shares Traded,Total Value Traded (GH¢)
0,31/07/2024,0.02,0.02,0.02,0.02,0.02,0.02,0.0,0.00,0.00
1,30/07/2024,0.02,0.02,0.02,0.02,0.02,0.02,0.0,0.00,0.00
2,29/07/2024,0.02,0.02,0.02,0.02,0.02,0.02,0.0,0.00,0.00
3,26/07/2024,0.02,0.02,0.02,0.02,0.02,0.02,0.0,70.00,1.40
4,25/07/2024,0.02,0.02,0.02,0.02,0.02,0.02,0.0,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...
1366,09/01/2019,0.02,0.02,0.02,0.02,0.02,0.02,0.0,0.00,0.00
1367,08/01/2019,0.02,0.02,0.02,0.02,0.02,0.02,0.0,0.00,0.00
1368,04/01/2019,0.02,0.02,0.02,0.02,0.02,0.02,0.0,0.00,0.00
1369,03/01/2019,0.02,0.02,0.02,0.02,0.02,0.02,0.0,0.00,0.00


In [7]:
# setting the dates to datetype
df['Daily Date'] = pd.to_datetime(df['Daily Date'], format='%d/%m/%Y')

In [8]:
df['Daily Date'] #checking the new date format

Unnamed: 0,Daily Date
0,2024-07-31
1,2024-07-30
2,2024-07-29
3,2024-07-26
4,2024-07-25
5,2024-07-24
6,2024-07-23
7,2024-07-22
8,2024-07-19
9,2024-07-18


In [9]:
# convering shares traded from string to float formats
df['Total Shares Traded'] = df['Total Shares Traded'].str.replace(',', '').astype(float)
df['Total Value Traded (GH¢)'] = df['Total Value Traded (GH¢)'].str.replace(',', '').astype(float)

In [10]:
df.columns

Index(['Daily Date', 'Year High (GH¢)', 'Year Low (GH¢)',
       'Previous Closing Price - VWAP (GH¢)', 'Opening Price (GH¢)',
       'Last Transaction Price (GH¢)', 'Closing Price - VWAP (GH¢)',
       'Price Change (GH¢)', 'Total Shares Traded',
       'Total Value Traded (GH¢)'],
      dtype='object')

#### Feature Engineering

In [11]:
# scaling
from sklearn.preprocessing import MinMaxScaler

df = df.sort_values('Daily Date')

df_with_date= df[['Daily Date', 'Year High (GH¢)', 'Year Low (GH¢)',
       'Previous Closing Price - VWAP (GH¢)', 'Opening Price (GH¢)',
       'Last Transaction Price (GH¢)', 'Closing Price - VWAP (GH¢)',
       'Price Change (GH¢)', 'Total Shares Traded',
       'Total Value Traded (GH¢)']]

df_without_date = df.drop(columns=['Daily Date'])

scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df_without_date) # scaling the dataframe

# sequences
def create_sequences(df, seq_length):
    xs = []
    ys = []
    for i in range(len(df) - seq_length):
        x = df[i:i+seq_length]
        y = df[i+seq_length, 0]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

seq_length = 30  # example length
X, y = create_sequences(scaled_data, seq_length)

In [12]:
# splitting to train and test sets
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

#### Training of models

In [13]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense

# LSTM
def create_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=input_shape))
    model.add(LSTM(50, return_sequences=False))
    model.add(Dense(25))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# GRU
def create_gru_model(input_shape):
    model = Sequential()
    model.add(GRU(50, return_sequences=True, input_shape=input_shape))
    model.add(GRU(50, return_sequences=False))
    model.add(Dense(25))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# creating models
input_shape = (X_train.shape[1], X_train.shape[2])
lstm_model = create_lstm_model(input_shape)
gru_model = create_gru_model(input_shape)

  super().__init__(**kwargs)


In [14]:
# training the LSTM model
lstm_history = lstm_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# training the GRU model
gru_history = gru_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 55ms/step - loss: 0.0730 - val_loss: 6.2463e-04
Epoch 2/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 52ms/step - loss: 0.0080 - val_loss: 3.7063e-04
Epoch 3/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 50ms/step - loss: 0.0079 - val_loss: 2.0571e-04
Epoch 4/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 45ms/step - loss: 0.0049 - val_loss: 1.3958e-04
Epoch 5/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0087 - val_loss: 3.4829e-05
Epoch 6/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0059 - val_loss: 1.1049e-04
Epoch 7/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - loss: 0.0058 - val_loss: 1.7024e-06
Epoch 8/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - loss: 0.0046 - val_loss: 4.8394e-05
Epoch 9/10
[1m3

#### Evaluation and optimisation

In [15]:
# evaluating LSTM model
lstm_loss = lstm_model.evaluate(X_test, y_test)
print(f'LSTM Model Loss: {lstm_loss}')

# evaluating GRU model
gru_loss = gru_model.evaluate(X_test, y_test)
print(f'GRU Model Loss: {gru_loss}')

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 1.2136e-04
LSTM Model Loss: 0.00012197971227578819
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 1.4783e-05
GRU Model Loss: 1.47600085256272e-05


In [16]:
# predictions
lstm_predictions = lstm_model.predict(X_test)
gru_predictions = gru_model.predict(X_test)

# inverse transform predictions
lstm_predictions = scaler.inverse_transform(np.concatenate((lstm_predictions, np.zeros((lstm_predictions.shape[0], scaled_data.shape[1] - 1))), axis=1))[:, 0]
gru_predictions = scaler.inverse_transform(np.concatenate((gru_predictions, np.zeros((gru_predictions.shape[0], scaled_data.shape[1] - 1))), axis=1))[:, 0]

print("LSTM Preds:", lstm_predictions[:5])
print("GRU Preds:", gru_predictions[:5])

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 57ms/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 71ms/step
LSTM Preds: [0.019891   0.01989093 0.01989082 0.0198914  0.01989149]
GRU Preds: [0.01996136 0.01996142 0.01996152 0.01996109 0.01996091]


#### Saving the scaler and  models

In [17]:
import joblib

# models
lstm_model.save('lstm_model.keras')
gru_model.save('gru_model.keras')

# scaler
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']