In [None]:
import sqlite3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from bokeh.plotting import figure, show, output_notebook
from bokeh.palettes import Category10
from bokeh.models import HoverTool
from ta.trend import SMAIndicator, EMAIndicator, MACD
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands, AverageTrueRange
from ta.volume import OnBalanceVolumeIndicator
import time
import joblib
import tensorflow as tf
import warnings
import os
# Suppress every Python warning category for the rest of the session.
warnings.filterwarnings('ignore')

In [2]:
# Reset Keras' global state so re-running the notebook in the same kernel
# starts from a clean backend session.
import gc
from tensorflow.keras import backend as K

K.clear_session()

# Optional: force garbage collection (the returned count is shown as the cell output)
gc.collect()

0

In [3]:
# SQLite file used as a local cache for downloaded prices; make sure its
# parent folder exists before connecting.
db_path = os.path.join("data", "stock_data.db")
db_dir = os.path.dirname(db_path)
os.makedirs(db_dir, exist_ok=True)

# Connecting creates the database file if it is missing. The connection is
# bound to `conn` and deliberately left open: the download cell reuses it.
conn = sqlite3.connect(db_path)

# Report which device TensorFlow will train on.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"Using GPU: {gpus[0].name}")
else:
    print("GPU not found, using CPU")

Using GPU: /physical_device:GPU:0


In [4]:
# Display name -> Yahoo Finance symbol for every bank analysed in this notebook.
# The display name doubles as the SQLite table name and the plot legend label.
tickers = dict(
    CIMB='1023.KL',
    MAYBANK='1155.KL',
    HLB='5819.KL',
    AMMB='1015.KL',
    BANKISLAM='5258.KL',
    AFFIN='5185.KL',
    PBBank='1295.KL',
    RHB='1066.KL',
    ALLIANCE='2488.KL',
)

# Number of consecutive rows fed to the network per sample (LSTM time steps).
sequence_length = 90
# Number of values predicted per sample.
output_length = 1

In [5]:
# Download five years of daily prices per ticker, cache each table in SQLite,
# and fall back to the cached table when the download comes back empty.
all_stock_data = {}

# Use a dedicated connection for this cell and always close it, even if a
# download or a write fails part-way through the loop.
db_conn = sqlite3.connect(db_path)
try:
    for name, ticker in tickers.items():
        data = yf.download(ticker, period="5y", interval="1d")
        if data is not None and not data.empty:
            # NOTE(review): some yfinance versions appear to return
            # (field, ticker) MultiIndex columns even for a single symbol --
            # confirm for the installed version. Flatten to the field level so
            # the cached table has plain 'Open'/'Close'/... columns and a frame
            # reloaded from SQLite looks the same as a freshly downloaded one.
            if isinstance(data.columns, pd.MultiIndex):
                data.columns = data.columns.get_level_values(0)
            data.to_sql(name, db_conn, if_exists='replace', index=True)
            all_stock_data[name] = data
            print(f"{name}: {len(data)} days - saved to database")
        else:
            try:
                # Table names come from the hard-coded `tickers` dict; quote the
                # identifier so unusual names cannot break the statement.
                data = pd.read_sql(
                    f'SELECT * FROM "{name}"',
                    db_conn,
                    index_col='Date',
                    parse_dates=['Date'],
                )
                all_stock_data[name] = data
                print(f"{name}: No new data, loaded from database ({len(data)} days)")
            except Exception as exc:
                # Best-effort fallback: report why the cache could not be used and
                # continue with the next ticker. Unlike a bare `except`, this does
                # not swallow KeyboardInterrupt/SystemExit.
                print(f"{name}: No data found and no database backup available ({exc})")
finally:
    db_conn.close()

print("DONE")

  data = yf.download(ticker, period="5y", interval="1d")
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y", interval="1d")


CIMB: 1227 days - saved to database


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y", interval="1d")


MAYBANK: 1227 days - saved to database


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y", interval="1d")


HLB: 1227 days - saved to database


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y", interval="1d")


AMMB: 1227 days - saved to database


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y", interval="1d")


BANKISLAM: 1227 days - saved to database


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y", interval="1d")


AFFIN: 1227 days - saved to database


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y", interval="1d")


PBBank: 1227 days - saved to database


[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y", interval="1d")


RHB: 1227 days - saved to database


[*********************100%***********************]  1 of 1 completed

ALLIANCE: 1227 days - saved to database
DONE





In [None]:
# Interactive overview: one closing-price line per bank, rendered inline.
output_notebook()

p = figure(title="Malaysian Bank Stocks - Historical Closing Prices",
           x_axis_type='datetime',
           width=1300,
           height=600,
           tools="pan,wheel_zoom,box_zoom,reset,save")

colors = Category10[10]

for i, (name, data) in enumerate(all_stock_data.items()):
    color = colors[i % len(colors)]  # wrap around if there are more stocks than colours
    # data['Close'] can be a Series or a one-column DataFrame depending on how
    # the frame was produced; flatten it so the glyph always gets a 1-D array.
    close_prices = np.ravel(data['Close'])
    p.line(data.index, close_prices, legend_label=name, line_width=2, color=color)

# Tooltip shows the date (formatted as YYYY-MM-DD) and the price under the cursor.
hover = HoverTool(tooltips=[("Date", "@x{%F}"), ("Price", "@y")], formatters={"@x": "datetime"})
p.add_tools(hover)

# Clicking a legend entry toggles that line's visibility.
p.legend.click_policy="hide"
p.legend.location="top_left"
show(p)

In [None]:
def calculate_features(data):
    """Return a copy of ``data`` with technical-indicator columns appended.

    ``data`` must provide 'Close', 'High', 'Low' and 'Volume' columns. The
    input frame is not modified. Added columns, in order:

    - SMA_20, EMA_50: 20-row simple / 50-row exponential moving average of Close
    - MACD: MACD line of Close with the indicator's default windows
    - RSI_14: 14-row relative strength index of Close
    - Momentum_10: fractional change of Close versus 10 rows earlier
    - BB_upper, BB_lower: Bollinger bands (20-row window, 2 deviations)
    - ATR_14: 14-row rolling mean of (High - Low). This is the average daily
      range, not a true ATR, because gaps against the previous close are ignored.
    - OBV: on-balance volume from Close and Volume
    """
    enriched = data.copy()
    close = enriched['Close']

    # Trend indicators
    enriched['SMA_20'] = SMAIndicator(close, window=20).sma_indicator()
    enriched['EMA_50'] = EMAIndicator(close, window=50).ema_indicator()
    enriched['MACD'] = MACD(close).macd()

    # Momentum indicators
    enriched['RSI_14'] = RSIIndicator(close, window=14).rsi()
    enriched['Momentum_10'] = close / close.shift(10) - 1

    # Volatility indicators
    bands = BollingerBands(close, window=20, window_dev=2)
    enriched['BB_upper'] = bands.bollinger_hband()
    enriched['BB_lower'] = bands.bollinger_lband()
    daily_range = enriched['High'] - enriched['Low']
    enriched['ATR_14'] = daily_range.rolling(14).mean()

    # Volume indicator
    enriched['OBV'] = OnBalanceVolumeIndicator(close, enriched['Volume']).on_balance_volume()
    return enriched

# Build one long frame with a row per (date, stock), carrying raw prices plus
# the indicator columns from calculate_features().
price_columns = ['Open', 'High', 'Low', 'Close', 'Volume']

combined_data = []
for name, data in all_stock_data.items():
    # np.ravel copes with both a Series and a one-column DataFrame.
    raw = {'Date': data.index}
    for col in price_columns:
        raw[col] = np.ravel(data[col])
    raw['Stock'] = name
    df = calculate_features(pd.DataFrame(raw))
    combined_data.append(df)

if not combined_data:
    raise ValueError("No stock data available: nothing was downloaded or loaded from the database")

feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'SMA_20', 'EMA_50',
                   'MACD', 'RSI_14', 'Momentum_10', 'BB_upper', 'BB_lower', 'ATR_14', 'OBV']
n_features = len(feature_columns)

stock_order = list(all_stock_data.keys())
combined_df = pd.concat(combined_data, ignore_index=True)
combined_df['Stock'] = pd.Categorical(combined_df['Stock'], categories=stock_order, ordered=True)

# Rolling indicators are undefined for the first rows of every stock. Drop those
# rows from combined_df itself, keeping Date/Stock, so the train/test split and
# the model never see NaN inputs.
combined_df = (
    combined_df
    .dropna(subset=feature_columns)
    .sort_values(['Date', 'Stock'])
    .reset_index(drop=True)
)

# Feature-only view of the cleaned frame.
combined_df_clean = combined_df[feature_columns]

In [None]:
# Pairwise correlation between the model input features, pooled over all stocks.
correlation_matrix = combined_df.loc[:, feature_columns].corr()
print("Feature Correlation Matrix:\n", correlation_matrix)

In [None]:
# Positional split on the (Date, Stock)-sorted frame: the first 80% of rows are
# used for training, the remainder for testing.
split_ratio = 0.80
split_index = int(len(combined_df) * split_ratio)
train_df, test_df = combined_df.iloc[:split_index], combined_df.iloc[split_index:]

# Stock-specific scaling
# One feature scaler and one target ('Close') scaler per stock, fitted on that
# stock's training rows only and then applied to its test rows.
scalers_features, scalers_target = {}, {}
scaled_features_train, scaled_targets_train = [], []
scaled_features_test, scaled_targets_test = [], []

for stock in stock_order:
    train_rows = train_df[train_df['Stock'] == stock]
    test_rows = test_df[test_df['Stock'] == stock]
    train_stock = train_rows[feature_columns]
    test_stock = test_rows[feature_columns]
    if train_stock.empty or test_stock.empty:
        print(f"Warning: Empty data for {stock}")
        continue

    # After the loop these two names still refer to the last processed stock's
    # scalers; the full per-stock set lives in the two dictionaries.
    scaler_features = StandardScaler()
    scaler_target = StandardScaler()
    scalers_features[stock] = scaler_features
    scalers_target[stock] = scaler_target

    scaled_features_train.append(scaler_features.fit_transform(train_stock))
    scaled_targets_train.append(scaler_target.fit_transform(train_rows[['Close']]))
    scaled_features_test.append(scaler_features.transform(test_stock))
    scaled_targets_test.append(scaler_target.transform(test_rows[['Close']]))

# Stack the per-stock blocks (in stock_order) into single arrays.
scaled_features_train = np.concatenate(scaled_features_train)
scaled_targets_train = np.concatenate(scaled_targets_train)
scaled_features_test = np.concatenate(scaled_features_test)
scaled_targets_test = np.concatenate(scaled_targets_test)

In [None]:
def create_sequences_per_stock(df, seq_length, feature_columns, target_column):
    """Build sliding-window samples separately for every stock in ``df``.

    For each distinct value of ``df['Stock']`` (in order of first appearance),
    every run of ``seq_length`` consecutive rows becomes one input window and
    the ``target_column`` value of the row right after it becomes the label.
    Windows never span two stocks. Rows are used in the order they appear in
    ``df``, so the frame should already be sorted chronologically.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Stock', every name in ``feature_columns`` and
        ``target_column``.
    seq_length : int
        Number of rows per input window.
    feature_columns : list of str
        Columns used as model inputs.
    target_column : str
        Column to predict. It may also be one of ``feature_columns``.

    Returns
    -------
    (X, y) : tuple of numpy.ndarray
        ``X`` has shape (samples, seq_length, len(feature_columns)) and ``y``
        has shape (samples,). Both are empty, with those same ranks, when no
        stock has more than ``seq_length`` rows.
    """
    feature_columns = list(feature_columns)
    X, y = [], []
    for stock in df['Stock'].unique():
        stock_rows = df[df['Stock'] == stock]
        if stock_rows.empty:
            continue
        # Select features and target independently. Selecting
        # `feature_columns + [target_column]` first would duplicate the target
        # column whenever it is also a feature, which widens the feature matrix
        # by one column and turns the target into a 2-D array.
        features = stock_rows[feature_columns].to_numpy()
        targets = stock_rows[target_column].to_numpy()
        for end in range(seq_length, len(features)):
            X.append(features[end - seq_length:end])
            y.append(targets[end])
    if not X:
        return np.empty((0, seq_length, len(feature_columns))), np.empty((0,))
    return np.array(X), np.array(y)

# Create sequences
def _scale_split_per_stock(split_df):
    """Standardise ``split_df`` with each stock's train-fitted scalers.

    Rows with a missing feature value are dropped. The returned frame holds
    the scaled feature columns, a 'Close_scaled' target column (Close passed
    through the stock's target scaler) and the 'Stock' label, grouped by stock
    in ``stock_order`` with each stock's rows kept in their original order.
    """
    scaled_parts = []
    for stock in stock_order:
        if stock not in scalers_features:
            continue  # no scaler was fitted for this stock
        rows = split_df[split_df['Stock'] == stock].dropna(subset=feature_columns)
        if rows.empty:
            continue
        part = pd.DataFrame(
            scalers_features[stock].transform(rows[feature_columns]),
            columns=feature_columns,
            index=rows.index,
        )
        part['Close_scaled'] = scalers_target[stock].transform(rows[['Close']]).ravel()
        part['Stock'] = stock
        scaled_parts.append(part)
    if not scaled_parts:
        raise ValueError("No rows left to scale: check the split and the fitted scalers")
    return pd.concat(scaled_parts)


# Feed the network standardised, NaN-free inputs and targets instead of raw
# prices. The target uses its own column name so it never collides with the
# 'Close' feature. Predictions come out in scaled units and have to be mapped
# back with scalers_target[stock].inverse_transform.
train_scaled_df = _scale_split_per_stock(train_df)
test_scaled_df = _scale_split_per_stock(test_df)

X_train, y_train = create_sequences_per_stock(train_scaled_df, sequence_length, feature_columns, 'Close_scaled')
X_test, y_test = create_sequences_per_stock(test_scaled_df, sequence_length, feature_columns, 'Close_scaled')
print(f"X_train shape: {X_train.shape} (samples, time_steps, features)")
print(f"y_train shape: {y_train.shape} (samples,)")
print(f"X_test shape: {X_test.shape} (samples, time_steps, features)")
print(f"y_test shape: {y_test.shape} (samples,)")

In [None]:
# Short aliases for the Keras sub-modules used in this and later cells.
models = tf.keras.models
layers = tf.keras.layers
callbacks = tf.keras.callbacks

# Seed NumPy and TensorFlow so weight initialisation and batch shuffling are
# repeatable between runs on the same setup (some GPU ops may still be
# non-deterministic).
np.random.seed(42)
tf.random.set_seed(42)

# Four stacked LSTM layers followed by a small dense head. Every LSTM except
# the last returns the full sequence so the next LSTM receives one vector per
# time step; dropout follows each layer for regularisation.
model = models.Sequential([
    layers.LSTM(units=100, return_sequences=True, input_shape=(sequence_length, n_features)),
    layers.Dropout(0.3),

    layers.LSTM(units=100, return_sequences=True),
    layers.Dropout(0.3),

    layers.LSTM(units=80, return_sequences=True),
    layers.Dropout(0.2),

    layers.LSTM(units=80, return_sequences=False),
    layers.Dropout(0.2),

    layers.Dense(units=50, activation='relu'),
    layers.Dropout(0.2),

    layers.Dense(units=1)  # single regression output: the next value of the target
])

model.compile(optimizer='adam', loss='mean_squared_error')
print("Model built successfully!")
model.summary()

In [None]:
print("Phase1: Training model without early stopping")
start_time = time.time()

# Fixed-length run: 50 epochs, mini-batches of 32. The held-out split is passed
# so Keras reports val_loss after every epoch.
phase1_fit_options = dict(
    epochs=50,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1,
)
history = model.fit(X_train, y_train, **phase1_fit_options)

end_time = time.time()
total_time = end_time - start_time

print("Training complete")
print(f"Total training time: {total_time:.2f} seconds")

In [None]:
# Training vs. validation loss per epoch for the first training run.
fig, ax = plt.subplots(figsize=(14, 5))
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_title('Model Loss During Training')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
print("Phase2: Training model with early stopping")
start_time = time.time()

# Stop once val_loss has not improved for 20 consecutive epochs and roll the
# model back to the weights of its best epoch.
# NOTE(review): this fits the same `model` object again, so training continues
# from the weights left by the previous fit rather than from scratch.
# NOTE(review): val_loss is measured on (X_test, y_test); using the test split
# to pick the stopping point makes later test metrics optimistic -- consider a
# separate validation split.
early_stop = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)

phase2_fit_options = dict(
    epochs=200,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stop],
    verbose=1,
)
history2 = model.fit(X_train, y_train, **phase2_fit_options)

end_time = time.time()
total_time = end_time - start_time

print("Training complete!")
print(f"Total training time: {total_time:.2f} seconds")

In [None]:
# Training vs. validation loss per epoch for the early-stopping run.
fig, ax = plt.subplots(figsize=(14, 5))
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(history2.history[history_key], label=curve_label)
ax.set_title('Model Loss During Training')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Persist the trained network (HDF5 format).
model.save('klse_bank_model.h5')
print("Model saved")

# Persist the complete per-stock scaler dictionaries ({stock name: scaler}).
# `scaler_features` / `scaler_target` only hold the scalers of the last stock
# processed in the scaling loop, so saving those would apply one stock's
# statistics to every other stock at inference time.
# Loaders must index by stock name, e.g. joblib.load(path)['MAYBANK'].
joblib.dump(scalers_features, 'klse_bank_scaler_features.pkl')
joblib.dump(scalers_target, 'klse_bank_scaler_target.pkl')
print("Scaler saved")