In [1]:

import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import joblib
import sqlite3
from sklearn.metrics import mean_squared_error, r2_score


In [2]:

# Path to SQLite database
db_path = 'database/stocks_data.db'

# Load data from SQLite
with sqlite3.connect(db_path) as conn:
    query = "SELECT * FROM processed_stocks"
    data = pd.read_sql(query, conn)
print(f"Loaded processed data: {data.shape[0]} rows")


Loaded processed data: 178922 rows


In [3]:
# Step 1: Set default ticker
default_ticker = 'XOM'

# Step 2: Filter data for the default ticker
ticker_data = data[data['Ticker'] == default_ticker]
print(f"Loaded data for {default_ticker}: {ticker_data.shape[0]} rows")

Loaded data for XOM: 31347 rows


In [4]:

# Define features and target
features = ['7-day MA', '14-day MA', 'Volatility', 'Lag_1', 'Lag_2']
target = 'Adj Close'

X = ticker_data[features]
y = ticker_data[target]


In [5]:

# # Drop missing values
# data = data.dropna(subset=features + [target])
# X = data[features].values
# y = data[target].values

# # Normalize the features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# # Reshape target to 2D for compatibility
# y = y.reshape(-1, 1)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [6]:

# Reshape input for LSTM (samples, timesteps, features)
# scaler.MinMaxScaler()
X_train_scaled = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test_scaled = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])


In [7]:

# Build and train the LSTM model
model = Sequential()
model.add(LSTM(32, input_shape=(1, X_train_scaled.shape[2]), activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

# Early stopping for training
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model
model.fit(X_train_scaled, y_train, epochs=100, batch_size=42, validation_data=(X_test_scaled, y_test), callbacks=[early_stopping])

Epoch 1/100


  super().__init__(**kwargs)


[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 665us/step - loss: 2633.4128 - val_loss: 487.9894
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 477us/step - loss: 206.8030 - val_loss: 4.1730
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 481us/step - loss: 8.4412 - val_loss: 3.6590
Epoch 4/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 477us/step - loss: 5.9685 - val_loss: 3.3042
Epoch 5/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 480us/step - loss: 4.4353 - val_loss: 3.0103
Epoch 6/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 481us/step - loss: 4.4413 - val_loss: 2.7252
Epoch 7/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 491us/step - loss: 3.9131 - val_loss: 2.4724
Epoch 8/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 483us/step - loss: 5.8728 - val_loss: 2.2529
Epoch 9/100
[1m598/5

<keras.src.callbacks.history.History at 0x333327d40>

In [8]:

# Evaluate the model
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Model Evaluation:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared (R²): {r2:.2f}")

[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 468us/step
Model Evaluation:
Mean Squared Error (MSE): 0.73
R-squared (R²): 1.00


In [9]:
# Step 8: Save the trained model and scaler
model_filename = f'models/model_{default_ticker}_lstm.pkl'
scaler_filename = f'models/scaler_{default_ticker}_lstm.pkl'

joblib.dump(model, model_filename)
joblib.dump(scaler, scaler_filename)

print(f"{default_ticker} model saved as '{model_filename}'")
print(f"{default_ticker} scaler saved as '{scaler_filename}'")

XOM model saved as 'models/model_XOM_lstm.pkl'
XOM scaler saved as 'models/scaler_XOM_lstm.pkl'


In [10]:
# Load the model and scaler
try:
    lstm_model = joblib.load(model_file_path)
    scaler = joblib.load(scaler_file_path)
    model_and_scaler_status = "LSTM model and scaler loaded successfully."
except Exception as e:
    model_and_scaler_status = f"Error loading LSTM model or scaler: {e}"

model_and_scaler_status

"Error loading LSTM model or scaler: name 'model_file_path' is not defined"