In [16]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import joblib
import sqlite3
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Path to SQLite database
db_path = 'database/stocks_data.db'

# Load data from SQLite
with sqlite3.connect(db_path) as conn:
    query = "SELECT * FROM processed_stocks"
    data = pd.read_sql(query, conn)
print(f"Loaded processed data: {data.shape[0]} rows")

# Step 1: Set default ticker
default_ticker = 'XOM'

# Step 2: Filter data for the default ticker
ticker_data = data[data['Ticker'] == default_ticker]
print(f"Loaded data for {default_ticker}: {ticker_data.shape[0]} rows")

# Define features and target
features = ['7-day MA', '14-day MA', 'Volatility', 'Lag_1', 'Lag_2']
target = 'Adj Close'

X_raw = ticker_data[features]
y_raw = ticker_data[target]

# Apply the sliding window approach
window_size = 10
X, y = [], []
for i in range(len(X_raw) - window_size):
    X.append(X_raw.iloc[i:i+window_size, :].values)  # Take `window_size` rows of features
    y.append(y_raw.iloc[i+window_size])  # Target is the next value after the window

# Convert to NumPy arrays
X, y = np.array(X), np.array(y)

print(f"Shape of X: {X.shape}, Shape of y: {y.shape}")

# Normalize features
scaler = MinMaxScaler()
X_scaled = []
for i in range(X.shape[0]):
    X_scaled.append(scaler.fit_transform(X[i]))  # Normalize each sliding window independently
X_scaled = np.array(X_scaled)

# Define features and target
features = ['7-day MA', '14-day MA', 'Volatility', 'Lag_1', 'Lag_2']
target = 'Adj Close'

X = ticker_data[features]
y = ticker_data[target]

# Feature Engineering: Adding lagged variables, volatility, and rolling statistics
ticker_data['Lag_1'] = ticker_data['Adj Close'].shift(1)
ticker_data['Lag_2'] = ticker_data['Adj Close'].shift(2)
ticker_data['Lag_3'] = ticker_data['Adj Close'].shift(3)
ticker_data['Volatility'] = ticker_data['Adj Close'].rolling(window=7).std()
ticker_data['Momentum'] = ticker_data['Adj Close'].pct_change(periods=3)

# Drop NaN values introduced by feature engineering
ticker_data = ticker_data.dropna()

# Update features and target with engineered features
engineered_features = features + ['Lag_1', 'Lag_2', 'Lag_3', 'Volatility', 'Momentum']
X = ticker_data[engineered_features]
y = ticker_data[target]
print(f"Updated feature set: {engineered_features}")

# Normalize the features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# No further reshaping needed as `X_scaled` is already in (samples, timesteps, features) format
print(f"Training data shape: {X_train.shape}, Testing data shape: {X_test.shape}")

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Reshape input for LSTM (samples, timesteps, features)
X_train_scaled = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test_scaled = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

# Build the optimized LSTM model
model = Sequential()
model.add(LSTM(128, return_sequences=True, activation='relu', input_shape=(1, X_train_scaled.shape[2])))
model.add(Dropout(0.3))  # Add dropout for regularization
model.add(LSTM(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1))  # Fully connected output layer
model.compile(optimizer='adam', loss='mean_squared_error')

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Callbacks for better training
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

# Train the model
history = model.fit(
    X_train_scaled, y_train,
    validation_data=(X_test_scaled, y_test),
    epochs=100,  # Start with 100 epochs
    batch_size=32,  # Reduced batch size
    callbacks=[early_stopping, reduce_lr]
)

# Evaluate the model
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Model Evaluation:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"R-squared (R²): {r2:.2f}")

# Save the trained model and scaler
model_filename = f'models/model_{default_ticker}_lstm.h5'
scaler_filename = f'models/scaler_{default_ticker}_lstm.pkl'

model.save(model_filename)
joblib.dump(scaler, scaler_filename)

print(f"{default_ticker} model saved as '{model_filename}'")
print(f"{default_ticker} scaler saved as '{scaler_filename}'")

Loaded processed data: 178922 rows
Loaded data for XOM: 31347 rows
Shape of X: (31337, 10, 5), Shape of y: (31337,)
Updated feature set: ['7-day MA', '14-day MA', 'Volatility', 'Lag_1', 'Lag_2', 'Lag_1', 'Lag_2', 'Lag_3', 'Volatility', 'Momentum']
Training data shape: (25072, 10), Testing data shape: (6269, 10)
Epoch 1/100


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_data['Lag_1'] = ticker_data['Adj Close'].shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_data['Lag_2'] = ticker_data['Adj Close'].shift(2)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_data['Lag_3'] = ticker_data['Adj Close'].shift(3)
A value is trying to be set on a c

[1m784/784[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 1112.5896 - val_loss: 7.0589 - learning_rate: 0.0010
Epoch 2/100
[1m784/784[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 62.7711 - val_loss: 3.4052 - learning_rate: 0.0010
Epoch 3/100
[1m784/784[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 59.8471 - val_loss: 2.4516 - learning_rate: 0.0010
Epoch 4/100
[1m784/784[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 58.5273 - val_loss: 2.5769 - learning_rate: 0.0010
Epoch 5/100
[1m784/784[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 57.6122 - val_loss: 2.2210 - learning_rate: 0.0010
Epoch 6/100
[1m784/784[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 57.3054 - val_loss: 2.6393 - learning_rate: 0.0010
Epoch 7/100
[1m784/784[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 57.9860 - val_loss: 1.2134 - learning_r



Model Evaluation:
Mean Squared Error (MSE): 0.95
Mean Absolute Error (MAE): 0.63
R-squared (R²): 1.00
XOM model saved as 'models/model_XOM_lstm.h5'
XOM scaler saved as 'models/scaler_XOM_lstm.pkl'


In [2]:

# import pandas as pd
# import numpy as np
# from keras.models import Sequential
# from keras.layers import LSTM, Dense
# from keras.callbacks import EarlyStopping
# from sklearn.preprocessing import MinMaxScaler
# from sklearn.model_selection import train_test_split
# import joblib
# import sqlite3
# from sklearn.metrics import mean_squared_error, r2_score


In [3]:

# # Path to SQLite database
# db_path = 'database/stocks_data.db'

# # Load data from SQLite
# with sqlite3.connect(db_path) as conn:
#     query = "SELECT * FROM processed_stocks"
#     data = pd.read_sql(query, conn)
# print(f"Loaded processed data: {data.shape[0]} rows")


In [4]:
# # Step 1: Set default ticker
# default_ticker = 'XOM'

# # Step 2: Filter data for the default ticker
# ticker_data = data[data['Ticker'] == default_ticker]
# print(f"Loaded data for {default_ticker}: {ticker_data.shape[0]} rows")

In [5]:

# # Define features and target
# features = ['7-day MA', '14-day MA', 'Volatility', 'Lag_1', 'Lag_2']
# target = 'Adj Close'

# X = ticker_data[features]
# y = ticker_data[target]


In [6]:

# # # Drop missing values
# # data = data.dropna(subset=features + [target])
# # X = data[features].values
# # y = data[target].values

# # # Normalize the features
# scaler = MinMaxScaler()
# X_scaled = scaler.fit_transform(X)

# # # Reshape target to 2D for compatibility
# # y = y.reshape(-1, 1)

# # Split data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [7]:

# # Reshape input for LSTM (samples, timesteps, features)
# # scaler.MinMaxScaler()
# X_train_scaled = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
# X_test_scaled = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])


In [8]:

# # Build and train the LSTM model
# model = Sequential()
# model.add(LSTM(32, input_shape=(1, X_train_scaled.shape[2]), activation='relu'))
# model.add(Dense(1))
# model.compile(optimizer='adam', loss='mean_squared_error')

# # Early stopping for training
# early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# # Train the model
# model.fit(X_train_scaled, y_train, epochs=100, batch_size=42, validation_data=(X_test_scaled, y_test), callbacks=[early_stopping])

In [9]:

# # Evaluate the model
# y_pred = model.predict(X_test_scaled)
# mse = mean_squared_error(y_test, y_pred)
# r2 = r2_score(y_test, y_pred)
# print("Model Evaluation:")
# print(f"Mean Squared Error (MSE): {mse:.2f}")
# print(f"R-squared (R²): {r2:.2f}")

In [10]:
# # Step 8: Save the trained model and scaler
# model_filename = f'models/model_{default_ticker}_lstm.pkl'
# scaler_filename = f'models/scaler_{default_ticker}_lstm.pkl'

# joblib.dump(model, model_filename)
# joblib.dump(scaler, scaler_filename)

# print(f"{default_ticker} model saved as '{model_filename}'")
# print(f"{default_ticker} scaler saved as '{scaler_filename}'")

In [11]:
# # Load the model and scaler
# try:
#     lstm_model = joblib.load(model_file_path)
#     scaler = joblib.load(scaler_file_path)
#     model_and_scaler_status = "LSTM model and scaler loaded successfully."
# except Exception as e:
#     model_and_scaler_status = f"Error loading LSTM model or scaler: {e}"

# model_and_scaler_status