In [1]:
# Import necessary libraries
import pandas as pd
import sqlite3
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Path to SQLite database
db_path = 'stocks_data.db'

In [2]:
# Step 1: Load processed data from SQLite
with sqlite3.connect(db_path) as conn:
    query = "SELECT * FROM processed_stocks"
    data = pd.read_sql(query, conn)

print(f"Loaded processed data: {data.shape[0]} rows")

# Step 2: Define features (X) and target (y)
features = ['7-day MA', '14-day MA', 'Volatility', 'Lag_1', 'Lag_2']
target = 'Adj Close'

X = data[features]
y = data[target]

# Step 3: Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  # Fit and transform on training data
X_test_scaled = scaler.transform(X_test)       # Transform testing data using the same scaler

# Step 5: Train a Linear Regression model
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Step 6: Evaluate the model
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Model Evaluation:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared (R²): {r2:.2f}")

# Step 7: Save the trained model and scaler
joblib.dump(model, 'stock_price_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
print("Trained model saved as 'stock_price_model.pkl'")
print("Scaler saved as 'scaler.pkl'")

Loaded processed data: 12006 rows
Model Evaluation:
Mean Squared Error (MSE): 1.05
R-squared (R²): 1.00
Trained model saved as 'stock_price_model.pkl'
Scaler saved as 'scaler.pkl'
