In [None]:
## Imports
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, r2_score

import warnings
warnings.filterwarnings('ignore')

In [None]:
## Create output directory
# Later cells write their CSV, plot and model files under 'outputs';
# exist_ok=True keeps this cell safe to re-run once the directory exists.
os.makedirs('outputs', exist_ok=True)

In [None]:
## Load and Clean the Data
def load_data(path='Data/RELIANCE.csv'):
    """Read a price CSV and return its OHLCV rows ordered by date.

    Parameters
    ----------
    path : str
        Location of the CSV file. It must provide the columns Date, Open,
        High, Low, Close and Volume; any other columns are discarded.

    Returns
    -------
    pandas.DataFrame
        The six retained columns with ``Date`` parsed by ``pd.to_datetime``,
        rows sorted by ``Date`` and every row containing a missing value
        removed. Row labels from the file are kept (the index is not reset).
    """
    wanted = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
    prices = pd.read_csv(path)[wanted]
    # Re-assigning an existing column keeps it in its original position.
    prices = prices.assign(Date=pd.to_datetime(prices['Date']))
    # Sort first, then drop incomplete rows -- same order of operations as before.
    return prices.sort_values('Date').dropna()

# Load the price history from load_data's default CSV path and preview
# the first rows to confirm the columns parsed as expected.
df = load_data()
df.head()

In [None]:
## Preprocess and Create Target Variables
def preprocess(df):
    """Build the feature matrix and forward-looking targets from price rows.

    Three target columns are derived from ``Close`` (row order is taken as
    the time order; operations are positional, not label-based):

    * ``Target``          -- the next row's close.
    * ``Future_Close_3``  -- mean of the next 3 closes.
    * ``Future_Close_5``  -- mean of the next 5 closes.

    Rows whose targets cannot be computed (the final 5 rows, plus any row
    with a missing value) are dropped. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain Date, Open, High, Low, Close and Volume columns.

    Returns
    -------
    tuple
        ``(X, y, future_y_3, future_y_5, dates)`` -- the five feature
        columns, the ``Target`` series, the two future-mean series and the
        ``Date`` series, all sharing the same index.
    """
    close = df['Close']

    # Take the trailing rolling mean FIRST and then shift it back by the
    # window length: row t receives mean(Close[t+1 .. t+k]). Shifting first
    # and rolling afterwards yields the same values but leaves the first
    # k-1 rows NaN (the window has no earlier shifted rows to draw on), which
    # made dropna() throw away usable rows at the start of the series.
    enriched = df.assign(
        Target=close.shift(-1),
        Future_Close_3=close.rolling(3).mean().shift(-3),
        Future_Close_5=close.rolling(5).mean().shift(-5),
    ).dropna()

    features = ['Open', 'High', 'Low', 'Close', 'Volume']
    X = enriched[features]
    y = enriched['Target']
    future_y_3 = enriched['Future_Close_3']
    future_y_5 = enriched['Future_Close_5']
    dates = enriched['Date']

    return X, y, future_y_3, future_y_5, dates

# Split the cleaned frame into features (X), the next-close target (y), the
# Future_Close_3 / Future_Close_5 series (y3, y5) and the matching dates.
# y3 and y5 are not referenced by the later cells visible in this notebook.
X, y, y3, y5, dates = preprocess(df)
X.head()

In [None]:
## Train a Linear Regression Model with TimeSeriesSplit
def train_model(X, y):
    """Cross-validate a linear regression over time-ordered folds, then refit on all rows.

    A 5-split ``TimeSeriesSplit`` is used so each fold is scored on rows that
    come after the rows it was fitted on. The mean of the per-fold R² scores
    is printed.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature rows in time order.
    y : pandas.Series
        Target values positionally aligned with ``X``.

    Returns
    -------
    LinearRegression
        The estimator after a final fit on the complete ``X`` and ``y``.
    """
    splitter = TimeSeriesSplit(n_splits=5)
    model = LinearRegression()

    fold_r2 = []
    for fit_idx, holdout_idx in splitter.split(X):
        # Each fit() call re-estimates the coefficients on this fold's rows.
        model.fit(X.iloc[fit_idx], y.iloc[fit_idx])
        fold_r2.append(model.score(X.iloc[holdout_idx], y.iloc[holdout_idx]))

    print("Average R² Score (TimeSeries CV):", np.mean(fold_r2))

    # The returned model is the one trained on every available row.
    model.fit(X, y)
    return model

# Prints the cross-validated R² and keeps the model refit on all of X, y
# for the evaluation and saving cells below.
model = train_model(X, y)

In [None]:
## Evaluate and Visualize Predictions
def evaluate_model(model, X, y, dates):
    """Score ``model`` on ``X``/``y``, then save and plot actual vs predicted values.

    Prints the mean squared error and R² of ``model.predict(X)`` against
    ``y``, writes a Date / Actual / Predicted table to
    ``outputs/predictions.csv``, saves the comparison chart to
    ``outputs/full_predictions_plot.png`` and shows it.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``
    X : pandas.DataFrame
        Feature rows to predict on.
    y : pandas.Series
        True target values for ``X``.
    dates : pandas.Series
        Dates used for the x-axis and the ``Date`` output column.

    Returns
    -------
    None
    """
    predicted = model.predict(X)
    error = mean_squared_error(y, predicted)
    fit_quality = r2_score(y, predicted)

    print(f"MSE: {error:.2f}")
    print(f"R² Score: {fit_quality:.4f}")

    comparison = pd.DataFrame({
        'Date': dates,
        'Actual': y,
        'Predicted': predicted
    })
    comparison.to_csv('outputs/predictions.csv', index=False)

    # Explicit figure/axes handles instead of the implicit pyplot state machine.
    fig, ax = plt.subplots(figsize=(14, 6))
    ax.plot(comparison['Date'], comparison['Actual'], label='Actual')
    ax.plot(comparison['Date'], comparison['Predicted'], label='Predicted', linestyle='--')
    ax.set_title('Stock Price Prediction vs Actual')
    ax.set_xlabel('Date')
    ax.set_ylabel('Close Price')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    fig.savefig('outputs/full_predictions_plot.png')
    plt.show()

# NOTE: X and y are the same rows train_model used for its final fit, so
# the MSE / R² printed here are in-sample figures, not a held-out estimate.
evaluate_model(model, X, y, dates)

In [None]:
## Save model to disk
def save_model(model, path='outputs/stock_model.pkl'):
    """Serialize ``model`` to ``path`` with joblib and report where it went.

    Parameters
    ----------
    model : object
        The fitted estimator (or any joblib-serializable object) to store.
    path : str, optional
        Destination file. Defaults to ``'outputs/stock_model.pkl'``, the
        location this function always used before the parameter existed.

    Returns
    -------
    None

    Notes
    -----
    The parent directory is created when missing, so this function no longer
    depends on the output-directory cell having been run first.
    """
    target_dir = os.path.dirname(path)
    if target_dir:  # a bare filename has no directory component to create
        os.makedirs(target_dir, exist_ok=True)

    joblib.dump(model, path)
    # Build the message from `path` so it can never disagree with the real location.
    print(f"Model saved to {path}")

# Write the fitted model to outputs/stock_model.pkl so it can be reloaded later.
save_model(model)

In [None]:
## Completion Message
# Artifacts are written to the relative 'outputs/' directory (next to the
# notebook's working directory), not to '/outputs/' at the filesystem root.
print("✅ All tasks completed. Outputs saved to outputs/")