# In [None]:  (notebook cell marker left over from the export)
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.special import inv_boxcox
from scipy.stats import boxcox
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

# Load historical stock, VIX, and SPX data.
df = pd.read_csv('stock_data.csv')

# Parse the dates explicitly. `read_csv(parse_dates=...)` returns the column
# unaltered (object dtype) when a value cannot be parsed, and the failure then
# only shows up later as an AttributeError on `df.index.dayofweek`.
# `pd.to_datetime` raises here, at the real source of the problem.
df['Date'] = pd.to_datetime(df['Date'])

# Index by date, oldest -> newest. Every rolling/lag feature below and the
# unshuffled train/test split assume chronological row order; a stable sort
# keeps the file order for any duplicate dates.
df = df.set_index('Date').sort_index(kind='mergesort')

# Rolling 10-day correlation between the stock close and the SPX close.
df['SPX_Correlation'] = df['Close'].rolling(window=10).corr(df['SPX_Close'])

# Feature engineering.
# NOTE(review): Return, Volatility, SMA_10, EMA_10 and SPX_Correlation are all
# computed from the same day's Close as the Volume being predicted. If the
# model is meant to forecast volume before the close is known, these columns
# should be shifted by one day - confirm the intended use.
df['Return'] = df['Close'].pct_change()  # Daily returns
df['Volatility'] = df['Return'].rolling(window=5).std()  # 5-day rolling volatility
df['Volume_Lag1'] = df['Volume'].shift(1)  # Previous day's volume
df['SMA_10'] = df['Close'].rolling(window=10).mean()  # 10-day simple moving average
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()  # 10-day exponential moving average
df['DayOfWeek'] = df.index.dayofweek  # Monday=0 ... Sunday=6
df['Month'] = df.index.month  # 1 ... 12

# Drop the warm-up rows whose rolling/lag features are NaN. This removes any
# row with a NaN in *any* column, not only the engineered ones.
df = df.dropna()

# Define feature set including VIX and SPX correlation
features = ['Return', 'Volatility', 'Volume_Lag1', 'SMA_10', 'EMA_10',
            'VIX', 'SPX_Correlation', 'DayOfWeek', 'Month']

X = df[features]

# Chronological split: with shuffle=False the first 80% of rows are the
# training set and the last 20% the test set.
X_train, X_test = train_test_split(X, test_size=0.2, shuffle=False)
n_train = len(X_train)

# Target variable: Box-Cox transform of Volume (a power transform, not a plain
# log; it requires strictly positive values and raises otherwise).
# Lambda is estimated from the training rows only, so that no information from
# the held-out period leaks into the transform; the same fixed lambda is then
# applied to the whole column.
_, lam = boxcox(df['Volume'].iloc[:n_train].to_numpy(dtype=float))
df['Volume'] = boxcox(df['Volume'].to_numpy(dtype=float), lmbda=lam)

y = df['Volume']
# Same row boundary as the feature split above.
y_train, y_test = y.iloc[:n_train], y.iloc[n_train:]

# LightGBM Dataset
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test)

# LightGBM Parameters
params = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': -1
}

# Train Model
# Early stopping is configured through a callback: the `early_stopping_rounds`
# keyword of `lgb.train` was removed in LightGBM 4.0, while the callback works
# on both older and newer releases.
# NOTE(review): the same held-out set drives early stopping here and is used
# for the final metrics further down, so those metrics are likely optimistic;
# consider carving a separate validation slice out of the training period.
model = lgb.train(
    params,
    train_data,
    num_boost_round=100,
    valid_sets=[test_data],
    callbacks=[lgb.early_stopping(stopping_rounds=10)],
)

# Predictions (on the Box-Cox scale the model was trained on)
y_pred = model.predict(X_test)

# Reverse Box-Cox Transform: bring predictions and actuals back to raw volume
# units. `inv_boxcox` handles the lam == 0 (pure log) case itself, so no
# hand-written branch is needed.
# NOTE: this rebinds `y_test`; re-running this cell without re-running the
# split would apply the inverse transform twice.
y_pred = inv_boxcox(y_pred, lam)
y_test = pd.Series(
    inv_boxcox(y_test.to_numpy(dtype=float), lam),
    index=y_test.index,
    name=y_test.name,
)

# Evaluate Performance
# RMSE is taken as the square root of the MSE explicitly: the `squared=False`
# shortcut of `mean_squared_error` is deprecated and absent from recent
# scikit-learn releases.
rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))
mae = mean_absolute_error(y_test, y_pred)

print(f"RMSE: {rmse:.2f}")
print(f"MAE: {mae:.2f}")

# Feature Importance Plot
# `lgb.plot_importance` creates its own figure when no `ax` is passed, so the
# size and title are handed to it directly; calling `plt.figure(...)` first
# would only leave an extra, empty figure behind.
lgb.plot_importance(
    model,
    max_num_features=10,
    importance_type='gain',
    figsize=(10, 6),
    title="Feature Importance - LightGBM Model",
)
plt.show()

# Plot Actual vs Predicted over the test period (the last len(y_test) dates).
test_dates = df.index[-len(y_test):]
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_test, label="Actual Volume", marker='o')
plt.plot(test_dates, y_pred, label="Predicted Volume", linestyle='dashed')
plt.xlabel("Date")
plt.ylabel("Volume")
plt.legend()
plt.title("Stock Volume Forecasting with LightGBM (Including VIX & SPX Correlation)")
plt.show()


# Cell output: AttributeError: 'RangeIndex' object has no attribute 'dayofweek'
# (raised by `df.index.dayofweek` when the DataFrame index is not a DatetimeIndex)