In [None]:
# 02_regression_model.ipynb

# Importing necessary libraries
import pandas as pd
import numpy as np
import statsmodels.api as sm
import yfinance as yf

# Fit an ordinary-least-squares regression of the closing price on the
# previous day's close and the two moving-average columns, print the fit
# summary, and plot the in-sample fitted values against the actual closes.

# Load the preprocessed data, using the parsed 'Date' column as the index
data = pd.read_csv('processed_aapl_data.csv', index_col='Date', parse_dates=True)

# Feature Engineering: add the previous day's closing price. The moving
# averages (SMA_50, SMA_200) are expected to already be columns in the file.
data['Prev_Close'] = data['Close'].shift(1)

feature_cols = ['Prev_Close', 'SMA_50', 'SMA_200']
target_col = 'Close'

# Drop only rows that are missing a value the model actually uses. A bare
# dropna() would also discard rows because of NaNs in unrelated columns,
# silently shrinking the estimation sample.
data = data.dropna(subset=feature_cols + [target_col])

# Fail with a clear message instead of an opaque error from inside OLS when
# nothing is left to fit (e.g. every SMA_200 value is missing).
if data.empty:
    raise ValueError(
        "No complete rows left after dropping missing values in "
        f"{feature_cols + [target_col]}; cannot fit the regression."
    )

# NOTE(review): SMA_50 / SMA_200 are taken from the file as-is. If they were
# computed from a window that includes the same day's Close, they leak the
# target into the features -- confirm against the preprocessing step and
# consider lagging them with shift(1).

# Define the independent variables (with a constant column so the regression
# has an intercept) and the dependent variable
X = sm.add_constant(data[feature_cols])
y = data[target_col]

# Fit the linear regression model
model = sm.OLS(y, X).fit()

# Model Summary
print(model.summary())

# Predicting the stock price using the model. These are in-sample
# predictions: the rows being predicted are the rows the model was fit on.
data['Predicted_Close'] = model.predict(X)

# Plotting the actual vs predicted closing prices
import matplotlib.pyplot as plt
plt.figure(figsize=(12,6))
plt.plot(data['Close'], label='Actual Close')
plt.plot(data['Predicted_Close'], label='Predicted Close', linestyle='--')
plt.title('Actual vs Predicted AAPL Closing Prices')
plt.legend(loc='best')
plt.show()
