<a href="https://colab.research.google.com/github/prasadanvekar/elvtrdocs/blob/main/Bonus_Assignment_Predicting_Apple_Inc_Stock_Prices.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# install the Yahoo finance package and pmdarima package
!pip install yfinance pmdarima

Collecting pmdarima
  Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pmdarima
Successfully installed pmdarima-2.0.4


In [None]:
#################################################################################
# Bonus Assignment Objective: Build an LSTM model to predict the closing prices #
#of Apple Inc. (AAPL) stock and compare it to the models in the core assignment.#
# Author:  Prasad S Anvekar                                                     #
# Date:  22 Jan 2024                                                            #
#################################################################################
# Import necessary libraries
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pmdarima import auto_arima
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import seaborn as sns

# Data Acquisition
# Download the price history for the configured ticker from Yahoo Finance.
# The symbol is taken from `ticker` so the download and the plot titles
# further down always refer to the same instrument.
ticker = 'AAPL'
stock_data = yf.download(ticker, start='2016-01-01', end='2024-01-21')
# Sort chronologically by reassignment (no in-place mutation), so re-running
# this cell always starts from a freshly downloaded frame.
stock_data = stock_data.sort_index(ascending=True)

# Ensure proper DatetimeIndex with frequency for time series analysis
stock_data.index = pd.to_datetime(stock_data.index)
# Reindex to business-day frequency; business days missing from the download
# are forward-filled from the previous available row.
stock_data = stock_data.asfreq('B', method='ffill')

print('-------Raw Data-------')
print(stock_data.head())

# Calculate rolling averages of the closing price.
# Windows are counted in rows of stock_data; min_periods=1 lets the earliest
# rows average over however many observations exist, so no NaNs appear here.
stock_data['7_day_avg'] = stock_data['Close'].rolling(window=7, min_periods=1).mean()
# The column is named 30_day_avg, so it uses a 30-row window
# (it was previously computed with window=10, contradicting its name).
stock_data['30_day_avg'] = stock_data['Close'].rolling(window=30, min_periods=1).mean()

print('-------Engineered Data-------')
print(stock_data.head())
print(stock_data.describe())

# Feature engineering
# Shift each average down by one row so the features on a given row are built
# only from earlier rows; the first row of each shifted column becomes NaN.
stock_data['7_day_shifted'] = stock_data['7_day_avg'].shift(1)
stock_data['30_day_shifted'] = stock_data['30_day_avg'].shift(1)

# Split the data into training and test sets
# Rows strictly before split_date are used for training; the rest for testing.
split_date = pd.to_datetime('2023-10-01')
train = stock_data.loc[stock_data.index < split_date]
test = stock_data.loc[stock_data.index >= split_date]

# Prepare the training and test data for Linear Regression
feature_cols = ['7_day_shifted', '30_day_shifted']
X_train = train[feature_cols].dropna()
# Pick the targets by the index that survived dropna() instead of a positional
# [1:] slice, so features and targets stay aligned whichever rows were dropped.
y_train = train['Close'].loc[X_train.index].values
X_test = test[feature_cols].dropna()
y_test = test['Close']

# Fail fast, with a clear message, if features and targets do not line up.
assert len(X_train) == len(y_train), 'X_train and y_train have different lengths'
assert len(X_test) == len(y_test), 'X_test and y_test have different lengths'

# Review the sizes of the prepared datasets
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

# Show the closing price of both partitions on one set of axes
fig, ax = plt.subplots(figsize=(12, 6))
for subset, legend_name in ((train, 'Train Data'), (test, 'Test Data')):
    ax.plot(subset.index, subset['Close'], label=legend_name)
ax.set_title('Train vs Test Dataset')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
ax.legend()
plt.show()

# Linear regression on the lagged rolling-average features
model_lr = LinearRegression()
model_lr.fit(X=X_train, y=y_train)

# Predicted closing prices for the test period
reg_predictions = model_lr.predict(X=X_test)

# Exponential smoothing with additive trend and additive seasonality;
# window_size is passed as the seasonal period length.
window_size = 20
exp_model = ExponentialSmoothing(
    train['Close'],
    trend='add',
    seasonal='add',
    seasonal_periods=window_size,
)
exp_fit = exp_model.fit()

# Forecast one value per row of the test set
exp_predictions = exp_fit.forecast(steps=len(test))

# Test-set error for each model: MAE and MSE from sklearn, RMSE as sqrt(MSE)
reg_mae, reg_mse = (
    score(y_test, reg_predictions)
    for score in (mean_absolute_error, mean_squared_error)
)
reg_rmse = np.sqrt(reg_mse)

exp_mae, exp_mse = (
    score(y_test, exp_predictions)
    for score in (mean_absolute_error, mean_squared_error)
)
exp_rmse = np.sqrt(exp_mse)

# Compare both models' predictions with the actual test-period prices;
# each legend entry carries that model's MAE and RMSE.
reg_label = f'Linear Regression\nMAE: {reg_mae:.2f}, RMSE: {reg_rmse:.2f}'
exp_label = f'Exponential Smoothing\nMAE: {exp_mae:.2f}, RMSE: {exp_rmse:.2f}'

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test.index, y_test, label='Actual')
ax.plot(test.index, reg_predictions, label=reg_label)
ax.plot(test.index, exp_predictions, label=exp_label)
ax.legend()
ax.set_title(f'{ticker} Stock Price Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
plt.show()

# Text summary of the same metrics
reg_report = f'Linear Regression Metrics:\nMAE: {reg_mae:.2f}\nMSE: {reg_mse:.2f}\nRMSE: {reg_rmse:.2f}'
exp_report = f'\nExponential Smoothing Metrics:\nMAE: {exp_mae:.2f}\nMSE: {exp_mse:.2f}\nRMSE: {exp_rmse:.2f}'
print(reg_report)
print(exp_report)

# Auto-fit ARIMA model
# auto_arima searches over candidate (non-seasonal) orders and returns the
# selected model already fitted to the series it was given, so fitting it a
# second time on the same data only repeats work.
auto_model = auto_arima(train['Close'], seasonal=False, suppress_warnings=True)
# fit_model is kept as an alias so code that refers to either name still works.
fit_model = auto_model

# Forecast the trend for the test set (one step per row of the test set)
forecast = fit_model.predict(n_periods=len(test))

# Calculate performance metrics against the actual test-period closing prices
mae = mean_absolute_error(y_test, forecast)
mse = mean_squared_error(y_test, forecast)
rmse = np.sqrt(mse)

# Print performance metrics
print(f'\nARIMA Model Metrics:\nMAE: {mae:.2f}\nMSE: {mse:.2f}\nRMSE: {rmse:.2f}')

# Overlay the ARIMA forecast on the training history and the actual test prices
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(train.index, train['Close'], label='Training Data', marker='o')
ax.plot(test.index, test['Close'], label='Actual Trend', marker='o')
ax.plot(test.index, forecast, label='Predicted Trend', linestyle='--', marker='o')
ax.set(
    title='Financial Time Series Trend Forecasting with ARIMA (Auto-fit)',
    xlabel='Time',
    ylabel='Value',
)
ax.legend()
plt.show()

