# # Task 2: Time Series Forecasting for TSLA
#
# This notebook implements ARIMA and LSTM models to forecast Tesla (TSLA) stock prices.
# The dataset is split chronologically (train: 2015–2023, test: 2024–2025).
# Both models are tuned, evaluated on the test set using MAE, RMSE, and MAPE, and then compared.
#
# **Assumptions**:
# - Data: Adjusted Close prices from `data/scrap data/TSLA_data.csv` (from Task 1).
# - Training: July 1, 2015, to December 31, 2023.
# - Testing: January 1, 2024, to July 31, 2025.
# - Risk-free rate: 0.02 (not used in forecasting but noted for context).
# - Outputs are saved in `data/output/` and `plots/forecasting/`.

In [1]:

# ## Cell 1: Imports and Setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from pmdarima import auto_arima
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import os
import sys
# Make the parent of the notebook's working directory importable so that the
# `src.*` modules used in later cells resolve. The membership check keeps the
# cell idempotent: re-running it does not append the same entry a second time.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)


In [2]:
# Load the TSLA price series and split it into the train and test sets.
from src.time_series_forcasting_models import prepare_data

# Build the CSV path from the parent of the working directory (the same
# directory the setup cell adds to sys.path for `src`) instead of a
# machine-specific absolute path, so the notebook runs from any checkout.
# NOTE(review): assumes the notebook is run from a folder directly under the
# repository root, next to `data/` and `src/` - confirm.
data_path = os.path.join(
    os.path.abspath(os.path.join(os.getcwd(), '..')),
    'data', 'scrap data', 'TSLA_data.csv',
)
if not os.path.isfile(data_path):
    raise FileNotFoundError(f"TSLA price file not found: {data_path}")

train_data, test_data = prepare_data(data_path)


Training data shape: (2140,)
Testing data shape: (395,)


# Fit ARIMA Model

In [5]:
# ## Fit ARIMA Model
# fit_arima is called with the train and test series and its result is
# unpacked into a forecast and the fitted model.
from src.time_series_forcasting_models import fit_arima

arima_forecast, arima_model = fit_arima(train_data, test_data)

# Show the fitted model's summary table.
arima_summary = arima_model.summary()
print(arima_summary)





Performing stepwise search to minimize aic
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=13647.752, Time=0.06 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=13647.771, Time=0.13 sec




 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=13647.825, Time=0.25 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=13646.472, Time=0.04 sec




 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=13648.219, Time=0.59 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0]          
Total fit time: 1.083 seconds


  return get_prediction_index(
  return get_prediction_index(


                               SARIMAX Results                                
Dep. Variable:                      y   No. Observations:                 2140
Model:               SARIMAX(0, 1, 0)   Log Likelihood               -6822.236
Date:                Wed, 13 Aug 2025   AIC                          13646.472
Time:                        00:22:22   BIC                          13652.140
Sample:                             0   HQIC                         13648.546
                               - 2140                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
sigma2        34.5030      0.413     83.548      0.000      33.694      35.312
Ljung-Box (L1) (Q):                   1.98   Jarque-Bera (JB):             10936.43
Prob(Q):                              0.16   Pr

In [6]:
# ## Fit LSTM Model
# fit_lstm is called with the train and test series and its result is unpacked
# into a forecast, the trained model, and the scaler.
from src.time_series_forcasting_models import fit_lstm

lstm_forecast, lstm_model, scaler = fit_lstm(train_data, test_data)

# Keras' Model.summary() prints the table itself and returns None, so wrapping
# it in print() only added a stray "None" line to the output. Call it directly.
lstm_model.summary()

  super().__init__(**kwargs)


Epoch 1/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 50ms/step - loss: 0.0095 - val_loss: 0.0023
Epoch 2/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 35ms/step - loss: 0.0025 - val_loss: 0.0017
Epoch 3/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - loss: 0.0021 - val_loss: 0.0022
Epoch 4/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - loss: 0.0021 - val_loss: 0.0018
Epoch 5/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - loss: 0.0020 - val_loss: 0.0014
Epoch 6/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - loss: 0.0021 - val_loss: 0.0014
Epoch 7/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - loss: 0.0018 - val_loss: 0.0012
Epoch 8/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - loss: 0.0018 - val_loss: 0.0018
Epoch 9/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━

None


In [7]:
# ## Evaluate Models
# Score both forecasts against the held-out test series.
from src.time_series_forcasting_models import evaluate_models

try:
    metrics_df = evaluate_models(test_data, arima_forecast, lstm_forecast)
except ValueError as err:
    # The last recorded run of this cell failed with "Input contains NaN.",
    # which does not say which input is at fault. Re-raise the same exception
    # type with a per-input NaN count so the broken series is obvious.
    # NOTE(review): the ARIMA fit emitted prediction-index warnings, so an
    # index mismatch between the forecast and test_data is a likely cause -
    # confirm inside fit_arima / evaluate_models.
    nan_counts = {
        name: int(pd.isna(np.asarray(values)).sum())
        for name, values in (
            ("test_data", test_data),
            ("arima_forecast", arima_forecast),
            ("lstm_forecast", lstm_forecast),
        )
    }
    raise ValueError(f"{err} NaN counts per input: {nan_counts}") from err

# Display Metrics
print(metrics_df)


ValueError: Input contains NaN.

In [None]:
 # ## Plot Forecasts
 # Pass the train/test series and both forecasts to the plotting helper.
 # plot_forecasts is not imported anywhere in the visible notebook, so this
 # call would raise NameError on a fresh kernel.
 # NOTE(review): assumed to live with the other helpers in
 # src.time_series_forcasting_models - confirm.
 from src.time_series_forcasting_models import plot_forecasts
 plot_forecasts(train_data, test_data, arima_forecast, lstm_forecast)