<a href="https://colab.research.google.com/github/saksham-42/Time-series/blob/main/Copy_of_TSA_Assignment_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# StockGro Final Project: Time Series Forecasting and Analysis


In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import MinMaxScaler
from prophet import Prophet
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides convergence/deprecation warnings
# raised by the modelling libraries — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

# Global plot appearance: matplotlib style sheet first, then the seaborn palette.
# NOTE(review): presumably the palette has to be set after the style sheet so that
# it is not overridden — keep this order unless verified otherwise.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("pastel")

## Stock Universe Selection

In [None]:
# Universe of ten tickers picked from different sectors so the analysis stays diversified.
stocks = [
    'RELIANCE.NS',
    'INFY.NS',
    'HDFCBANK.NS',
    'TCS.NS',
    'ITC.NS',
    'LT.NS',
    'SUNPHARMA.NS',
    'MARUTI.NS',
    'ADANIGREEN.NS',
    'HINDUNILVR.NS',
]

## Data Collection


In [None]:
# Download window shared by every ticker.
# NOTE(review): yfinance presumably treats `end` as exclusive, so 2024-12-31
# itself would not be included — confirm against the yfinance documentation.
start_date = '2020-01-01'
end_date = '2024-12-31'

# Raw download per ticker, keyed by ticker symbol.
stock_data = {}

for stock in stocks:
    print(f"Downloading data for {stock}...")
    # Pin auto_adjust explicitly: the library default has changed between
    # yfinance releases (see the recorded download message), and 'Close'
    # means a different series depending on its value.
    data = yf.download(stock, start=start_date, end=end_date, auto_adjust=True)
    # Fail fast here instead of letting an empty frame surface later as an
    # obscure error inside the ADF test or the model fits.
    if data is None or data.empty:
        raise RuntimeError(
            f"No price data returned for {stock} ({start_date} to {end_date})"
        )
    data['Ticker'] = stock
    stock_data[stock] = data

Downloading data for RELIANCE.NS...
YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


Downloading data for INFY.NS...


[*********************100%***********************]  1 of 1 completed


Downloading data for HDFCBANK.NS...


[*********************100%***********************]  1 of 1 completed


Downloading data for TCS.NS...


[*********************100%***********************]  1 of 1 completed


Downloading data for ITC.NS...


[*********************100%***********************]  1 of 1 completed


Downloading data for LT.NS...


[*********************100%***********************]  1 of 1 completed


Downloading data for SUNPHARMA.NS...


[*********************100%***********************]  1 of 1 completed


Downloading data for MARUTI.NS...


[*********************100%***********************]  1 of 1 completed


Downloading data for ADANIGREEN.NS...


[*********************100%***********************]  1 of 1 completed


Downloading data for HINDUNILVR.NS...


[*********************100%***********************]  1 of 1 completed


## Data Preprocessing

In [None]:
def preprocess_stock(data):
    """Return a cleaned closing-price frame with a log-return column.

    Only the ``Close`` column of ``data`` is kept (as a copy, so the input is
    left untouched). Gaps are filled forward and then backward, a
    ``Log_Return`` column holding ``log(Close_t) - log(Close_{t-1})`` is
    added, and rows that still contain NaN are dropped — at minimum the first
    row, which has no previous close to difference against.
    """
    prices = data[['Close']].copy().ffill().bfill()
    log_close = np.log(prices['Close'])
    # diff() is exactly log_close - log_close.shift(1)
    prices['Log_Return'] = log_close.diff()
    prices = prices.dropna()
    return prices

# Run the preprocessing on every downloaded frame, keyed by ticker symbol.
preprocessed = {
    ticker: preprocess_stock(raw_frame)
    for ticker, raw_frame in stock_data.items()
}

## Stationarity Check using ADF Test

In [None]:
# Run the Augmented Dickey-Fuller test on each ticker's closing prices and
# report the p-value together with a verdict at the 5% threshold.
print("\nAugmented Dickey-Fuller Test Results:")
for stock, df in preprocessed.items():
    adf_result = adfuller(df['Close'])
    p_value = adf_result[1]  # second element of the result tuple is the p-value
    if p_value < 0.05:
        verdict = 'Stationary'
    else:
        verdict = 'Non-stationary'
    print(f"{stock}: p-value = {p_value:.4f} - {verdict}")


Augmented Dickey-Fuller Test Results:
RELIANCE.NS: p-value = 0.3296 - Non-stationary
INFY.NS: p-value = 0.5678 - Non-stationary
HDFCBANK.NS: p-value = 0.5204 - Non-stationary
TCS.NS: p-value = 0.5985 - Non-stationary
ITC.NS: p-value = 0.9589 - Non-stationary
LT.NS: p-value = 0.9695 - Non-stationary
SUNPHARMA.NS: p-value = 0.9961 - Non-stationary
MARUTI.NS: p-value = 0.7522 - Non-stationary
ADANIGREEN.NS: p-value = 0.1809 - Non-stationary
HINDUNILVR.NS: p-value = 0.0644 - Non-stationary


## Train-Test Split

In [None]:
# Number of most-recent rows held out as the test set for every ticker.
# NOTE(review): 126 is presumably about six months of trading days — confirm.
TEST_SIZE = 126

# Chronological split (no shuffling): everything before the last TEST_SIZE
# rows is training data, the last TEST_SIZE rows are the test set.
df_split = {}
for stock, df in preprocessed.items():
    # With TEST_SIZE rows or fewer, iloc[:-TEST_SIZE] would silently produce
    # an empty training frame and the model fits would fail much later.
    if len(df) <= TEST_SIZE:
        raise ValueError(
            f"{stock}: only {len(df)} rows available, "
            f"need more than {TEST_SIZE} for a train/test split"
        )
    df_split[stock] = {
        'train': df.iloc[:-TEST_SIZE],
        'test': df.iloc[-TEST_SIZE:]
    }

## ARIMA Forecasting

In [None]:
# Per-ticker forecasts, keyed first by ticker and then by model name.
forecast_results = {}

# (p, d, q) passed to every ARIMA fit below.
arima_order = (5, 1, 0)

for stock in stocks:
    # Fit on the training closes only, then forecast two steps past its end.
    train = df_split[stock]['train']['Close']
    model = ARIMA(train, order=arima_order)
    model_fit = model.fit()
    arima_forecast = model_fit.forecast(steps=2)

    # Store the raw values under this ticker, creating its entry on first use.
    per_stock = forecast_results.setdefault(stock, {})
    per_stock['ARIMA'] = arima_forecast.values

## Prophet Forecasting


In [None]:
# Fit one Prophet model per ticker on the training closes and store a
# two-step forecast next to the ARIMA one in forecast_results.
for stock in stocks:
    df_raw = df_split[stock]['train'].copy()
    # Prophet is fitted on a two-column frame: 'ds' (timestamps) and 'y' (values).
    # squeeze() collapses a single-column 'Close' selection into a Series.
    df = pd.DataFrame({
        'ds': df_raw.index,
        'y': df_raw['Close'].squeeze().astype(float)
    }).dropna()

    # NOTE(review): daily_seasonality presumably models an intra-day cycle, which
    # one observation per day cannot identify — confirm it is intended.
    prophet = Prophet(daily_seasonality=True)
    prophet.fit(df)

    # Forecast the first two held-out trading dates rather than the next two
    # calendar days: make_future_dataframe(periods=2) steps by calendar day, so
    # its dates can fall on weekends and would not line up with the two
    # trading steps forecast by ARIMA or with the test rows.
    horizon_dates = df_split[stock]['test'].index[:2]
    # Keep the training dates in front so `forecast` still spans history + horizon.
    future = pd.concat(
        [df[['ds']], pd.DataFrame({'ds': horizon_dates})],
        ignore_index=True,
    )
    forecast = prophet.predict(future)
    prophet_forecast = forecast[['ds', 'yhat']].tail(2)['yhat'].values
    # setdefault mirrors the ARIMA cell, so this cell does not raise KeyError
    # for a ticker that has no entry yet.
    forecast_results.setdefault(stock, {})['Prophet'] = prophet_forecast

DEBUG:cmdstanpy:input tempfile: /tmp/tmpnalt1pn5/nv_ewf4x.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpnalt1pn5/stzj1y_k.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=31699', 'data', 'file=/tmp/tmpnalt1pn5/nv_ewf4x.json', 'init=/tmp/tmpnalt1pn5/stzj1y_k.json', 'output', 'file=/tmp/tmpnalt1pn5/prophet_modelc34ghu0r/prophet_model-20250513093146.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
09:31:46 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
09:31:46 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpnalt1pn5/4sup8fek.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpnalt1pn5/g4u6d79l.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/