<a href="https://colab.research.google.com/github/siddheshsp0/FinSearch-2025/blob/master/ARIMA_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Financial metrics functions
def calc_metrics(prices, rf=0.02):
    """Compute annualized volatility, Sharpe ratio and maximum drawdown.

    Args:
        prices: Sequence of prices in chronological order. Any array-like is
            accepted; it is flattened to 1-D, so a column-shaped (n, 1)
            array is treated the same as a flat one.
        rf: Annual risk-free rate subtracted from the annualized mean return.

    Returns:
        Tuple ``(volatility, sharpe_ratio, max_dd)``:
        volatility is the population std of simple returns scaled by
        sqrt(252); sharpe_ratio is ``(mean(returns) * 252 - rf) / volatility``
        (NaN when volatility is zero); max_dd is the most negative
        peak-to-trough decline as a fraction (0.0 if the price never falls
        below a previous high). All three are NaN when fewer than two prices
        are given, since no return can be computed.
    """
    prices = np.asarray(prices, dtype=float).ravel()
    if prices.size < 2:
        # No returns can be formed; avoid np.min() raising on an empty array.
        return np.nan, np.nan, np.nan

    returns = np.diff(prices) / prices[:-1]
    volatility = np.std(returns) * np.sqrt(252)
    sharpe_ratio = (np.mean(returns) * 252 - rf) / volatility if volatility != 0 else np.nan

    # Max drawdown is measured on the price path itself so the very first
    # price counts as a candidate peak. Building the path from
    # cumprod(1 + returns) drops that first point and misses a decline that
    # starts at the initial price.
    peak = np.maximum.accumulate(prices)
    drawdown = (prices - peak) / peak
    max_dd = np.min(drawdown)
    return volatility, sharpe_ratio, max_dd

# Per-ticker pipeline: download daily closes, fit ARIMA(5,1,0) and a stacked
# LSTM, and print volatility / Sharpe ratio / max drawdown for the price path
# each model produces.
tickers = ['RELIANCE.NS', 'TCS.NS', 'INFY.NS']
start_date = "2010-01-01"
end_date = "2019-06-30"

# Loop-invariant settings.
forecast_steps = 30   # number of out-of-sample ARIMA steps
window = 60           # look-back length (in rows) fed to the LSTM

for ticker in tickers:
    print(f"\nProcessing {ticker}")

    # Download data
    df = yf.download(ticker, start=start_date, end=end_date)
    df.dropna(inplace=True)
    close_prices = df['Close'].values
    dates = df.index

    # df['Close'] may come back as a one-column frame, making .values 2-D.
    # Everything below works on a flat 1-D float array.
    close_prices_1d = np.asarray(close_prices, dtype=float).ravel()

    # Need at least two (window -> target) samples so the 80/20 split leaves
    # a non-empty training set; also covers a failed/empty download.
    if len(close_prices_1d) < window + 2:
        print(f"Skipping {ticker}: not enough price data ({len(close_prices_1d)} rows)")
        continue

    # ---- ARIMA ---- #
    print("ARIMA Model:")
    model = ARIMA(close_prices_1d, order=(5, 1, 0))  # ARIMA(5,1,0)
    arima_fit = model.fit()

    forecast = arima_fit.forecast(steps=forecast_steps)
    # forecast may be a pandas object or an ndarray depending on the input type.
    forecast = forecast.values if hasattr(forecast, 'values') else forecast
    forecast_prices = np.concatenate([close_prices_1d, np.asarray(forecast).ravel()])

    # ARIMA metrics (on entire forecast path: history followed by forecast)
    volatility, sharpe, max_dd = calc_metrics(forecast_prices)
    print(arima_fit.summary())
    print(f"Volatility: {volatility:.4f}, Sharpe Ratio: {sharpe:.4f}, Max Drawdown: {max_dd:.4f}")

    # ---- LSTM ---- #
    print("\nLSTM Model:")
    # NOTE(review): the scaler is fitted on the full series, including the
    # rows later used as the test set — confirm whether that is intended.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_close = scaler.fit_transform(close_prices_1d.reshape(-1, 1))

    # Prepare training data: each sample is `window` consecutive scaled
    # closes, and its target is the scaled close that follows them.
    X, y = [], []
    for i in range(window, len(scaled_close)):
        X.append(scaled_close[i-window:i, 0])
        y.append(scaled_close[i, 0])
    X, y = np.array(X), np.array(y)

    # Train/test split (chronological, first 80% for training)
    split = int(0.8 * len(X))
    X_train, y_train = X[:split], y[:split]
    X_test, y_test = X[split:], y[split:]

    # Add a trailing feature axis -> (samples, window, 1). Each dimension
    # must be an int; passing the whole `.shape` tuple raises
    # "TypeError: 'tuple' object cannot be interpreted as an integer".
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    # Build & train LSTM
    lstm_model = Sequential()
    lstm_model.add(LSTM(50, return_sequences=True, input_shape=(window, 1)))
    lstm_model.add(Dropout(0.2))
    lstm_model.add(LSTM(50, return_sequences=False))
    lstm_model.add(Dropout(0.2))
    lstm_model.add(Dense(25))
    lstm_model.add(Dense(1))
    lstm_model.compile(optimizer='adam', loss='mean_squared_error')
    lstm_model.fit(X_train, y_train, batch_size=32, epochs=50, verbose=0)  # silent training

    # Predictions & inverse transform back to price units
    lstm_preds = lstm_model.predict(X_test)
    lstm_preds = scaler.inverse_transform(lstm_preds.reshape(-1,1)).flatten()
    actual_test = scaler.inverse_transform(y_test.reshape(-1,1)).flatten()

    # Sample k targets row k + window, so the first test target is row
    # split + window: actual history up to there, then the predictions.
    # Uses the flattened prices so both pieces are 1-D for concatenation.
    lstm_prices = np.concatenate([close_prices_1d[:split+window], lstm_preds])

    volatility, sharpe, max_dd = calc_metrics(lstm_prices)
    print(f"Volatility: {volatility:.4f}, Sharpe Ratio: {sharpe:.4f}, Max Drawdown: {max_dd:.4f}")

print("\nProcessing completed for all tickers.")


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


Processing RELIANCE.NS
ARIMA Model:





                               SARIMAX Results                                
Dep. Variable:                      y   No. Observations:                 2339
Model:                 ARIMA(5, 1, 0)   Log Likelihood               -6860.103
Date:                Fri, 15 Aug 2025   AIC                          13732.207
Time:                        14:34:25   BIC                          13766.749
Sample:                             0   HQIC                         13744.790
                               - 2339                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.0826      0.011      7.479      0.000       0.061       0.104
ar.L2          0.0195      0.013      1.529      0.126      -0.005       0.044
ar.L3         -0.0235      0.015     -1.619      0.1

TypeError: 'tuple' object cannot be interpreted as an integer