<a href="https://colab.research.google.com/github/shruthits1/MyTrials/blob/main/time_series_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
!pip install pandas numpy requests matplotlib scikit-learn statsmodels pyportfolioopt

import pandas as pd
import numpy as np
import requests
import time
import os
import matplotlib.pyplot as plt
from datetime import datetime
from pypfopt import expected_returns, efficient_frontier, plotting, objective_functions




In [8]:
# Set Alphavantage API key
API_KEY = "PIEO765GFIFQ484V"
DATA_DIR = "stock_data"
os.makedirs(DATA_DIR, exist_ok=True)


## Fetch stocks

In [20]:
def fetch_and_save_stock_data(symbol, api_key):
    print(f"Fetching {symbol}...")
    url = f"https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={symbol}&outputsize=full&apikey={api_key}"
    r = requests.get(url)
    data = r.json().get("Time Series (Daily)", {})

    if not data:
        print(f"No data for {symbol}")
        return None

    df = pd.DataFrame.from_dict(data, orient="index")
    df = df.rename(columns={"4. close": "close"}).astype(float)
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()

    csv_path = os.path.join(DATA_DIR, f"{symbol}.csv")
    df[["close"]].to_csv(csv_path)
    return df[["close"]]

## feature engineering

In [18]:
def add_features(df):
    df["returns"] = df["close"].pct_change()
    #df["volatility"] = df["returns"].rolling(20).std()
    df["momentum"] = df["close"] / df["close"].shift(20) - 1
    df.dropna(inplace=True)
    return df

## Baseline model : ARIMA

In [33]:
from statsmodels.tsa.arima.model import ARIMA

def forecast_arima(df, ticker, steps=756):  # ~3 years of trading days
    model = ARIMA(df[ticker], order=(5,1,0))
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=steps)
    return forecast

## Estimated returns

In [12]:
def compute_expected_return(forecast_series):
    start = forecast_series.iloc[0]
    end = forecast_series.iloc[-1]
    cagr = np.log(end / start) / 3  # 3-year CAGR
    return cagr

## portfolio Optimization

In [13]:
from pypfopt import EfficientFrontier, expected_returns, risk_models

def optimize_portfolio(price_df):
    mu = expected_returns.mean_historical_return(price_df)
    S = risk_models.sample_cov(price_df)
    ef = EfficientFrontier(mu, S)
    weights = ef.max_sharpe()
    return ef.clean_weights()


# Load and run

In [21]:
tickers = ["AAPL", "TSLA", "NVDA", "MSFT"]  # Replace with top gainers/losers
price_data = {}

for ticker in tickers:
    csv_path = os.path.join(DATA_DIR, f"{ticker}.csv")
    if os.path.exists(csv_path):
        df = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    else:
        df = fetch_and_save_stock_data(ticker, API_KEY)
    if df is not None:
        df = add_features(df)
        price_data[ticker] = df["close"]

price_df = pd.DataFrame(price_data)

Fetching AAPL...
Fetching TSLA...
Fetching NVDA...
Fetching MSFT...


In [23]:
price_df.head()

Unnamed: 0,AAPL,TSLA,NVDA,MSFT
1999-11-30,97.87,,34.63,91.05
1999-12-01,103.06,,36.38,93.19
1999-12-02,110.19,,39.06,94.81
1999-12-03,115.0,,38.13,96.12
1999-12-06,116.0,,39.19,95.44


In [25]:
#there are 2680 NAN values for Tesla from 1999 . setting closing price to 0
price_df.fillna(0, inplace=True)

In [35]:
for ticker in price_df.columns:
    # Pass a DataFrame with the 'close' column for each ticker
    forecast = forecast_arima(price_df, "AAPL")
    print("forecast: ", forecast)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


forecast:  6481    239.615374
6482    239.635265
6483    239.675667
6484    239.680911
6485    239.680283
           ...    
7232    239.680211
7233    239.680211
7234    239.680211
7235    239.680211
7236    239.680211
Name: predicted_mean, Length: 756, dtype: float64


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


forecast:  6481    239.615374
6482    239.635265
6483    239.675667
6484    239.680911
6485    239.680283
           ...    
7232    239.680211
7233    239.680211
7234    239.680211
7235    239.680211
7236    239.680211
Name: predicted_mean, Length: 756, dtype: float64


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


forecast:  6481    239.615374
6482    239.635265
6483    239.675667
6484    239.680911
6485    239.680283
           ...    
7232    239.680211
7233    239.680211
7234    239.680211
7235    239.680211
7236    239.680211
Name: predicted_mean, Length: 756, dtype: float64
forecast:  6481    239.615374
6482    239.635265
6483    239.675667
6484    239.680911
6485    239.680283
           ...    
7232    239.680211
7233    239.680211
7234    239.680211
7235    239.680211
7236    239.680211
Name: predicted_mean, Length: 756, dtype: float64


  return get_prediction_index(
  return get_prediction_index(


## Build Portfolio

In [36]:
forecasted_returns = {}
for ticker in price_df.columns:
    # Pass a DataFrame with the 'close' column for each ticker
    forecast = forecast_arima(price_df, ticker)
    print("forecast: ", forecast)
    forecasted_returns[ticker] = compute_expected_return(forecast)
    print("forecasted_returns: ", forecasted_returns)

returns_series = pd.Series(forecasted_returns)
cov_matrix = price_df.pct_change().cov()


ef = EfficientFrontier(returns_series, cov_matrix)
weights = ef.max_sharpe()
ef.portfolio_performance(verbose=True)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


forecast:  6481    239.615374
6482    239.635265
6483    239.675667
6484    239.680911
6485    239.680283
           ...    
7232    239.680211
7233    239.680211
7234    239.680211
7235    239.680211
7236    239.680211
Name: predicted_mean, Length: 756, dtype: float64
forecasted_returns:  {'AAPL': np.float64(9.018389540656496e-05)}


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


forecast:  6481    350.384539
6482    349.828781
6483    350.011390
6484    350.181698
6485    350.853416
           ...    
7232    350.844827
7233    350.844827
7234    350.844827
7235    350.844827
7236    350.844827
Name: predicted_mean, Length: 756, dtype: float64
forecasted_returns:  {'AAPL': np.float64(9.018389540656496e-05), 'TSLA': np.float64(0.0004376005919365321)}


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


forecast:  6481    167.269175
6482    167.249695
6483    167.371163
6484    167.372959
6485    167.481548
           ...    
7232    167.464242
7233    167.464242
7234    167.464242
7235    167.464242
7236    167.464242
Name: predicted_mean, Length: 756, dtype: float64
forecasted_returns:  {'AAPL': np.float64(9.018389540656496e-05), 'TSLA': np.float64(0.0004376005919365321), 'NVDA': np.float64(0.00038850308183014135)}
forecast:  6481    495.853287
6482    495.819532
6483    496.251368
6484    496.185633
6485    495.840963
           ...    
7232    495.889714
7233    495.889714
7234    495.889714
7235    495.889714
7236    495.889714
Name: predicted_mean, Length: 756, dtype: float64
forecasted_returns:  {'AAPL': np.float64(9.018389540656496e-05), 'TSLA': np.float64(0.0004376005919365321), 'NVDA': np.float64(0.00038850308183014135), 'MSFT': np.float64(2.4486861659851542e-05)}
Expected annual return: 0.0%
Annual volatility: 3.2%
Sharpe Ratio: 0.01


  return get_prediction_index(
  return get_prediction_index(


(np.float64(0.0004169431882660869),
 np.float64(0.0321350638332425),
 np.float64(0.012974711686577545))