# ARCH Model

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import bs4 as bs
import requests
from random import randrange

import yfinance as yf # not sure why but it seems VSCode needs yfinance to be imported again
import arch.data.sp500
from arch import arch_model

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [2]:
# Example: we can get data on the S&P500 index and compute the daily returns (a percentage)
data = arch.data.sp500.load()
market = data["Adj Close"]
returns = 100 * market.pct_change().dropna()

In [4]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1999-01-04,1229.229980,1248.810059,1219.099976,1228.099976,1228.099976,877000000
1999-01-05,1228.099976,1246.109985,1228.099976,1244.780029,1244.780029,775000000
1999-01-06,1244.780029,1272.500000,1244.780029,1272.339966,1272.339966,986900000
1999-01-07,1272.339966,1272.339966,1257.680054,1269.729980,1269.729980,863000000
1999-01-08,1269.729980,1278.239990,1261.819946,1275.089966,1275.089966,937800000
...,...,...,...,...,...,...
2018-12-24,2400.560059,2410.340088,2351.100098,2351.100098,2351.100098,2613930000
2018-12-26,2363.120117,2467.760010,2346.580078,2467.699951,2467.699951,4233990000
2018-12-27,2442.500000,2489.100098,2397.939941,2488.830078,2488.830078,4096610000
2018-12-28,2498.770020,2520.270020,2472.889893,2485.739990,2485.739990,3702620000


In [29]:
# Create a Ticker object
apple_ticker = yf.Ticker("AAPL")

# Fetch historical market data
apple_historical_data = apple_ticker.history(period="max") # data for the entire history of Apple
print("Historical Data:")
apple_historical_data.tail()

Historical Data:


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-12-20 00:00:00-05:00,248.039993,255.0,245.690002,254.490005,147495300,0.0,0.0
2024-12-23 00:00:00-05:00,254.770004,255.649994,253.449997,255.270004,40858800,0.0,0.0
2024-12-24 00:00:00-05:00,255.490005,258.209991,255.289993,258.200012,23234700,0.0,0.0
2024-12-26 00:00:00-05:00,258.190002,260.100006,257.630005,259.019989,27237100,0.0,0.0
2024-12-27 00:00:00-05:00,257.829987,258.700012,253.059998,255.589996,42317100,0.0,0.0


In [30]:
# all data up to the end of 2023
apple_historical_data[:'2023-12-31 00:00:00-05:00']

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1980-12-12 00:00:00-05:00,0.098834,0.099264,0.098834,0.098834,469033600,0.0,0.0
1980-12-15 00:00:00-05:00,0.094108,0.094108,0.093678,0.093678,175884800,0.0,0.0
1980-12-16 00:00:00-05:00,0.087232,0.087232,0.086802,0.086802,105728000,0.0,0.0
1980-12-17 00:00:00-05:00,0.088951,0.089381,0.088951,0.088951,86441600,0.0,0.0
1980-12-18 00:00:00-05:00,0.091530,0.091959,0.091530,0.091530,73449600,0.0,0.0
...,...,...,...,...,...,...,...
2023-12-22 00:00:00-05:00,194.228458,194.457347,192.029240,192.656174,37122800,0.0,0.0
2023-12-26 00:00:00-05:00,192.666108,192.944742,191.889912,192.108841,28919300,0.0,0.0
2023-12-27 00:00:00-05:00,191.551603,192.556674,190.158419,192.208374,48087700,0.0,0.0
2023-12-28 00:00:00-05:00,193.193544,193.711013,192.228271,192.636276,34049900,0.0,0.0


In the following code, we make training and testing datasets from Apple's historical data and then fit GARCH models with parameters $p,q$ ranging from 1 to 5. We compare each of their forecasts to a rolling volatility (with window of size 5) and then look for the best parameters. 

In [75]:
train = apple_historical_data[:'2024-12-01 00:00:00-05:00']['Close'] # for options pricing, we want the volatility of the underlying stock, not the returns
test = apple_historical_data['2024-12-01 00:00:00-05:00':]['Close']

In [91]:
realized_vol = test.rolling(window=5).std().dropna()

# Initialize variables to store results
results = []
best_model = None
best_mse = float('inf')

# Loop over p and q in range(1, 6)
for p in range(1, 6):
    for q in range(1, 6):
        try:
            # Fit a GARCH(p, q) model
            model = arch_model(train, vol='Garch', p=p, q=q, dist='Normal')
            model_fit = model.fit(disp="off")
            
            # Forecast for the test period
            forecast = model_fit.forecast(horizon=len(test))
            # Because the rolling window has to drop some NaN values, we need to align forecast length here
            forecast_vol = np.sqrt(forecast.variance.iloc[-1][:len(realized_vol)])
            
            # Calculate MSE
            mse = np.mean((realized_vol.values - forecast_vol.values) ** 2)
            
            # Store results
            results.append({'p': p, 'q': q, 'MSE': mse})
            
            # Track the best model
            if mse < best_mse:
                best_mse = mse
                best_model = model_fit
        
        except Exception as e:
            print(f"Error with p={p}, q={q}: {e}")

# Convert results to a DataFrame for easier inspection
results_df = pd.DataFrame(results)

# Print and sort results by MSE
print("Best models sorted by MSE:")
print(results_df.sort_values(by='MSE').head())

Best models sorted by MSE:
    p  q           MSE
11  3  2  26391.480912
18  4  4  36417.002196
16  4  2  44493.274971
17  4  3  49650.410661
20  5  1  50366.769863


The code below scrapes Wikipedia for a list of the S&P500 companies and then gives a way to randomly select a ticker and get data on it from Yahoo Finance.

In [5]:
def get_tickers():
    
    # Scrape the Wikipedia page related to the S&P500 
    resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable sticky-header'})
    tickers = []

    # Import stock tickers
    for row in table.findAll('tr')[2:]: 
        ticker_info = (row.findAll('td')[0].text.replace('\n', ''), row.findAll('td')[1].text, row.findAll('td')[2].text)
        tickers.append(ticker_info)
    
    return tickers

In [6]:
list = get_tickers()
list

[('AOS', 'A. O. Smith', 'Industrials'),
 ('ABT', 'Abbott Laboratories', 'Health Care'),
 ('ABBV', 'AbbVie', 'Health Care'),
 ('ACN', 'Accenture', 'Information Technology'),
 ('ADBE', 'Adobe Inc.', 'Information Technology'),
 ('AMD', 'Advanced Micro Devices', 'Information Technology'),
 ('AES', 'AES Corporation', 'Utilities'),
 ('AFL', 'Aflac', 'Financials'),
 ('A', 'Agilent Technologies', 'Health Care'),
 ('APD', 'Air Products', 'Materials'),
 ('ABNB', 'Airbnb', 'Consumer Discretionary'),
 ('AKAM', 'Akamai Technologies', 'Information Technology'),
 ('ALB', 'Albemarle Corporation', 'Materials'),
 ('ARE', 'Alexandria Real Estate Equities', 'Real Estate'),
 ('ALGN', 'Align Technology', 'Health Care'),
 ('ALLE', 'Allegion', 'Industrials'),
 ('LNT', 'Alliant Energy', 'Utilities'),
 ('ALL', 'Allstate', 'Financials'),
 ('GOOGL', 'Alphabet Inc. (Class A)', 'Communication Services'),
 ('GOOG', 'Alphabet Inc. (Class C)', 'Communication Services'),
 ('MO', 'Altria', 'Consumer Staples'),
 ('AMZN',

In [7]:
# Pick a random stock (without repetition) from a list
def random_picker(list):   
    i = randrange(len(list))
    random_stock = list[i]
    
    return random_stock

In [8]:
# Get n stocks from S&P500, some dataframes are empty
n=10
k=0
period = '1y'
prices = []
stock_names=[]
stock_symbols=[]
while k<n:
    name = random_picker(list)
    ticker = yf.Ticker(name[0])
    stock_prices = ticker.history(period=period)
    if not stock_prices.empty:
        stock_names.append(name)
        stock_symbols.append(name[0])
        prices.append(stock_prices)
        k+=1

In [10]:
stock_names

[('MAS', 'Masco', 'Industrials'),
 ('TRGP', 'Targa Resources', 'Energy'),
 ('CAG', 'Conagra Brands', 'Consumer Staples'),
 ('WRB', 'W. R. Berkley Corporation', 'Financials'),
 ('WBA', 'Walgreens Boots Alliance', 'Consumer Staples'),
 ('HD', 'Home Depot (The)', 'Consumer Discretionary'),
 ('BA', 'Boeing', 'Industrials'),
 ('RF', 'Regions Financial Corporation', 'Financials'),
 ('SYK', 'Stryker Corporation', 'Health Care'),
 ('HAS', 'Hasbro', 'Consumer Discretionary')]

In [11]:
# combines columns labeled 'Value' from dataframes that are placed in a list (so each dataframe needs 'Value' as a column)
# adds an average value column and also daily return % column

def combine(prices,value):
    # Concatenate the value columns
    combined = pd.concat([df[value] for df in prices], axis=1)

    combined.columns = stock_symbols

    combined = combined.dropna(axis=1)

    # Calculate the average value price for each day
    combined['Average_' + value] = combined.mean(axis=1)
    combined['Daily Returns %'] = ((combined['Average_'+value]/combined['Average_'+value].shift(1))-1)*100
    return combined

In [12]:
combined = combine(prices,'Close')
combined.head()

Unnamed: 0_level_0,MAS,TRGP,CAG,WRB,WBA,HD,BA,RF,SYK,HAS,Average_Close,Daily Returns %
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2023-12-28 00:00:00-05:00,66.133057,84.441048,27.264101,45.81496,25.335596,339.055328,260.350006,18.681635,297.074799,49.672874,121.38234,
2023-12-29 00:00:00-05:00,65.945999,84.949562,27.349985,46.010136,24.868887,338.264679,260.660004,18.519186,297.412476,49.277187,121.32581,-0.046572
2024-01-02 00:00:00-05:00,65.54232,84.509499,28.18022,46.940491,25.383219,336.829834,251.759995,18.633858,294.20459,48.157692,120.014172,-1.081088
2024-01-03 00:00:00-05:00,63.277824,85.810104,27.941648,47.207233,24.354555,330.172882,243.910004,17.964949,291.930237,46.430187,117.899962,-1.761633
2024-01-04 00:00:00-05:00,63.238449,84.343262,27.407244,47.604099,23.106825,330.494995,244.940002,18.127399,296.012115,46.671459,118.194585,0.249892
