In [29]:
import arch 
import math
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm

In [30]:
logRet = pd.read_csv("nasdaq_w_logret.txt", delim_whitespace = True)
logRet.head()


Unnamed: 0,Date,logreturn
0,19-Nov-84,0.0816
1,26-Nov-84,-1.1893
2,3-Dec-84,-1.2871
3,10-Dec-84,-0.1254
4,17-Dec-84,2.3978


In [31]:
volModel = arch.arch_model(logRet[["logreturn"]], mean = 'Zero')
#set mu to zero
volModel = volModel.fit()

Iteration:      1,   Func. Count:      5,   Neg. LLF: 5840.276335800089
Iteration:      2,   Func. Count:     14,   Neg. LLF: 2626.920481088593
Iteration:      3,   Func. Count:     20,   Neg. LLF: 2322.7989825904597
Iteration:      4,   Func. Count:     25,   Neg. LLF: 2367.4394347999773
Iteration:      5,   Func. Count:     31,   Neg. LLF: 2322.5887538555526
Iteration:      6,   Func. Count:     35,   Neg. LLF: 2322.5885987044226
Iteration:      7,   Func. Count:     39,   Neg. LLF: 2322.588539272379
Iteration:      8,   Func. Count:     42,   Neg. LLF: 2322.5885392724576
Optimization terminated successfully    (Exit mode 0)
            Current function value: 2322.588539272379
            Iterations: 8
            Function evaluations: 42
            Gradient evaluations: 8


In [32]:
volModel.params

omega       0.265938
alpha[1]    0.210302
beta[1]     0.779292
Name: params, dtype: float64

Now I need to fit the same model, but over four time periods
(Nov 19, 1984: June 16, 1987)
(June 9, 1987: Aug 15, 1990)
(Aug 8, 1990: Mar 13, 1998)
(Mar 6, 1998: Sep 15, 2003)

In [38]:
#Find the indices that correspond to those dates:
dates = ["19-Nov-84", "15-Jun-87", "8-Jun-87", "13-Aug-90", 
        "6-Aug-90", "9-Mar-98", "2-Mar-98", "15-Sep-03"]

idx = list()
for i in dates:
    idx.append(logRet[logRet['Date']==i].index.values[0])
    
timeperiods = 4

In [39]:
#Now lets fit each of these timeperiods
subVolModel = list()
for i in range(0, timeperiods):
    startIdx = idx[2*i]
    endIdx = idx[2*i + 1]
    #Last date needs to include the end date
    if i == timeperiods-1:
        subVolModel.append(arch.arch_model(logRet.iloc[startIdx:,1], mean = 'Zero'))
        subVolModel[i] = subVolModel[i].fit()
    else:
        #These should not include the end-date
        subVolModel.append(arch.arch_model(logRet.iloc[startIdx:endIdx, 1], mean = 'Zero'))
        subVolModel[i] = subVolModel[i].fit()

Iteration:      1,   Func. Count:      5,   Neg. LLF: 442.4858754974817
Iteration:      2,   Func. Count:     11,   Neg. LLF: 274.24238534761406
Iteration:      3,   Func. Count:     15,   Neg. LLF: 274.23990964036363
Iteration:      4,   Func. Count:     19,   Neg. LLF: 274.2326066493505
Iteration:      5,   Func. Count:     23,   Neg. LLF: 274.2122967072929
Iteration:      6,   Func. Count:     27,   Neg. LLF: 274.2149576767904
Iteration:      7,   Func. Count:     32,   Neg. LLF: 274.21006181646453
Iteration:      8,   Func. Count:     37,   Neg. LLF: 274.20740019046775
Iteration:      9,   Func. Count:     41,   Neg. LLF: 274.2073949379761
Iteration:     10,   Func. Count:     44,   Neg. LLF: 274.2073949379916
Optimization terminated successfully    (Exit mode 0)
            Current function value: 274.2073949379761
            Iterations: 10
            Function evaluations: 44
            Gradient evaluations: 10
Iteration:      1,   Func. Count:      5,   Neg. LLF: 219328.916557

In [40]:
data = pd.DataFrame(columns = ('omega', 'alpha', 'beta', 'beta+alpha', 'half-life'))
for iterator, i in enumerate(subVolModel):
    omega = i.params[0]
    alpha = i.params[1]
    beta = i.params[2]
    AB = alpha + beta
    halfLife = -1*math.log(2)/math.log(AB)
    data.loc[iterator] = [omega, alpha, beta, AB, halfLife]
    
data.loc[len(subVolModel)] = [volModel.params[0], volModel.params[1], volModel.params[2],
                              volModel.params[1]+volModel.params[2],
                              -1*math.log(2)/(math.log(volModel.params[1]+volModel.params[2]))]
data = data.set_axis(['Period1','Period2','Period3','Period4','Full-Period'], axis = 'index')
data

Unnamed: 0,omega,alpha,beta,beta+alpha,half-life
Period1,0.939574,4.673352e-16,0.735141,0.735141,2.252722
Period2,0.63976,0.5405512,0.415746,0.956297,15.511251
Period3,0.16063,0.02589907,0.933133,0.959032,16.570401
Period4,0.531863,0.1472041,0.845084,0.992288,89.530398
Full-Period,0.265938,0.2103023,0.779292,0.989594,66.264167


The parameters appear to be dynamic, i.e. they are not constant over time, and there seems to be very different volatility persistence estimates as it seems to change as new "volatility regimes" take over the market and displace old ones. For example, during period 1, both estimates of volatility persistence--$\beta + \alpha$ and half-life--were relatively low aat 0.735 and 2.25 respectively. But, in period 4, which includes the dot-com bust, these estimates spiked to 0.992228 and 89.53. Note that when $\beta + \alpha = 1$, our time series becomes non-stationary and the unconditional variance becomes infinite. 
The last thing to note is that when fitting the GARCH(1,1) model in the full period, the parameters seem to be far more heavily weighted to the period 4 weights than the period1 weights, even when taking into account the time-scale differences, suggesting these estimatares are impacted heavily by outliers. 