# Calculating Close-to-Close Volatility Forecasts

This code was taken directly from P4DSF. The next step is to refactor it into a reusable function.

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
yf.pdr_override()
from pandas_datareader import data as pdr

In [None]:
# Download daily price history for one underlying and tidy the resulting frame.
underlying = 'EWJ'
df_underlying = (
    pdr.get_data_yahoo(underlying, start='2010-05-28', end='2018-12-29')
    .reset_index()  # turn the index into an ordinary column
    # snake_case the column labels: lower-case, spaces -> underscores
    .rename(columns=lambda label: label.lower().replace(' ', '_'))
    .rename(columns={'date': 'trade_date'})
)
df_underlying.insert(0, 'ticker', underlying)  # ticker goes in as the first column
df_underlying

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,ticker,trade_date,open,high,low,close,adj_close,volume
0,EWJ,2010-05-28,38.520000,38.599998,38.040001,38.040001,30.781044,7155925
1,EWJ,2010-06-01,38.040001,38.520000,37.880001,37.919998,30.683935,6133800
2,EWJ,2010-06-02,37.680000,38.160000,37.560001,38.119999,30.845778,4481100
3,EWJ,2010-06-03,38.320000,38.439999,37.799999,38.040001,30.781044,9611225
4,EWJ,2010-06-04,37.599998,37.759998,37.119999,37.320000,30.198427,8464875
...,...,...,...,...,...,...,...,...
2157,EWJ,2018-12-21,50.049999,55.549999,49.570000,49.660000,46.213531,24613600
2158,EWJ,2018-12-24,49.340000,49.650002,48.990002,49.119999,45.711018,16386600
2159,EWJ,2018-12-26,49.650002,50.200001,49.080002,50.049999,46.576469,28378500
2160,EWJ,2018-12-27,49.970001,50.770000,49.590000,50.720001,47.199966,24602600


In [None]:
# Daily log return: ln(close_t) - ln(close_{t-1}).  The first row has no prior
# close, so its return is NaN.
df_underlying['dly_ret'] = np.log(df_underlying['close']).diff()

# Drop only the rows whose return is undefined.  Restricting the check to
# `dly_ret` keeps rows that merely have a NaN in an unrelated column
# (a bare dropna() would silently discard those as well).
df_underlying.dropna(subset=['dly_ret'], inplace=True)

# Re-number the rows 0..n-1 after the drop.
df_underlying.reset_index(drop=True, inplace=True)
df_underlying

Unnamed: 0,ticker,trade_date,open,high,low,close,adj_close,volume,dly_ret
0,EWJ,2010-06-01,38.040001,38.520000,37.880001,37.919998,30.683935,6133800,-0.003160
1,EWJ,2010-06-02,37.680000,38.160000,37.560001,38.119999,30.845778,4481100,0.005260
2,EWJ,2010-06-03,38.320000,38.439999,37.799999,38.040001,30.781044,9611225,-0.002101
3,EWJ,2010-06-04,37.599998,37.759998,37.119999,37.320000,30.198427,8464875,-0.019109
4,EWJ,2010-06-07,37.040001,37.240002,36.599998,36.720001,29.712925,4972750,-0.016208
...,...,...,...,...,...,...,...,...,...
2156,EWJ,2018-12-21,50.049999,55.549999,49.570000,49.660000,46.213531,24613600,-0.031321
2157,EWJ,2018-12-24,49.340000,49.650002,48.990002,49.119999,45.711018,16386600,-0.010934
2158,EWJ,2018-12-26,49.650002,50.200001,49.080002,50.049999,46.576469,28378500,0.018756
2159,EWJ,2018-12-27,49.970001,50.770000,49.590000,50.720001,47.199966,24602600,0.013298


In [None]:
# Integer day-of-week for every trade date (Monday=0 ... Sunday=6).
weekday = df_underlying['trade_date'].dt.dayofweek
weekday

0       1
1       2
2       3
3       4
4       0
       ..
2156    4
2157    0
2158    2
2159    3
2160    4
Name: trade_date, Length: 2161, dtype: int32

In [None]:
# Assign a running week number to every trading day.
#
# A new week begins whenever the weekday value decreases from one row to the
# next (e.g. Friday=4 followed by Monday=0).  `diff()` is NaN on the first row,
# which compares False against 0, so the numbering starts at 0.
#
# The vectorised form does not depend on `weekday` carrying a default 0..n-1
# index (no label-based `weekday[ix]` lookups) and gives an empty list for an
# empty input, so the result always has the same length as `weekday`.
week_num = (weekday.diff() < 0).cumsum().tolist()
np.array(week_num)

array([  0,   0,   0, ..., 447, 447, 447])

In [None]:
# Place the week number in the third column position (index 2).
df_underlying.insert(loc=2, column='week_num', value=week_num)
df_underlying

Unnamed: 0,ticker,trade_date,week_num,open,high,low,close,adj_close,volume,dly_ret
0,EWJ,2010-06-01,0,38.040001,38.520000,37.880001,37.919998,30.683935,6133800,-0.003160
1,EWJ,2010-06-02,0,37.680000,38.160000,37.560001,38.119999,30.845778,4481100,0.005260
2,EWJ,2010-06-03,0,38.320000,38.439999,37.799999,38.040001,30.781044,9611225,-0.002101
3,EWJ,2010-06-04,0,37.599998,37.759998,37.119999,37.320000,30.198427,8464875,-0.019109
4,EWJ,2010-06-07,1,37.040001,37.240002,36.599998,36.720001,29.712925,4972750,-0.016208
...,...,...,...,...,...,...,...,...,...,...
2156,EWJ,2018-12-21,446,50.049999,55.549999,49.570000,49.660000,46.213531,24613600,-0.031321
2157,EWJ,2018-12-24,447,49.340000,49.650002,48.990002,49.119999,45.711018,16386600,-0.010934
2158,EWJ,2018-12-26,447,49.650002,50.200001,49.080002,50.049999,46.576469,28378500,0.018756
2159,EWJ,2018-12-27,447,49.970001,50.770000,49.590000,50.720001,47.199966,24602600,0.013298


In [None]:
# First and last trade date of every week, one row per `week_num`.
#
# Named aggregation yields the output columns directly and keeps `week_num`
# as the real group key, rather than rebuilding it from the positional index.
# The string names 'min'/'max' avoid the FutureWarning pandas raises when the
# Python builtins are passed to `agg`.
df_start_end = (
    df_underlying
    .groupby('week_num', as_index=False)
    .agg(
        week_start=('trade_date', 'min'),
        week_end=('trade_date', 'max'),
    )
)
df_start_end

Unnamed: 0,week_num,week_start,week_end
0,0,2010-06-01,2010-06-04
1,1,2010-06-07,2010-06-11
2,2,2010-06-14,2010-06-18
3,3,2010-06-21,2010-06-25
4,4,2010-06-28,2010-07-02
...,...,...,...
443,443,2018-11-26,2018-11-30
444,444,2018-12-03,2018-12-07
445,445,2018-12-10,2018-12-14
446,446,2018-12-17,2018-12-21


In [None]:
# Attach each week's start/end dates to its daily rows.  No `on=` is given, so
# pandas joins on the column names the two frames have in common.
df_underlying = df_underlying.merge(df_start_end, how='inner')
df_underlying

Unnamed: 0,ticker,trade_date,week_num,open,high,low,close,adj_close,volume,dly_ret,week_start,week_end
0,EWJ,2010-06-01,0,38.040001,38.520000,37.880001,37.919998,30.683935,6133800,-0.003160,2010-06-01,2010-06-04
1,EWJ,2010-06-02,0,37.680000,38.160000,37.560001,38.119999,30.845778,4481100,0.005260,2010-06-01,2010-06-04
2,EWJ,2010-06-03,0,38.320000,38.439999,37.799999,38.040001,30.781044,9611225,-0.002101,2010-06-01,2010-06-04
3,EWJ,2010-06-04,0,37.599998,37.759998,37.119999,37.320000,30.198427,8464875,-0.019109,2010-06-01,2010-06-04
4,EWJ,2010-06-07,1,37.040001,37.240002,36.599998,36.720001,29.712925,4972750,-0.016208,2010-06-07,2010-06-11
...,...,...,...,...,...,...,...,...,...,...,...,...
2156,EWJ,2018-12-21,446,50.049999,55.549999,49.570000,49.660000,46.213531,24613600,-0.031321,2018-12-17,2018-12-21
2157,EWJ,2018-12-24,447,49.340000,49.650002,48.990002,49.119999,45.711018,16386600,-0.010934,2018-12-24,2018-12-28
2158,EWJ,2018-12-26,447,49.650002,50.200001,49.080002,50.049999,46.576469,28378500,0.018756,2018-12-24,2018-12-28
2159,EWJ,2018-12-27,447,49.970001,50.770000,49.590000,50.720001,47.199966,24602600,0.013298,2018-12-24,2018-12-28


In [None]:
def close_to_close(r, periods_per_year=252):
    """Return the annualized close-to-close volatility of a return sample.

    Computes the sample standard deviation of ``r`` (divisor ``T - 1``) and
    scales it by ``sqrt(periods_per_year)``.

    Parameters
    ----------
    r : array-like with ``.shape``, ``.mean()`` and element-wise arithmetic
        (e.g. a pandas Series or NumPy array) of periodic returns.
    periods_per_year : int or float, default 252
        Annualization factor; the default keeps the original behavior.

    Returns
    -------
    float
        Annualized volatility, or NaN when fewer than two observations are
        supplied (the sample standard deviation is undefined in that case).
    """
    T = r.shape[0]
    if T < 2:
        # With T == 1 the divisor T - 1 is zero; with T == 0 there is no data.
        return np.nan
    r_bar = r.mean()
    sample_var = ((r - r_bar) ** 2).sum() / (T - 1)
    return np.sqrt(sample_var) * np.sqrt(periods_per_year)

In [None]:
# One annualized close-to-close volatility figure per (ticker, week).
# NOTE(review): the final row is discarded by the trailing slice - presumably
# because the last week may be incomplete; confirm the intent.
df_close_to_close = (
    df_underlying
    .groupby(['ticker', 'week_num', 'week_start', 'week_end'], as_index=False)[['dly_ret']]
    .agg(close_to_close)
    .rename(columns={'dly_ret': 'close_to_close'})
    .iloc[:-1]
)
df_close_to_close

Unnamed: 0,ticker,week_num,week_start,week_end,close_to_close
0,EWJ,0,2010-06-01,2010-06-04,0.162906
1,EWJ,1,2010-06-07,2010-06-11,0.200378
2,EWJ,2,2010-06-14,2010-06-18,0.118301
3,EWJ,3,2010-06-21,2010-06-25,0.140981
4,EWJ,4,2010-06-28,2010-07-02,0.195822
...,...,...,...,...,...
442,EWJ,442,2018-11-19,2018-11-23,0.219897
443,EWJ,443,2018-11-26,2018-11-30,0.127427
444,EWJ,444,2018-12-03,2018-12-07,0.340403
445,EWJ,445,2018-12-10,2018-12-14,0.141891
