# Calculating Close-to-Close Volatility Forecasts via a Function

Let's load the packages that we will need.

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
yf.pdr_override()
from pandas_datareader import data as pdr

Here we take the code from *P4DSF* and wrap it in a function that takes an `underlying` as input and returns all of the volatility forecasts for the backtest period in a `DataFrame`.

In [None]:
def get_vol_forecasts(underlying):
    """Return weekly close-to-close volatility figures for one ticker.

    Daily prices for `underlying` are downloaded from Yahoo Finance for the
    fixed window 2010-05-28 to 2018-12-29, converted to daily log returns,
    bucketed into trading weeks, and the annualized sample standard
    deviation of each week's returns is computed.

    Parameters
    ----------
    underlying : str
        Ticker symbol handed to `pdr.get_data_yahoo`.

    Returns
    -------
    pandas.DataFrame
        One row per trading week with the columns `ticker`, `week_num`,
        `week_start`, `week_end` and `close_to_close`.  The last week of
        the sample is dropped.  A week containing a single return has a
        `close_to_close` of NaN.

    Raises
    ------
    ValueError
        If the download returns no rows.
    """

    # reading in data from yahoo finance
    df_underlying = pdr.get_data_yahoo(
        underlying, start='2010-05-28', end='2018-12-29'
    ).reset_index()
    if df_underlying.empty:
        # fail with a clear message instead of an obscure error further down
        raise ValueError(f"no price data returned for {underlying!r}")
    df_underlying.columns = df_underlying.columns.str.lower().str.replace(' ', '_')
    df_underlying.rename(columns={'date': 'trade_date'}, inplace=True)
    df_underlying.insert(0, 'ticker', underlying)

    # calculating returns; the first row has no previous close, so its NaN
    # return (and any other row with missing data) is dropped
    df_underlying['dly_ret'] = np.log(df_underlying['close']).diff()
    df_underlying.dropna(inplace=True)
    df_underlying.reset_index(drop=True, inplace=True)

    # calculating week number: a new week starts when the weekday decreases
    # from one row to the next, or when two consecutive rows are at least
    # seven days apart (a gap that long always crosses a week boundary even
    # if the weekday did not decrease)
    trade_date = df_underlying['trade_date']
    starts_new_week = (
        (trade_date.dt.weekday.diff() < 0)
        | (trade_date.diff() >= pd.Timedelta(days=7))
    )
    df_underlying['week_num'] = starts_new_week.cumsum()

    # defining close-to-close estimator
    def close_to_close(r):
        # sample standard deviation (T - 1 in the denominator), annualized
        # with 252 trading days; yields NaN rather than dividing by zero
        # when the week holds a single return
        return r.std(ddof=1) * np.sqrt(252)

    # calculating start of week, end of week, and the forecast in one group by
    df_close_to_close = (
        df_underlying
        .groupby(['ticker', 'week_num'], as_index=False)
        .agg(
            week_start=('trade_date', 'min'),
            week_end=('trade_date', 'max'),
            close_to_close=('dly_ret', close_to_close),
        )
    )

    # dropping the final week of the sample
    # NOTE(review): presumably because the last week may be incomplete - confirm
    df_close_to_close = df_close_to_close.iloc[:-1]

    return df_close_to_close

Let's check that our function works for SPY.

In [None]:
# Spot-check the function on a single ticker; the result is displayed as the cell output.
get_vol_forecasts(underlying='SPY')

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,ticker,week_num,week_start,week_end,close_to_close
0,SPY,0,2010-06-01,2010-06-04,0.420077
1,SPY,1,2010-06-07,2010-06-11,0.252653
2,SPY,2,2010-06-14,2010-06-18,0.169387
3,SPY,3,2010-06-21,2010-06-25,0.147360
4,SPY,4,2010-06-28,2010-07-02,0.186375
...,...,...,...,...,...
442,SPY,442,2018-11-19,2018-11-23,0.162775
443,SPY,443,2018-11-26,2018-11-30,0.159534
444,SPY,444,2018-12-03,2018-12-07,0.331849
445,SPY,445,2018-12-10,2018-12-14,0.148140


## Calculating Volatility Forecasts for Universe

Now we'll go through our universe and calculate the `close_to_close` forecasts for all the underlyings.  Let's begin by reading in our universe from the precalculated CSV.

In [None]:
# Read the precalculated universe CSV and display it.
df_universe = pd.read_csv(filepath_or_buffer='../data/universe_per_expiration.csv')
df_universe

Unnamed: 0,underlying,monthly,expiration,execution_date,num_otm
0,DIA,False,2010-06-11,2010-06-04,15
1,DIA,True,2010-06-19,2010-06-11,15
2,DIA,False,2010-06-25,2010-06-18,11
3,DIA,True,2010-07-17,2010-07-09,13
4,DIA,True,2010-08-21,2010-08-13,14
...,...,...,...,...,...
13169,XRT,False,2018-11-30,2018-11-23,14
13170,XRT,False,2018-12-07,2018-11-30,15
13171,XRT,False,2018-12-14,2018-12-07,13
13172,XRT,True,2018-12-21,2018-12-14,11


Next, let's isolate all the unique underlyings in our universe.

In [None]:
# Distinct tickers in the universe, in order of first appearance, as a plain list.
underlyings = df_universe['underlying'].unique().tolist()
underlyings

['DIA',
 'EEM',
 'EFA',
 'EMB',
 'EWH',
 'EWJ',
 'EWU',
 'EWW',
 'EWY',
 'EWZ',
 'FXE',
 'FXI',
 'FXY',
 'GDX',
 'GLD',
 'HYG',
 'IWM',
 'IYR',
 'QQQ',
 'SLV',
 'SMH',
 'SPY',
 'TLT',
 'UNG',
 'USO',
 'XBI',
 'XHB',
 'XLB',
 'XLC',
 'XLE',
 'XLF',
 'XLI',
 'XLK',
 'XLP',
 'XLU',
 'XLV',
 'XLY',
 'XME',
 'XOP',
 'XRT']

Now we can loop through and calculate the volatility forecasts for all the underlyings in the universe.  For each underlying, the volatility forecasts are put in a `list` which will be concatenated in the next step.

In [None]:
# Collect one forecast DataFrame per underlying; the ticker is printed
# after each one finishes so progress is visible.
forecasts = list()
for ix_underlying in underlyings:
    df = get_vol_forecasts(underlying=ix_underlying)
    forecasts += [df]
    print(ix_underlying)

[*********************100%***********************]  1 of 1 completed
DIA
[*********************100%***********************]  1 of 1 completed
EEM
[*********************100%***********************]  1 of 1 completed
EFA
[*********************100%***********************]  1 of 1 completed
EMB
[*********************100%***********************]  1 of 1 completed
EWH
[*********************100%***********************]  1 of 1 completed
EWJ
[*********************100%***********************]  1 of 1 completed
EWU
[*********************100%***********************]  1 of 1 completed
EWW
[*********************100%***********************]  1 of 1 completed
EWY
[*********************100%***********************]  1 of 1 completed
EWZ
[*********************100%***********************]  1 of 1 completed
FXE
[*********************100%***********************]  1 of 1 completed
FXI
[*********************100%***********************]  1 of 1 completed
FXY
[*********************100%***********************] 

Now let's concatenate our volatility estimates into a single `DataFrame`.

In [None]:
# Stack the per-ticker frames row-wise; each frame keeps its own row labels,
# so index values repeat across tickers.
df_forecasts = pd.concat(forecasts, axis=0, ignore_index=False)

In [None]:
# Display the combined forecasts for all underlyings.
df_forecasts

Unnamed: 0,ticker,week_num,week_start,week_end,close_to_close
0,DIA,0,2010-06-01,2010-06-04,0.363399
1,DIA,1,2010-06-07,2010-06-11,0.235762
2,DIA,2,2010-06-14,2010-06-18,0.139662
3,DIA,3,2010-06-21,2010-06-25,0.130178
4,DIA,4,2010-06-28,2010-07-02,0.160041
...,...,...,...,...,...
442,XRT,442,2018-11-19,2018-11-23,0.362015
443,XRT,443,2018-11-26,2018-11-30,0.172415
444,XRT,444,2018-12-03,2018-12-07,0.401701
445,XRT,445,2018-12-10,2018-12-14,0.225990


## Examining the Volatility Forecasts

The following bit of code finds the first week for which each underlying has a volatility forecast.  Notice that all of them have data at the beginning of the backtest period except for XLC.  We should remove this from the universe in a later step.

In [None]:
# Earliest week_end available for each ticker.
df_forecasts.groupby('ticker')['week_end'].agg('min')

ticker
DIA   2010-06-04
EEM   2010-06-04
EFA   2010-06-04
EMB   2010-06-04
EWH   2010-06-04
EWJ   2010-06-04
EWU   2010-06-04
EWW   2010-06-04
EWY   2010-06-04
EWZ   2010-06-04
FXE   2010-06-04
FXI   2010-06-04
FXY   2010-06-04
GDX   2010-06-04
GLD   2010-06-04
HYG   2010-06-04
IWM   2010-06-04
IYR   2010-06-04
QQQ   2010-06-04
SLV   2010-06-04
SMH   2010-06-04
SPY   2010-06-04
TLT   2010-06-04
UNG   2010-06-04
USO   2010-06-04
XBI   2010-06-04
XHB   2010-06-04
XLB   2010-06-04
XLC   2018-06-22
XLE   2010-06-04
XLF   2010-06-04
XLI   2010-06-04
XLK   2010-06-04
XLP   2010-06-04
XLU   2010-06-04
XLV   2010-06-04
XLY   2010-06-04
XME   2010-06-04
XOP   2010-06-04
XRT   2010-06-04
Name: week_end, dtype: datetime64[ns]

In [None]:
# Rows of the universe that belong to XLC.
df_universe[df_universe['underlying'] == "XLC"]

Unnamed: 0,underlying,monthly,expiration,execution_date,num_otm
9427,XLC,True,2018-07-20,2018-07-13,3
9428,XLC,True,2018-08-17,2018-08-10,3
9429,XLC,True,2018-09-21,2018-09-14,3
9430,XLC,True,2018-10-19,2018-10-12,6
9431,XLC,True,2018-11-16,2018-11-09,6
9432,XLC,True,2018-12-21,2018-12-14,6


## Export Forecasts to a CSV

In [None]:
# Write the combined forecasts to disk without the row index.
df_forecasts.to_csv(path_or_buf='../data/close_to_close_forecasts.csv', index=False)