Tobias Kuhlmann, Karlsruhe Institute of Technology (KIT), tobias.kuhlmann@student.kit.edu

In [2]:
%matplotlib inline

# Pretty Display of Variables
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Double resolution plotting for retina display
%config InlineBackend.figure_format ='retina'

import numpy as np
import pylab as pl
import pandas as pd
import matplotlib.pyplot as plt

import glob
import os

import datetime as dt

## Data
Calculate 
- Implied volatilities of every single stock i $\sigma^{Q}_{i,t}$
- Implied volatilities of Index $\sigma^{Q}_{M,t}$

In [3]:
# Sanity check: the relative project directory must resolve from the notebook's
# working directory; listing it shows which inputs are available.
project_dir = "../Option_Implied_Beta_Tobias/"
os.listdir(project_dir)

['Fundamentals_SP500_Full.xlsx',
 'usdOIScurve.csv',
 '.DS_Store',
 '0_Paper',
 'SP500_data',
 'Calculation_Process2.ipynb',
 '2019_04_26_Cpy_Python_Code-Skewness_v8.ipynb',
 'processed_data',
 'instrumentid_and_symbol.csv',
 'CRAMnoarbEOD_USOPT0007588D1_measuresByMaturity.csv',
 '.ipynb_checkpoints',
 'stocks_risk_neutral_measures']

###### Import instrument ID to ticker mapping

In [4]:
# Load the lookup table that maps instrument ids to ticker symbols and names
id_ticker_path = "../Option_Implied_Beta_Tobias/instrumentid_and_symbol.csv"
id_ticker_map = pd.read_csv(id_ticker_path)
# Both expressions below are displayed because ast_node_interactivity is "all"
id_ticker_map.shape
id_ticker_map.head(5)


(7590, 3)

Unnamed: 0,instrumentid,symbol,name
0,USOPT0000001D1,1R,NFX (OPIS) Mont Belvieu Non-LST Propane Future
1,USOPT0000002D1,7Q,NFX Mont Belvieu Non- LST Natural Gasoline Fut...
2,USOPT0000003D1,8K,NFX (OPIS) Conway Propane Future
3,USOPT0000004D1,8M,NFX (OPIS) Conway Normal Butane Future
4,USOPT0000005D1,A,Agilent Technologies Inc


##### Implied volatilities of Index $\sigma^{Q}_{M,t}$
- For a single day: observed maturities of e.g. 14 and 41 days -> interpolate linearly to 30 days
- Interpolation follows Simon's vol surface paper

##### Annualization
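- Annualise the implied variances (the code applies this linear time scaling to `bakshiVariance`, i.e. to the variance, not to its square root):
$$\sigma^{Q}_{M,365} = \sigma^{Q}_{M,t} \cdot \frac{365}{t}$$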

In [5]:
def annualize_variance(df):
    """
    Annualizes variance by multiplying by 365/'daystomaturity'

    The input dataframe is left untouched: a new dataframe is returned in which
    the raw 'bakshiVariance' column is replaced by 'bakshi_variance_ann'.
    Calling the function twice on the same input therefore gives the same result.

    @input: dataframe with columns ['instrumentID',
                                     'loctimestamp',
                                     'daystomaturity', 
                                     'bakshiVariance']
    @return: new dataframe with the same rows and index, 'bakshiVariance'
             dropped and 'bakshi_variance_ann' appended as last column
    """
    # Variance scales linearly in time, hence the plain 365/t factor.
    # NOTE: rows with daystomaturity == 0 divide by zero and yield inf/NaN.
    annualized = df['bakshiVariance'] * 365 / df['daystomaturity']
    # assign() and drop() both return new frames, so the caller's frame keeps
    # its raw column and does not silently gain the annualized one.
    return df.assign(bakshi_variance_ann=annualized).drop(columns=['bakshiVariance'])

##### Interpolation
- The target time to maturity is 30 days, $\tau_j = 30$
$$\sigma^{Q}_{M,30} = \sigma^{Q}_{M,\tau_l} + \frac{30 - \tau_l}{\tau_h - \tau_l}(\sigma^{Q}_{M,\tau_h}-\sigma^{Q}_{M,\tau_l})$$
Where $\tau_h$ ($\tau_l$) represents the next longer (shorter) observed time to maturity, relative to the targeted time to maturity $\tau_j = 30$

In [6]:
def interpolate_maturities(sp500_implied_var, target_days=30):
    """
    Interpolates maturities linearly to 30 days (or to `target_days`)

    For every date the annualized variance is interpolated linearly in
    'daystomaturity' between the observed maturities of that date:
    - dates that already contain an observed `target_days` row keep that row
      unchanged (the other maturities of that date are discarded)
    - if the target lies beyond the longest observed maturity, the value of the
      longest maturity is carried forward (flat extrapolation)
    - if the target lies before the shortest observed maturity, the result is NaN
    - rows without a date are dropped

    The input dataframe is not modified, so the function can be called
    repeatedly on the same frame.

    @input: dataframe of one instrument id with columns ['instrumentID',
                                     'loctimestamp',
                                     'daystomaturity', 
                                     'bakshi_variance_ann']
            target_days: maturity (in days) to interpolate to, default 30
    @return: dataframe holding only the rows with daystomaturity == target_days,
             columns ordered as ['loctimestamp', 'daystomaturity', <remaining
             input columns>]; the index is the row position in the date/maturity
             sorted working frame
    """

    # Work on a copy with a fresh RangeIndex; the caller's frame stays as it is.
    observed = sp500_implied_var.reset_index(drop=True)
    leading = ['loctimestamp', 'daystomaturity']
    ordered_columns = leading + [col for col in observed.columns if col not in leading]
    if observed.empty:
        return observed[ordered_columns]

    # Rows that already sit exactly on the target maturity are passed through.
    at_target = observed['daystomaturity'] == target_days
    already_observed = observed[at_target]
    # Every date without an observed target maturity has to be interpolated.
    to_interpolate = observed[~observed['loctimestamp'].isin(already_observed['loctimestamp'])]

    # One empty target row per remaining date; the instrument id is taken from
    # that date's own rows instead of relying on positional index alignment.
    placeholders = to_interpolate.groupby('loctimestamp')['instrumentID'].first().reset_index()
    placeholders['daystomaturity'] = target_days
    placeholders['bakshi_variance_ann'] = np.nan

    # DataFrame.append no longer exists in pandas >= 2.0; concat once instead.
    parts = [part for part in (to_interpolate, placeholders, already_observed) if not part.empty]
    combined = pd.concat(parts, sort=False, ignore_index=True)
    combined['bakshi_variance_ann'] = pd.to_numeric(combined['bakshi_variance_ann'])
    combined = (combined.dropna(subset=['loctimestamp'])
                        .sort_values(by=['loctimestamp', 'daystomaturity'])
                        .reset_index(drop=True))

    def _fill_one_date(group):
        """Linearly fill missing variances of one date along daystomaturity."""
        days = group['daystomaturity'].to_numpy(dtype=float)
        variance = group['bakshi_variance_ann'].to_numpy(dtype=float)
        known = ~np.isnan(variance)
        if known.all() or not known.any():
            return variance
        filled = variance.copy()
        # left=NaN: no extrapolation below the shortest maturity;
        # np.interp's default on the right repeats the last observed value.
        filled[~known] = np.interp(days[~known], days[known], variance[known], left=np.nan)
        return filled

    # The frame is sorted by date, so each date is one contiguous run of rows and
    # concatenating the per-date results reproduces the frame's row order.
    pieces = [_fill_one_date(group) for _, group in combined.groupby('loctimestamp', sort=False)]
    if pieces:
        combined['bakshi_variance_ann'] = np.concatenate(pieces)

    # Only return the target maturity
    result = combined[ordered_columns]
    return result[result['daystomaturity'] == target_days]

##### Interpolation not on volatility surface, but later on beta term structure

Estimate the beta for observed maturities. This way we prevent issues with stocks that only have one maturity and don’t allow reliable interpolation of Q variance to 30 days (due to term structure in variance). Finally, we check the term structure of the recovered Q betas and potentially interpolate the beta to 30 days.

In [13]:
# Index measures file (semicolon separated); only the columns needed below are read
sp500_measures_path = "../Option_Implied_Beta_Tobias/CRAMnoarbEOD_USOPT0007588D1_measuresByMaturity.csv"
sp500_columns = ['instrumentID',
                 'loctimestamp',
                 'daystomaturity', 
                 'bakshiVariance']
sp500_options = pd.read_csv(sp500_measures_path,
                            sep=";",
                            usecols=sp500_columns,
                            parse_dates=['loctimestamp'],
                            infer_datetime_format=True)

# Peek at the raw rows before annualizing
sp500_options.head()
# Disabled check: number of distinct observed maturities per date and instrument
#sp500_options.groupby(by=['loctimestamp', 'instrumentID']).daystomaturity.nunique()

# Annualize the raw variances
sp500_implied_var = annualize_variance(sp500_options)
# Interpolation to 30 days is switched off here on purpose: every observed
# maturity is kept so that interpolation can happen later on the beta term structure
#sp500_implied_var = interpolate_maturities(sp500_implied_var)

sp500_implied_var.shape
# Export to csv (disabled)
#sp500_implied_var.to_csv('../Option_Implied_Beta_Tobias/processed_data/sp500_implied_vol.csv', index=False)

Unnamed: 0,instrumentID,loctimestamp,daystomaturity,bakshiVariance
0,USOPT0007588D1,2004-01-02,15,0.00136
1,USOPT0007588D1,2004-01-02,50,0.00461
2,USOPT0007588D1,2004-01-02,78,0.007467
3,USOPT0007588D1,2004-01-02,169,0.015998
4,USOPT0007588D1,2004-01-02,260,0.024475


(58244, 4)

##### Implied volatilities of every single stock i $\sigma^{Q}_{i,t}$
Read in risk neutral measures for single stocks and combine in dataframe

##### Interpolation

In [39]:
# the path to your csv file directory
mycsvdir = '../Option_Implied_Beta_Tobias/Single_Stock_Skewness/'

# get all the csv files in that directory; sorted so that the row order of the
# combined frame does not depend on the file system's listing order
csvfiles_w_path = sorted(glob.glob(os.path.join(mycsvdir, '*.csv')))

# columns of the combined result
output_columns = ['instrumentID', 
                  'loctimestamp',
                  'daystomaturity', 
                  'bakshi_variance_ann']

# loop through the files and read them in with pandas; the per-stock results are
# collected in a list and concatenated once after the loop (growing a DataFrame
# with DataFrame.append inside the loop is quadratic, and append was removed in
# pandas 2.0)
per_stock_frames = []
for csvfile in csvfiles_w_path:
    df = pd.read_csv(csvfile, 
                     usecols=['loctimestamp',
                                     'daystomaturity', 
                                     'bakshiVariance',
                                     'interpolmaturity'],
                     parse_dates=['loctimestamp'],
                     infer_datetime_format=True)
    # add column with instrument id: the file name up to the first underscore
    # (the variable still carries the extension until splitext strips it)
    filename_wo_ext=os.path.basename(csvfile)
    df['instrumentID'] = os.path.splitext(filename_wo_ext)[0].partition("_")[0]
    # drop interpolated maturities, we interpolate ourselves
    df = df[df['interpolmaturity']==0].drop(columns='interpolmaturity')
    # annualise bakshi variances
    single_stock_implied_var = annualize_variance(df)
    # interpolate following Walther, Ulrich(2019)
    single_stock_implied_var = interpolate_maturities(single_stock_implied_var)

    # collect
    per_stock_frames.append(single_stock_implied_var[output_columns])

if per_stock_frames:
    # each frame keeps its own index, as with the former repeated append
    stock_risk_neutral_measures = pd.concat(per_stock_frames, sort=False)
else:
    # no csv files found: keep an empty frame with the expected columns
    stock_risk_neutral_measures = pd.DataFrame(columns=output_columns)

print(f"shape: {stock_risk_neutral_measures.shape}")
print(f"Unique instrument id: {stock_risk_neutral_measures.instrumentID.unique().shape}")
stock_risk_neutral_measures.head(5)

shape: (1468488, 4)
Unique instrument id: (487,)


Unnamed: 0,instrumentID,loctimestamp,daystomaturity,bakshi_variance_ann
1,USOPT0002207D1,2004-01-02,30,0.073859
8,USOPT0002207D1,2004-01-05,30,0.080492
15,USOPT0002207D1,2004-01-06,30,0.110247
22,USOPT0002207D1,2004-01-07,30,0.115185
29,USOPT0002207D1,2004-01-08,30,0.114663


In [40]:
# Write the combined single-stock measures to disk without the index column.
# Note: despite the "implied_vol" file name, the exported measure is the
# annualized variance column 'bakshi_variance_ann'.
export_path = '../Option_Implied_Beta_Tobias/processed_data/single_stocks_implied_vol.csv'
stock_risk_neutral_measures.to_csv(export_path, index=False)