In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from math import *
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import warnings; warnings.simplefilter('ignore')
init_notebook_mode(connected=True)

In [2]:
def read_data(date,index="SPY",indicator="impliedV"):
    path = os.path.join("data",indicator,index,date+"_"+index+"~market__"+indicator+".csv")
    df = pd.read_csv(path)
    return(df)

# volatility surface :

In [3]:
def plot_vol_surface(df,x_col="expiry",y_col="strike",target_z_col="midImpliedV"):
    df = df.reset_index()
    df["expiry"] = df["expiry"].astype("str")
    df["expiry"] = pd.to_datetime(df["expiry"],format="%Y-%m-%d")
    df_pivot = df.pivot(index=x_col,columns=y_col,values=target_z_col)
    fig = go.Figure(data=[go.Surface(x=df_pivot.columns.values,y=df_pivot.index ,z=df_pivot.values)])

    fig.update_layout(title='volatility surface', autosize=True,
                      width=800, height=600)

    fig.show()

In [5]:
df = read_data(index="SPY",date="20100108")
plot_vol_surface(df)

# calibration of implied volatility model ( quadratic polynomial regression)

In [38]:
def get_iv_atm(expiry,df):
    df1 = df[df.expiry == expiry]
    df_bis = df1.drop_duplicates("expiry")
    df_bis["strike"] = df_bis["fwd"]
    df_bis["midImpliedV"] = np.nan
    df1 = df1.append(df_bis,ignore_index=True)
    df1.sort_values("strike",inplace=True)
    df1["midImpliedV"] = df1["midImpliedV"].interpolate()
    return(df1[df1["strike"] == df1["fwd"]]["midImpliedV"].values[0])

In [39]:
def add_iv_atm_col(df):
    def change_iv_atm(r,expiry,v):
        if r["expiry"]==expiry:
            r["IV_ATM"]=v
        return r
    # compute of IV_ATM of each expiry and add it to the column df["IV_ATM"]
    df["IV_ATM"] = np.nan
    for expiry in df.expiry.unique():
        v = get_iv_atm(expiry,df)
        df = df.apply(lambda r: change_iv_atm(r,expiry,v), axis=1)
    return df


In [40]:
def Calibration_fixed_date(date,index="SPY",indicator="impliedV"):
    def change_level_skew_conv(r,expiry,lev,sk,conv,i):
        if r["expiry"]==expiry:
            r["Level_"+str(i)] = lev
            r["Skew_"+str(i)] = sk
            r["Convex_"+str(i)] = conv
        return r
    df = read_data(date=date,index=index,indicator=indicator)
    df = add_iv_atm_col(df)
    df["Moneyness_2"] = np.log(df["strike"]/df["fwd"])/(np.sqrt(df["tenor"])*df["IV_ATM"])
    for i in [2]:
        df["Level_"+str(i)] = np.nan
        df["Skew_"+str(i)] = np.nan
        df["Convex_"+str(i)] = np.nan
        for expiry in df.expiry.unique():
            df1 = df[df["expiry"] == expiry]
            params = np.polyfit(df1["Moneyness_"+str(i)], df1["midImpliedV"], 2)
            conv,skew,lev = params
            df = df.apply(lambda r: change_level_skew_conv(r,expiry,lev,skew,conv,i), axis=1)
    df = df.drop_duplicates("expiry")
    df = df.reset_index()
    df["date"] = date
    df = df[["date","expiry","dow","tenor","vTenor","spot","fwd","IV_ATM","Level_2","Skew_2","Convex_2"]]
    return df

In [41]:
def Calibration_all_dates(index="SPY",indicator="impliedV",nb_days=None):
    df_all = pd.DataFrame()
    l = os.listdir(os.path.join("data",indicator,index))
    l.sort()
    if nb_days is not None:
        l = l[:nb_days]
    for file in l:
        try:
            date = file[:8]
            df = Calibration_fixed_date(date,index=index,indicator=indicator)
            df_all = df_all.append(df, ignore_index=True)
        except:
            pass
    df_all.sort_values(["date","expiry"],inplace=True)
    return(df_all)

In [42]:
df_all = Calibration_all_dates(index="IWM",indicator="impliedV",nb_days=500)

In [43]:
df_all.head()

Unnamed: 0,date,expiry,dow,tenor,vTenor,spot,fwd,IV_ATM,Level_2,Skew_2,Convex_2
0,20100104,20100116,F,0.030137,0.030981,63.846333,63.875006,0.228845,0.229325,-0.041517,0.004779
1,20100104,20100220,F,0.126027,0.12626,63.846333,63.82387,0.24385,0.247957,-0.04828,0.001789
2,20100104,20100522,F,0.375342,0.376443,63.846333,63.584587,0.271812,0.274287,-0.055089,0.001617
3,20100104,20100821,F,0.624658,0.626041,63.846333,63.350654,0.281235,0.282385,-0.061535,0.0008
4,20100104,20101218,F,0.950685,0.952798,63.846333,64.096155,0.363917,0.309086,-0.080889,-0.004581


In [44]:
df_all.to_csv("SPY_2010_2011_ImpliedVStatistics.csv",index=False)

In [91]:
def plot_params_vs_tenor(date):
    
    df_date = df_all[df_all["date"]==date]
    fig = make_subplots(rows=1, cols=3, subplot_titles=["Level vs tenor","Skew vs tenor","Convex vs tenor"])
    
    for i,param in enumerate(["Level_2","Skew_2","Convex_2"]): 
        
        fig.add_trace(go.Scatter(x=df_date["tenor"], y=df_date[param], mode='markers+lines', name=param)
                                 ,row=1,col=i+1)
    
    fig.update_layout(height = 350,
                title_text="date : "+date)
    fig.show()
    


In [92]:
plot_params_vs_tenor("20100104")

In [93]:
plot_params_vs_tenor("20100928")

In [94]:
plot_params_vs_tenor("20100628")

In [95]:
plot_params_vs_tenor("20100318")

In [96]:
plot_params_vs_tenor("20100518")

In [97]:
plot_params_vs_tenor("20100325")

In [130]:
max_maturity_days = 350
df_all_sort = df_all.sort_values("tenor")
df_to_plot = df_all_sort.groupby("tenor").mean()
fig = make_subplots(rows=3,cols=1,subplot_titles=["Level vs tenor","Skew vs tenor","Convex vs tenor"])

for i,param in enumerate(["Level_2","Skew_2","Convex_2"]):
    fig.add_trace(go.Scatter(
        x=df_to_plot.index.values, y=df_to_plot[param].values,mode = "markers+lines",
                            fillcolor = "blue",
                            opacity = 0.4),i+1,1)
    fig.update_yaxes(title_text=param, row=1+i, col=1)
    fig.update_xaxes(title_text="tenor", row=1+i, col=1)
    
fig.update_layout( height = 1100, title = "Mean over all the dates of the parameters for each fixed tenor",
                  showlegend = False)
iplot(fig)