In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns   

In [2]:
from vol_surface.vol_surface_builder import dates, expire_dates


In [23]:
# Load the per-(quote date, expiry) smile features; the first CSV column is the row index.
smile_features_path = 'data/smile_features.csv'
smile_features = pd.read_csv(smile_features_path, index_col=0)

In [24]:
# Preview the first five rows of the freshly loaded frame.
smile_features.head(n=5)

Unnamed: 0,quote_date,expire_date,atm_iv,skew,curvature
0,2023-08-01,2023-08-01,0.368407,-5.294118,79.521206
1,2023-08-01,2023-08-02,0.075963,-5.731881,11.236018
2,2023-08-01,2023-08-03,0.097738,-4.005577,9.03263
3,2023-08-01,2023-08-04,0.103226,-3.435891,4.026443
4,2023-08-01,2023-08-07,0.097024,-1.872508,14.080939


In [25]:
# Key every row by (quote_date, expire_date); both columns move into a MultiIndex.
smile_index_keys = ['quote_date', 'expire_date']
smile_features = smile_features.set_index(smile_index_keys)

In [26]:
# Preview the first five rows after re-indexing.
smile_features.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,atm_iv,skew,curvature
quote_date,expire_date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-08-01,2023-08-01,0.368407,-5.294118,79.521206
2023-08-01,2023-08-02,0.075963,-5.731881,11.236018
2023-08-01,2023-08-03,0.097738,-4.005577,9.03263
2023-08-01,2023-08-04,0.103226,-3.435891,4.026443
2023-08-01,2023-08-07,0.097024,-1.872508,14.080939


In [27]:
# Empty list that will collect one record (dict) per quote date.
term_structure_features = list()

In [29]:
# quote_date / expire_date are moved into the MultiIndex by the earlier
# set_index cell, so `smile_features['quote_date']` raises KeyError when the
# notebook is run top to bottom. Restore whichever of the two are currently
# index levels to ordinary columns first, then parse both as datetimes.
date_columns = ['quote_date', 'expire_date']
date_index_levels = [name for name in date_columns if name in smile_features.index.names]
if date_index_levels:
    smile_features = smile_features.reset_index(level=date_index_levels)

for date_column in date_columns:
    # pd.to_datetime is a no-op on columns that are already datetime64,
    # so this cell can be re-run safely.
    smile_features[date_column] = pd.to_datetime(smile_features[date_column])


In [30]:
# Build one record of term-structure features per quote date by fitting a
# quadratic to ATM implied vol as a function of time to expiry.

DAYS_PER_MONTH = 30   # maturities are rescaled from days to 30-day units before fitting
MIN_EXPIRIES = 5      # a quote date is fitted only with MORE than this many live expiries
PIVOT_DTE = 30        # maturity (days) at which slope and curvature are evaluated
SHORT_DTE = 7         # maturity (days) used for short_term_iv
LONG_DTE = 90         # maturity (days) used for long_term_iv

# Start from an empty list inside this cell so that re-running it neither
# duplicates records nor fails because the name was rebound to a DataFrame.
term_structure_features = []

# Only undo the (quote_date, expire_date) index when it is actually present.
# An unconditional reset_index() is not idempotent: on an already-default
# index it inserts a stray 'index' column on every re-run.
indexed_date_levels = [
    name for name in ('quote_date', 'expire_date')
    if name in smile_features.index.names
]
if indexed_date_levels:
    smile_features = smile_features.reset_index(level=indexed_date_levels)

grouped = smile_features.groupby('quote_date')

for quote_date, group in grouped:
    # assign() returns a new frame, so the groupby slice is never written to
    # (the original `group['dte'] = ...` triggers SettingWithCopyWarning).
    days_to_expiry = (group['expire_date'] - quote_date).dt.days
    live = group.assign(dte=days_to_expiry)
    live = live[live['dte'] > 0]  # drop expiries on or before the quote date

    if len(live) <= MIN_EXPIRIES:
        continue

    # Work in 30-day units so the fitted second derivative is not vanishingly small.
    maturity = live['dte'].values / DAYS_PER_MONTH
    atm_iv = live['atm_iv'].values

    poly = np.poly1d(np.polyfit(maturity, atm_iv, 2))

    pivot = PIVOT_DTE / DAYS_PER_MONTH
    iv_slope = poly.deriv()(pivot)        # d(iv)/d(maturity) at the pivot, per 30-day unit
    iv_curvature = poly.deriv(2)(pivot)   # second derivative (constant for a quadratic)
    short_term_iv = poly(SHORT_DTE / DAYS_PER_MONTH)
    long_term_iv = poly(LONG_DTE / DAYS_PER_MONTH)
    term_spread = long_term_iv - short_term_iv

    term_structure_features.append({
        'quote_date': quote_date,
        'iv_slope': iv_slope,
        'iv_curvature': iv_curvature,
        'short_term_iv': short_term_iv,
        'long_term_iv': long_term_iv,
        'term_spread': term_spread
    })

In [31]:
# Turn the collected per-date records into a DataFrame (one row per record).
term_structure_features = pd.DataFrame(data=term_structure_features)

In [32]:
# Preview the first five rows of the fitted term-structure features.
term_structure_features.head(n=5)

Unnamed: 0,quote_date,iv_slope,iv_curvature,short_term_iv,long_term_iv,term_spread
0,2023-01-03,0.002753,-0.000129,0.206402,0.213799,0.007397
1,2023-01-04,0.002241,-9.5e-05,0.204112,0.21015,0.006038
2,2023-01-05,-4.8e-05,2.4e-05,0.221476,0.221383,-9.3e-05
3,2023-01-06,0.004168,-0.000198,0.185371,0.196562,0.011192
4,2023-01-09,0.001635,-6.5e-05,0.20599,0.210401,0.004412


In [34]:
# Index the feature table by quote date.
term_structure_features = term_structure_features.set_index('quote_date')


In [35]:
# Number of quote dates that produced a fitted term structure.
term_structure_features.shape[0]

250

In [36]:
# Plain assignment: `term_structure` is a second name for the same DataFrame, not a copy.
term_structure = term_structure_features

In [37]:
# Persist the term-structure features to CSV.
term_structure_path = 'data/term_structure.csv'
term_structure.to_csv(term_structure_path)