In [176]:
# -------
# IMPORT LIBRARIES
# -------
import numpy as np
import pandas as pd
import plotly.express as px
from python_module import sabr
from python_module import blackscholes
from sklearn.linear_model import LinearRegression
from scipy.cluster.hierarchy import linkage, fcluster

# -------
# DISPLAY SETTINGS
# -------
# Let pandas render up to 100 rows and 100 columns before truncating a frame.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100
# Floats are shown with 4 decimals and '_' as the thousands separator.
pd.set_option('display.float_format', lambda x: f'{x:_.4f}')

In [177]:
# -------
# GENERATE MARKET DATA & PORTFOLIO
# -------
# Build a synthetic grid of put options: implied vols come from the SABR helper,
# prices/greeks from the Black-Scholes helper, and each option gets a random
# positive portfolio weight.
S = F = 100  # spot and forward are set to the same level
alpha = 0.2
beta = 1
rho = -0.4
nu = 0.5

# Fixed seed so the random weights (and everything sized by them downstream)
# are reproducible under Restart Kernel -> Run All.
SEED = 42
rng = np.random.default_rng(SEED)

market_data_list = []
# 10 maturities between 1 and 250; dtype=int truncates the grid to integers
# (presumably trading days, given the division by 250 below -- TODO confirm).
for time_to_maturity in np.linspace(start=1, stop=250, num=10, dtype=int):
    T = time_to_maturity / 250
    # 10 strikes evenly spaced from 80 up to 100 (= S).
    for K in np.linspace(start=80, stop=100, num=10):
        IV = sabr.compute_vol(F, K, T, alpha, beta, rho, nu)
        # NOTE(review): the positional 0 and True are opaque here -- presumably
        # the rate and a "compute greeks" flag; verify against
        # blackscholes.compute_option.
        pricing_results = blackscholes.compute_option(S, K, T, 0, IV, 'put', True)
        # pricing_results is unpacked as a mapping, one column per key.
        market_data_list.append({'time_to_maturity': time_to_maturity, 'T': T, 'K': K, 'IV': IV, **pricing_results})
market_data_df = pd.DataFrame(market_data_list)
# Absolute value of standard-normal draws -> strictly non-negative weights.
market_data_df['weights'] = np.abs(rng.normal(size=len(market_data_df)))

In [178]:
# Preview the last five rows of the generated market data.
market_data_df.tail(n=5)

Unnamed: 0,time_to_maturity,T,K,IV,price,delta,gamma,vega,theta,vanna,volga,weights
95,250,1.0,91.1111,0.2117,4.3751,-0.2927,0.0162,0.3438,-0.0144,-0.5419,29.5622,0.2165
96,250,1.0,93.3333,0.2088,5.1372,-0.3318,0.0174,0.363,-0.015,-0.3929,17.0855,0.7858
97,250,1.0,95.5556,0.206,5.9948,-0.3731,0.0184,0.3786,-0.0155,-0.2161,6.9943,0.8037
98,250,1.0,97.7778,0.2035,6.9522,-0.416,0.0192,0.3901,-0.0157,-0.0166,0.3529,0.3701
99,250,1.0,100.0,0.2012,8.0119,-0.4599,0.0197,0.3969,-0.0158,0.1985,-1.9962,0.0056


In [192]:
# -------
# FEATURE ENGINEERING
# -------
# NOTE(review): the column is named 'sqrt_t' but it stores log(T), not sqrt(T).
# Name and computation are both left as they were -- confirm which one is
# intended before relying on the 'cost_of_roll_down' attribution.
market_data_df['sqrt_t'] = np.log(market_data_df['T'])

# -------
# MODEL FIT
# -------
# Regress theta on the greeks plus the time feature, without an intercept.
feature_columns = ['gamma', 'vanna', 'volga', 'sqrt_t']
X = market_data_df[feature_columns].to_numpy()
Y = market_data_df['theta']
model = LinearRegression(fit_intercept=False)
model.fit(X, Y)

# -------
# MODEL PREDICTION
# -------

# Per-feature contribution to predicted theta: feature value * fitted coefficient.
# Because there is no intercept, these four columns sum to 'theta_pred'.
theta_breakdown = market_data_df[feature_columns].multiply(model.coef_)
theta_breakdown.columns = ['cost_of_gamma', 'cost_of_vanna', 'cost_of_volga', 'cost_of_roll_down']
# Drop breakdown columns left over from a previous execution of this cell before
# concatenating; otherwise every re-run appends duplicate 'cost_of_*' columns and
# selecting them by name later returns all the copies.
market_data_df = pd.concat(
    [market_data_df.drop(columns=theta_breakdown.columns, errors='ignore'), theta_breakdown],
    axis=1,
)
market_data_df.loc[:, 'theta_pred'] = model.predict(X)

In [193]:
# Hierarchical clustering of the options on their theta-breakdown columns:
# complete linkage over correlation distance, cut into at most `num_clusters`
# flat clusters.
num_clusters = 10
X = market_data_df.loc[:, theta_breakdown.columns].to_numpy()
Z = linkage(X, method='complete', metric='correlation')
clusters = fcluster(Z, t=num_clusters, criterion='maxclust')
# Store the labels as strings so they can be treated as categories, not numbers.
market_data_df['cluster'] = list(map(str, clusters))

In [194]:
# Strike vs. maturity map of the book: colour = cluster label, marker size = weight.
px.scatter(
    market_data_df,
    x='K',
    y='time_to_maturity',
    size='weights',
    color='cluster',
)

In [191]:
# Side-by-side view of the pricer's theta and the regression's fitted theta.
market_data_df.loc[:, ['theta', 'theta_pred']]

Unnamed: 0,theta,theta_pred
0,-0.0,-0.0
1,-0.0,-0.0
2,-0.0,-0.0
3,-0.0,-0.0
4,-0.0,-0.0
5,-0.0,-0.0
6,-0.0,-0.0
7,-0.0005,-0.0005
8,-0.0536,-0.0537
9,-0.2503,-0.2504
