In [112]:
# -------
# IMPORT LIBRARIES
# -------
import numpy as np
import pandas as pd
import plotly.express as px
from python_module import sabr
from python_module import blackscholes
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from scipy.cluster.hierarchy import linkage, fcluster

# -------
# DISPLAY SETTINGS
# -------
# Show up to 100 rows / columns before pandas truncates the output.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100
# Render floats with 4 decimals and "_" as the thousands separator.
pd.set_option('display.float_format', '{:_.4f}'.format)

In [113]:
def compute_lasso_regression(X, y, nb_features, max_iteration=100):
    """Search for a Lasso penalty that keeps exactly ``nb_features`` coefficients.

    Starting from ``alpha = 1e-07``, a Lasso model is fitted on ``(X, y)`` and
    the penalty is multiplied by 1.1 after every fit whose number of non-zero
    coefficients differs from ``nb_features``.

    Parameters
    ----------
    X : array-like
        Feature matrix passed to ``Lasso.fit``.
    y : array-like
        Target values passed to ``Lasso.fit``.
    nb_features : int
        Required number of non-zero coefficients.
    max_iteration : int, default 100
        Maximum number of penalties to try.

    Returns
    -------
    numpy.ndarray or None
        ``coef_`` of the first fit with exactly ``nb_features`` non-zero
        entries, or ``None`` (after printing ``'did not converge'``) when no
        tried penalty produced that count.
    """
    alpha = 1e-07
    for _ in range(max_iteration):
        lasso = Lasso(alpha=alpha, max_iter=10000000)
        # Fit on the `y` argument. The previous code referenced a global `Y`,
        # which fails on a fresh kernel or silently fits an unrelated target.
        lasso.fit(X, y)
        if np.count_nonzero(lasso.coef_) == nb_features:
            return lasso.coef_
        # Wrong number of active coefficients: increase the penalty by 10%.
        alpha *= 1.1
    print('did not converge')
    return None

In [114]:
# -------
# GENERATE MARKET DATA AND GREEKS
# -------
# Spot / forward level and SABR parameters used to build the synthetic surface.
S = F = 100
alpha, beta, rho, nu = 0.2, 1, -0.4, 0.5

# One put per (maturity in days, strike) pair: 60 maturities x 21 strikes.
market_data_list = []
for time_to_maturity in np.linspace(start=1, stop=60, num=60, dtype=int):
    # Day count converted to a year fraction (250 days per year).
    T = time_to_maturity / 250
    for K in np.linspace(start=80, stop=120, num=21, dtype=int):
        IV = sabr.compute_vol(F, K, T, alpha, beta, rho, nu)
        # Final flag True: the result is unpacked as a mapping that contains at least 'price'.
        pricing_results = blackscholes.compute_option(S, K, T, 0, IV, 'put', True)
        # Reprice with spot down 5% (same vol); with the flag False the result is used as a number.
        pv_slide5 = blackscholes.compute_option(S*0.95, K, T, 0, IV, 'put', False)
        slide5_pnl = pv_slide5 - pricing_results['price']
        record = {
            'symbol': f"{time_to_maturity}_{K}",
            'time_to_maturity': time_to_maturity,
            'T': T,
            'K': K,
            'IV': IV,
            'Slide5PnL': slide5_pnl,
        }
        record.update(pricing_results)
        market_data_list.append(record)
market_data_df = pd.DataFrame(market_data_list).set_index('symbol')

In [115]:
# -------
# COMPUTE COST OF GAMMA / VANNA / VOLGA
# -------
# Per maturity: regress theta on (gamma, vanna, volga) without intercept, then
# store the fitted theta and, for each greek, -(coefficient * greek) as its "cost".
greek_columns = ['gamma', 'vanna', 'volga']
maturities = market_data_df['T'].unique()
for maturity in maturities:
    maturity_index = market_data_df[market_data_df['T']==maturity].index
    temp_df = market_data_df.loc[maturity_index]
    X = temp_df[greek_columns].to_numpy()
    Y = temp_df['theta']
    model = LinearRegression(fit_intercept=False)
    model.fit(X, Y)
    market_data_df.loc[maturity_index, 'theta_pred'] = model.predict(X)
    # Column-wise product of each greek with its regression coefficient.
    weighted_greeks = temp_df[greek_columns].multiply(model.coef_)
    for greek in greek_columns:
        market_data_df.loc[maturity_index, f'cost_of_{greek}'] = weighted_greeks[greek].to_numpy() * -1

In [116]:
# -------
# ADD PORTFOLIO WEIGHTS
# -------
# Seeded absolute values of standard-normal draws, one per option, so reruns give the same weights.
np.random.seed(42)
market_data_df['weights'] = np.abs(np.random.normal(size=len(market_data_df)))
market_data_df.tail()

Unnamed: 0_level_0,time_to_maturity,T,K,IV,Slide5PnL,price,delta,gamma,vega,theta,vanna,volga,theta_pred,cost_of_gamma,cost_of_vanna,cost_of_volga,weights
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
60_112,60,0.24,112,0.1913,4.6084,12.5451,-0.8775,0.0217,0.0994,-0.0157,1.3331,75.9319,-0.0157,0.0173,-0.0022,0.0006,1.2096
60_114,60,0.24,114,0.1903,4.7322,14.3605,-0.9129,0.017,0.0777,-0.0122,1.2092,80.4864,-0.0122,0.0136,-0.002,0.0006,1.6726
60_116,60,0.24,116,0.1895,4.8215,16.2332,-0.9397,0.0129,0.0586,-0.0092,1.0383,78.9387,-0.0092,0.0103,-0.0017,0.0006,0.419
60_118,60,0.24,118,0.1889,4.884,18.1476,-0.9593,0.0095,0.0428,-0.0067,0.8495,72.5011,-0.0067,0.0076,-0.0014,0.0005,0.705
60_120,60,0.24,120,0.1884,4.9263,20.0917,-0.9731,0.0067,0.0304,-0.0047,0.6659,62.9256,-0.0048,0.0054,-0.0011,0.0005,0.0558


In [117]:
# -------
# PROJECT OPTION PRICE ONTO TIME, STRIKE AND VOLATILITY AXES
# -------
# Reference contract: the option with the highest price in the grid.
ref_index = market_data_df['price'].idxmax()
ref_price = market_data_df.loc[ref_index, 'price']
ref_iv_parameter = market_data_df.loc[ref_index, 'IV']
ref_time_parameter = market_data_df.loc[ref_index, 'T']
ref_strike_parameter = market_data_df.loc[ref_index, 'K']

# For every option, reprice the reference put with exactly one of its inputs
# (maturity, strike or implied vol) replaced by that option's value, and keep
# the price difference to the reference. Results are collected in lists and
# assigned as whole columns instead of writing one scalar at a time with .loc.
time_projections = []
strike_projections = []
volatility_projections = []
for variable_time_parameter, variable_strike_parameter, variable_iv_parameter in zip(
    market_data_df['T'], market_data_df['K'], market_data_df['IV']
):
    time_projection = blackscholes.compute_option(S, ref_strike_parameter, variable_time_parameter, 0, ref_iv_parameter, 'put', False) - ref_price
    strike_projection = blackscholes.compute_option(S, variable_strike_parameter, ref_time_parameter, 0, ref_iv_parameter, 'put', False) - ref_price
    volatility_projection = blackscholes.compute_option(S, ref_strike_parameter, ref_time_parameter, 0, variable_iv_parameter, 'put', False) - ref_price

    time_projections.append(time_projection)
    strike_projections.append(strike_projection)
    volatility_projections.append(volatility_projection)

market_data_df['time_projection'] = time_projections
market_data_df['strike_projection'] = strike_projections
market_data_df['volatility_projection'] = volatility_projections

In [121]:
# -------
# CREATE BUCKETS FROM THE TIME / STRIKE / VOLATILITY PROJECTIONS
# -------
num_clusters = 10
features = ['time_projection', 'strike_projection', 'volatility_projection']
X = market_data_df[features]
# Complete-linkage hierarchical clustering under the Chebyshev metric,
# cut into at most `num_clusters` flat clusters.
Z = linkage(X, method='complete', metric='chebyshev')
clusters = fcluster(Z, t=num_clusters, criterion='maxclust')
# Labels are stored as strings (presumably so plotly colours them as discrete categories).
market_data_df['cluster'] = list(map(str, clusters))

In [122]:
# -------
# PLOT
# -------
# Strike vs. days to maturity, coloured by cluster, marker size given by the portfolio weight.
px.scatter(
    data_frame=market_data_df,
    x='K',
    y='time_to_maturity',
    color='cluster',
    size='weights',
)

In [123]:
# 3D scatter of implied vol over strike and maturity (year fraction), coloured by cluster.
fig = px.scatter_3d(
    data_frame=market_data_df,
    x='K',
    y='T',
    z='IV',
    color='cluster',
    width=1000,
    height=1000,
)
fig.show()