In [17]:
# -------
# IMPORT LIBRARIES
# -------
import numpy as np
import pandas as pd
import plotly.express as px
from python_module import sabr
from python_module import blackscholes
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from scipy.cluster.hierarchy import linkage, fcluster

# -------
# DISPLAY SETTINGS
# -------
# Let pandas show up to 100 rows and 100 columns before truncating a frame.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100
# Render floats with 4 decimals and '_' as the thousands separator.
pd.set_option('display.float_format', '{:_.4f}'.format)

In [18]:
# -------
# GENERATE MARKET DATA & PORTFOLIO
# -------
# S is passed to the option pricer and F to the SABR vol function; both are 100.
S = F = 100

# Arguments handed to sabr.compute_vol after (F, K, T), in this order.
alpha, beta, rho, nu = 0.2, 1, -0.4, 0.5

# Grid: 20 integer maturities between 1 and 60 (divided by 250 to obtain T),
# crossed with 20 evenly spaced strikes between 80 and 100.
maturity_grid = np.linspace(start=1, stop=60, num=20, dtype=int)
strike_grid = np.linspace(start=80, stop=100, num=20)

market_data_list = []
for time_to_maturity in maturity_grid:
    T = time_to_maturity / 250
    for K in strike_grid:
        IV = sabr.compute_vol(F, K, T, alpha, beta, rho, nu)
        # NOTE(review): the 4th argument (0) is presumably the rate and the trailing
        # True presumably requests the greeks -- confirm against blackscholes.compute_option.
        pricing_results = blackscholes.compute_option(S, K, T, 0, IV, 'put', True)
        # One record per (maturity, strike); the pricer's keys are merged in last,
        # so they win if they collide with the grid keys.
        record = {'time_to_maturity': time_to_maturity, 'T': T, 'K': K, 'IV': IV}
        record.update(pricing_results)
        market_data_list.append(record)
market_data_df = pd.DataFrame(market_data_list)

In [19]:
# Every row gets the same constant weight of 5.
market_data_df.loc[:, 'weights'] = 5

In [20]:
# Preview the last five rows of the generated grid.
market_data_df.tail(n=5)

Unnamed: 0,time_to_maturity,T,K,IV,price,delta,gamma,vega,theta,vanna,volga,weights
395,60,0.24,95.7895,0.2049,2.1668,-0.316,0.0354,0.1743,-0.0295,-0.6571,15.4141,5
396,60,0.24,96.8421,0.2036,2.5373,-0.3551,0.0373,0.1824,-0.0307,-0.4968,9.0428,5
397,60,0.24,97.8947,0.2025,2.951,-0.3959,0.0388,0.1887,-0.0316,-0.3138,4.0596,5
398,60,0.24,98.9474,0.2014,3.4092,-0.4378,0.0399,0.1931,-0.0321,-0.1134,0.8701,5
399,60,0.24,100.0,0.2003,3.9127,-0.4804,0.0406,0.1952,-0.0323,0.0976,-0.2346,5


In [21]:
# Express the put deltas as positive magnitudes.
# The previous in-place `*= -1` flipped the sign on every execution, so re-running
# this cell silently restored the negative deltas. Taking the absolute value yields
# the same result on the first run and is stable on any re-run.
# Assumes every delta in the frame is <= 0, as is the case for the puts priced above.
market_data_df['delta'] = market_data_df['delta'].abs()

In [22]:
# -------
# FEATURE ENGINEERING
# -------
# Two regressors: log of the strike relative to S, and log of T.
features = ['log_K', 'log_t']
market_data_df['log_K'] = np.log(market_data_df['K'] / S)
market_data_df['log_t'] = np.log(market_data_df['T'])

# -------
# MODEL FIT
# -------
# Design matrix: the two features followed by their squares (no cross term).
X = market_data_df[features]
X_poly = np.column_stack((X, X ** 2))
Y = market_data_df['price']
# LinearRegression.fit returns the fitted estimator itself.
model = LinearRegression(fit_intercept=True).fit(X_poly, Y)

# -------
# MODEL PREDICTION
# -------
market_data_df['price_pred'] = model.predict(X_poly)

# Cluster rows on their per-term contributions: each design-matrix column is
# scaled by its fitted coefficient (the intercept is not included).
# NOTE: X is rebound here from the raw feature frame to the contribution frame.
X = pd.DataFrame(X_poly * model.coef_)
# Complete-linkage hierarchy under the Chebyshev metric, then a flat cut
# requesting at most `num_clusters` clusters.
Z = linkage(X, method='complete', metric='chebyshev')
num_clusters = 10
clusters = fcluster(Z, t=num_clusters, criterion='maxclust')
# Labels are stored as strings (presumably so plots treat them as categories).
market_data_df['cluster'] = list(map(str, clusters))

In [23]:
# Strike vs. time_to_maturity grid, coloured by cluster label; marker size follows 'weights'.
cluster_scatter_fig = px.scatter(
    market_data_df,
    x='K',
    y='time_to_maturity',
    color='cluster',
    size='weights',
)
cluster_scatter_fig

In [24]:
# Side-by-side view of the reference price and the regression's fitted price.
market_data_df.loc[:, ['price', 'price_pred']]

Unnamed: 0,price,price_pred
0,0.0000,-0.3404
1,0.0000,-0.4473
2,0.0000,-0.5246
3,0.0000,-0.5737
4,0.0000,-0.5954
...,...,...
395,2.1668,1.7768
396,2.5373,2.0154
397,2.9510,2.2712
398,3.4092,2.5434


In [25]:
from sklearn.metrics import r2_score

In [26]:
# In-sample R^2 of the fitted price against the reference price.
r2_score(y_true=market_data_df['price'], y_pred=market_data_df['price_pred'])

0.8484393319902956