## Predicting the greeks using the ML algorithms and sk learn

In [17]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import QuantileTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import pandas as pd

## For comparision first preduct the price usinf the closed for solution

In [18]:
# Discretize option price to derive Greeks (finite differences)
def black_scholes_price(S, K, T, r, sigma, option_type="call"):
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    if option_type == "call":
        price = S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    else:
        price = K * np.exp(-r * T) * norm.cdf(-d2) - S * norm.cdf(-d1)
    return price

## Generate the data for the mu, sigma generated in the predicting_mu_sigma.ipynb

In [19]:
# Generate option price path and compute discrete Greeks via finite differences
def generate_data_with_fd(S0, mu, sigma, T, dt, K, r):
    t, S = simulate_gbm(S0, mu, sigma, T, dt)
    option_prices = [black_scholes_price(s, K, T - ti, r, sigma) for ti, s in zip(t, S)]
    delta_fd = np.gradient(option_prices, S)
    gamma_fd = np.gradient(delta_fd, S)
    rho_fd = [K * (T - ti) * np.exp(-r * (T - ti)) * norm.cdf((np.log(s / K) + (r + 0.5 * sigma ** 2) * (T - ti)) / (sigma * np.sqrt(T - ti)))
              if T - ti > 0 else 0 for ti, s in zip(t, S)]
    vega_fd = [s * norm.pdf((np.log(s / K) + (r + 0.5 * sigma ** 2) * (T - ti)) / (sigma * np.sqrt(T - ti))) * np.sqrt(T - ti)
               if T - ti > 0 else 0 for ti, s in zip(t, S)]

    df = pd.DataFrame({
        'time': T - t,
        'stock_price': S,
        'option_price': option_prices,
        'delta': delta_fd,
        'gamma': gamma_fd,
        'vega': vega_fd,
        'rho': rho_fd,
        'mu': mu,
        'variance': sigma ** 2
    })

    return df.dropna()

## Simulate gbm code from the GBM paths file

In [20]:
def simulate_gbm(S0, mu, sigma, T, dt):
    N = int(T / dt)
    t = np.linspace(0, T, N)
    W = np.random.standard_normal(size=N)
    W = np.cumsum(W) * np.sqrt(dt)
    S = S0 * np.exp((mu - 0.5 * sigma**2) * t + sigma * W)
    return t, S

## Using sklearn and the basic liner models predicting the greeks

In [21]:
# Parameters
S0 = 100
mu = 0.05
sigma = 0.2
T = 1.0
dt = 1/252
K = 100
r = 0.05

# Generate data with finite difference derivatives
df_fd = generate_data_with_fd(S0, mu, sigma, T, dt, K, r)

# Regression targets
targets = ['delta', 'gamma', 'vega', 'rho']
X = df_fd[['time', 'stock_price']]
results = {}

# Fit model for each Greek
for target in targets:
    y = df_fd[target]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    pipe = Pipeline([
        ('scaler', QuantileTransformer(output_distribution='normal')),
        ('model', Ridge())
    ])

    param_grid = {
        'model__alpha': [0.1, 1.0, 10.0, 100.0]
    }

    search = GridSearchCV(pipe, param_grid, cv=3, scoring='neg_mean_squared_error')
    search.fit(X_train, y_train)

    y_pred = search.predict(X_test)
    results[target] = {
        'best_alpha': search.best_params_['model__alpha'],
        'mse': mean_squared_error(y_test, y_pred),
        'r2': r2_score(y_test, y_pred)
    }

results

NameError: name 'norm' is not defined