# Greeks 

The Greeks are the quantities representing the sensitivity of the price of options to a change of the underlying parameters.
The Greeks in the Black–Scholes model are relatively easy to calculate, a desirable property of financial models, and are very useful for derivatives traders, especially those who seek to hedge their portfolios from adverse changes in market conditions.

The most important Greeks are:

Delta
$$
\Delta = \frac{\partial V}{\partial S}
$$


Vega
$$
\mathcal{V} = \frac{\partial V}{\partial \sigma}
$$

Theta
$$
\Theta = \frac{\partial V}{\partial \tau}
$$

Gamma

$$
\Gamma = \frac{\partial^2 V}{\partial S^2}
$$

In [1]:
import numpy as np
import pandas as pd

from scipy import stats

import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from torch.autograd import Variable
from torch.utils.data import Dataset

In [2]:
# Seed the PyTorch and NumPy global RNGs with the same value so reruns are repeatable
_seed = 0
torch.manual_seed(_seed)
np.random.seed(_seed)

In [3]:
# Relative path of the CSV file with the synthetic options that is read below
synthetic_options_path = '../data/heston_synthetic_options.csv'

In [4]:
def reduce_mem_usage(df):
    """Downcast the columns of ``df`` to smaller dtypes to reduce memory usage.

    The frame is modified in place and also returned.

    * Signed / unsigned integer columns are cast to the narrowest integer type
      of the same signedness whose range contains the column's min and max.
    * Float columns are cast to float16, else float32, else float64, using the
      same range test. Only the value *range* is checked, so a cast to float16
      keeps the magnitude but rounds values to roughly three significant
      decimal digits.
    * Object / string columns are converted to ``category``.
    * Every other dtype (bool, datetime, category, nullable extension types, ...)
      is left untouched.

    :param df: DataFrame to shrink
    :return: the same DataFrame object, with downcast columns
    """
    integer_ladders = {
        'i': (np.int8, np.int16, np.int32, np.int64),
        'u': (np.uint8, np.uint16, np.uint32, np.uint64),
    }
    float_ladder = (np.float16, np.float32, np.float64)

    for col in df.columns:
        col_type = df[col].dtype

        if col_type == object or isinstance(col_type, pd.StringDtype):
            df[col] = df[col].astype('category')
            continue

        # Only plain NumPy numeric dtypes are downcast; ``kind`` separates
        # signed ('i'), unsigned ('u') and floating ('f') columns reliably,
        # unlike matching on the dtype's string name.
        kind = col_type.kind if isinstance(col_type, np.dtype) else None
        if kind in integer_ladders:
            ladder, limits = integer_ladders[kind], np.iinfo
        elif kind == 'f':
            ladder, limits = float_ladder, np.finfo
        else:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        for candidate in ladder:
            bounds = limits(candidate)
            # Inclusive bounds: a value equal to the type's limit still fits.
            if bounds.min <= c_min and c_max <= bounds.max:
                df[col] = df[col].astype(candidate)
                break
        else:
            # No candidate matched (e.g. NaN-only or infinite float column):
            # floats fall back to float64, integers keep their current dtype.
            if kind == 'f':
                df[col] = df[col].astype(np.float64)

    return df

In [5]:
# Read the CSV (its first column becomes the index) and shrink the dtypes in one step
synthetic_options = reduce_mem_usage(
    pd.read_csv(synthetic_options_path, index_col=0)
)

  mask |= (ar1 == a)


In [6]:
# Shuffle the rows reproducibly, then replace the permuted index with a fresh 0..n-1 one
synthetic_options = (
    shuffle(synthetic_options, random_state=0)
    .reset_index()
    .drop(columns='index')
)

In [7]:
# Display the shuffled table as the cell output
synthetic_options

Unnamed: 0,Price,Strike,Type,Kappa,Rho,Theta,Xi,V_0,Interest Rate,Time to Expiration,Option Price
0,96,78.0,C,1.074219,-0.311279,0.375732,0.180298,0.367432,0.045074,0.533203,17.828125
1,100,56.0,C,1.495117,-0.869629,0.011398,0.010925,0.257080,0.050201,0.977051,42.187500
2,100,68.0,P,0.988770,-0.664551,0.344482,0.045258,0.328125,0.051971,0.588867,0.026703
3,100,75.0,C,1.996094,-0.051880,0.417480,0.208374,0.328613,0.053741,0.215088,25.312500
4,100,71.0,P,0.344971,-0.344727,0.227783,0.340820,0.054413,0.016891,0.800781,0.070496
...,...,...,...,...,...,...,...,...,...,...,...
1059740,100,71.0,P,1.039062,-0.757324,0.227661,0.312500,0.445068,0.016006,0.487061,1.505859
1059741,100,57.0,C,1.905273,-0.276855,0.322266,0.061432,0.120117,0.025070,0.947754,41.687500
1059742,92,132.0,P,0.149536,-0.433350,0.262695,0.054565,0.404541,0.028809,0.551758,38.375000
1059743,100,135.0,C,0.971680,-0.711426,0.270508,0.438477,0.483887,0.097107,0.289307,0.180542


## Preprocessing

In [8]:
# One-hot encode the categorical column(s). With an empty prefix and separator the
# dummy columns are named after the category values themselves.
synthetic_options = pd.get_dummies(
    data=synthetic_options,
    prefix_sep='',
    prefix='',
)

In [9]:
# Names (and order) of the model's input columns: every column except the target
df_cols = synthetic_options.drop('Option Price', axis=1).columns

In [10]:
# Standardise the inputs and the target with two separate scalers.
# NOTE(review): both scalers are fitted on the full table before the split below,
# so validation/test rows contribute to the scaling statistics.
input_sc, output_sc = StandardScaler(), StandardScaler()
input_data = input_sc.fit_transform(synthetic_options.drop(columns='Option Price'))
output_data = output_sc.fit_transform(synthetic_options['Option Price'].values.reshape(-1, 1))

# 80% train / 10% validation / remainder test, taken in row order
# (the rows were shuffled earlier in the notebook).
train_size, val_size = 0.8, 0.1
n_rows = len(input_data)

last_train_idx = int(np.round(n_rows * train_size))
last_val_idx = last_train_idx + int(np.round(n_rows * val_size))

X_train, X_val, X_test = np.split(input_data, [last_train_idx, last_val_idx])
y_train, y_val, y_test = np.split(output_data, [last_train_idx, last_val_idx])

In [11]:
# Convert the NumPy splits to float32 tensors.
# torch.autograd.Variable is deprecated (tensors track gradients themselves), and
# torch.tensor with an explicit dtype replaces the legacy torch.Tensor constructor.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

y_train = torch.tensor(y_train, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

## Model

In [12]:
# Use the first GPU when CUDA is available, otherwise fall back to the CPU
CUDA = torch.cuda.is_available()
if CUDA:
    device = 'cuda:0'
else:
    device = 'cpu'

In [13]:
class ResBlock(nn.Module):
  """Residual wrapper: adds the input back onto the wrapped module's output.

  The wrapped module must return a tensor that can be added to its input.
  """

  def __init__(self, module):
    super().__init__()
    self.module = module

  def forward(self, x):
    transformed = self.module(x)
    return transformed + x

In [14]:
class HiddenLayer(nn.Module):
  """Square fully connected layer (``layer_size`` -> ``layer_size``) followed by an activation.

  :param layer_size: number of input and output features of the linear layer
  :param act_fn: activation name, one of 'ReLU', 'LeakyReLU' or 'ELU'
  :raises ValueError: if ``act_fn`` is not a supported activation name
  """

  # Supported activation names mapped to their module classes.
  _ACTIVATIONS = {'ReLU': nn.ReLU, 'LeakyReLU': nn.LeakyReLU, 'ELU': nn.ELU}

  def __init__(self, layer_size, act_fn):
    super(HiddenLayer, self).__init__()

    # Fail at construction time instead of leaving self.layer undefined and
    # crashing later in forward().
    if act_fn not in self._ACTIVATIONS:
      raise ValueError(
          f"Unsupported activation {act_fn!r}; expected one of {sorted(self._ACTIVATIONS)}")

    self.layer = nn.Sequential(
        nn.Linear(layer_size, layer_size),
        self._ACTIVATIONS[act_fn]())

  def forward(self, x):
    return self.layer(x)

In [15]:
class Net(nn.Module):
  """Feed-forward regressor: input layer, a stack of residual blocks, linear output layer.

  Each residual block wraps two ``HiddenLayer`` modules, so ``num_layers // 2``
  blocks are created; an odd ``num_layers`` is rounded down.

  :param input_size: number of input features
  :param output_size: number of outputs
  :param hidden_size: width of every hidden layer
  :param num_layers: number of hidden layers (two per residual block)
  :param act_fn: activation name, one of 'ReLU', 'LeakyReLU' or 'ELU'
  :raises ValueError: if ``act_fn`` is not a supported activation name
  """

  # Supported activation names mapped to their module classes.
  _ACTIVATIONS = {'ReLU': nn.ReLU, 'LeakyReLU': nn.LeakyReLU, 'ELU': nn.ELU}

  def __init__(self, input_size, output_size, hidden_size, num_layers, act_fn):
    super(Net, self).__init__()

    # Validate first: otherwise self.initial_layer would never be assigned and the
    # constructor would fail further down with an unrelated AttributeError.
    if act_fn not in self._ACTIVATIONS:
      raise ValueError(
          f"Unsupported activation {act_fn!r}; expected one of {sorted(self._ACTIVATIONS)}")

    self.input_size = input_size
    self.output_size = output_size
    self.hidden_size = hidden_size

    self.initial_layer = nn.Sequential(
        nn.Linear(self.input_size, self.hidden_size),
        self._ACTIVATIONS[act_fn]())

    self.hidden_layers_list = [
        ResBlock(
            nn.Sequential(
                HiddenLayer(self.hidden_size, act_fn),
                HiddenLayer(self.hidden_size, act_fn)
            )
        )
        for _ in range(num_layers // 2)
    ]

    self.hidden_layers = nn.Sequential(*self.hidden_layers_list)

    # initial_layer and hidden_layers stay registered both as attributes and
    # inside self.net, so the state-dict keys are the same as before.
    self.net = nn.Sequential(
        self.initial_layer,
        self.hidden_layers,
        nn.Linear(self.hidden_size, self.output_size)
    )

  def forward(self, x):
    return self.net(x)

In [16]:
def init_weights(m, init_m: str):
  """Re-initialise every ``nn.Linear`` inside ``m`` in place.

  Weights are drawn with the scheme named by ``init_m``; biases, when present,
  are set to 0.01.

  :param m: module whose linear sub-modules are initialised
  :param init_m: 'uniform', 'normal', 'xavier uniform' or 'xavier normal'.
      The historical misspelling 'xaiver uniform' is still accepted.
  :return: None. An unrecognised ``init_m`` leaves ``m`` untouched and emits a warning.
  """
  initializers = {
      'uniform': torch.nn.init.uniform_,
      'normal': torch.nn.init.normal_,
      'xavier uniform': torch.nn.init.xavier_uniform_,
      # Legacy key: the misspelled name was the only one recognised before.
      'xaiver uniform': torch.nn.init.xavier_uniform_,
      'xavier normal': torch.nn.init.xavier_normal_,
  }

  init_weight = initializers.get(init_m)
  if init_weight is None:
    import warnings
    # Previously a silent no-op; keep the no-op but make it visible.
    warnings.warn(
        f"Unknown init method {init_m!r}; weights were left unchanged. "
        f"Expected one of {sorted(initializers)}.",
        stacklevel=2)
    return

  @torch.no_grad()
  def init_linear(layer):
    if isinstance(layer, nn.Linear):
      init_weight(layer.weight)
      # Linear layers built with bias=False have no bias tensor to fill.
      if layer.bias is not None:
        layer.bias.fill_(0.01)

  m.apply(init_linear)

In [17]:
# Network and training hyper-parameters.
# input_size is the number of feature columns fed to the network; output_size is the
# single price output.
input_size = 11
output_size = 1
# Net builds num_layers // 2 residual blocks of two hidden layers each.
num_layers = 4
hidden_size = 600
# NOTE(review): batch_size and lr are not used in the visible part of this notebook;
# presumably they document the training configuration — confirm.
batch_size = 1141
lr = 0.00012243587926335812
# This spelling is the key that init_weights matches on; do not "correct" it here alone.
init_method = 'xaiver uniform'
act_fn = 'LeakyReLU'

# Freshly initialised model; a later cell rebuilds `model` and loads saved weights into it.
model = Net(input_size, output_size, hidden_size, num_layers, act_fn)
init_weights(model, init_method)

loss_fn = nn.MSELoss()

In [18]:
# Move the held-out inputs and targets to the device the model will run on
X_test, y_test = X_test.to(device), y_test.to(device)

In [19]:
class OptDataset(Dataset):
  """Dataset pairing each row of ``X`` with the entry of ``y`` at the same position."""

  def __init__(self, X, y):
    self.X, self.y = X, y

  def __len__(self):
    # The dataset length is defined by the inputs.
    return len(self.X)

  def __getitem__(self, idx):
    sample, target = self.X[idx], self.y[idx]
    return sample, target

In [20]:
save_model_path = '../models/final_heston_model.chkpt'

# Rebuild the architecture, then overwrite its freshly initialised weights with the
# saved ones. map_location lets a checkpoint saved on another device load here.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
model = Net(input_size, output_size, hidden_size, num_layers, act_fn)
saved_weights = torch.load(save_model_path, map_location=device)
model.load_state_dict(saved_weights)
model = model.to(device)

In [21]:
# Price the first `test_size` held-out options with the network in inference mode
model.eval()
test_size = 30

with torch.no_grad():
    test_out = model(X_test[:test_size])

# The network works in standardised units; map predictions and targets back to prices.
# (Nothing here tracks gradients, so the tensors convert to NumPy directly.)
test_out = output_sc.inverse_transform(test_out.cpu().numpy())
real_out = output_sc.inverse_transform(y_test[:test_size].cpu().numpy())

In [22]:
# Forward pass on a gradient-tracking copy of the same test rows, so the output can
# later be differentiated with respect to the (standardised) inputs.
model.zero_grad()
inp_g = X_test[:test_size].detach().clone().requires_grad_(True)
test_out_g = model(inp_g)

In [23]:
# Feature names in the order the scaler/network expect them
cols = [
    'Price', 'Strike', 'Kappa', 'Rho', 'Theta', 'Xi', 'V_0',
    'Interest Rate', 'Time to Expiration', 'C', 'P',
]

# Undo the input standardisation so the table shows the options in original units,
# next to the predicted and the true prices.
unscaled_inputs = input_sc.inverse_transform(X_test[:test_size].cpu().numpy())
test_options = pd.DataFrame(unscaled_inputs, columns=cols)
test_options['Prediction'] = test_out
test_options['Real'] = real_out

In [24]:
# Display the unscaled test options with predicted vs. real prices
test_options

Unnamed: 0,Price,Strike,Kappa,Rho,Theta,Xi,V_0,Interest Rate,Time to Expiration,C,P,Prediction,Real
0,91.997589,165.022598,1.689353,-0.744619,0.207761,0.196682,0.179774,0.091636,0.966828,-3e-06,1.000003,66.684967,66.73999
1,100.000237,90.006714,1.781204,-0.265676,0.151816,0.362019,0.33252,0.010186,0.874121,-3e-06,1.000003,5.274084,5.499053
2,119.000381,118.9981,0.743793,-0.631957,0.020763,0.349615,0.302988,0.066769,0.478711,0.999997,3e-06,4.311134,4.79489
3,115.996803,91.001205,0.809019,-0.214292,0.486074,0.305426,0.465927,0.075435,0.799731,-3e-06,1.000003,0.453975,0.336961
4,100.000237,64.013428,0.520082,-0.871746,0.435766,0.16976,0.252438,0.041319,0.708434,0.999997,3e-06,34.846703,34.719139
5,119.000381,177.97049,1.700059,-0.272527,0.273446,0.499447,0.381089,0.035153,0.197282,0.999997,3e-06,0.068439,0.15695
6,100.000237,101.999115,1.182622,-0.249436,0.069802,0.078282,0.14788,0.025334,0.582037,0.999997,3e-06,2.661062,3.164198
7,98.001068,53.015518,1.452399,-0.810086,0.039505,0.261969,0.25904,0.057435,0.341413,0.999997,3e-06,42.84853,42.787884
8,100.000237,79.008804,1.810506,-0.489986,0.033022,0.42432,0.236206,0.034151,0.187983,-3e-06,1.000003,0.307016,0.241661
9,100.000237,148.018738,0.100272,-0.180037,0.220708,0.111194,0.437267,0.079532,0.365224,-3e-06,1.000003,44.869846,44.842133


# Get the Greeks from the Neural Net

One method for obtaining an option's Greeks from any pricing model is the finite-difference method.

In numerical analysis, finite-difference methods (FDM) are a class of numerical techniques for solving differential equations by approximating derivatives with finite differences. Both the spatial domain and time interval (if applicable) are discretized, or broken into a finite number of steps, and the value of the solution at these discrete points is approximated by solving algebraic equations containing finite differences and values from nearby points.

Finite difference methods convert ordinary differential equations (ODE) or partial differential equations (PDE), which may be nonlinear, into a system of linear equations that can be solved by matrix algebra techniques. 

The essence of the method is that we will approximate the partial derivative representing the particular sensitivity of interest.

For example, the delta of an option is the derivative of the option's value with respect to the underlying price, $\frac{\partial V}{\partial S}$. If we calculate two option prices, one at $S$ and the other at $S + \Delta S$, subtract them, and divide by $\Delta S$, we obtain a ***forward difference approximation*** to the derivative:

$$
\frac{\partial V}{\partial S} \approx \frac{V(S + \Delta S, T, \sigma, r, X) - V(S, T, \sigma, r, X)}{\Delta S}
$$

The same reasoning can be applied to all the first-order derivatives of an option.

However, $\Gamma$ is a second-order derivative, so the previous formula cannot be used to approximate this Greek. It can be shown that $\Gamma$ can be approximated with the following central-difference formula:

$$
\frac{\partial^2 V}{\partial S^2} \approx \frac{V(S + \Delta S, T, \sigma, r, X) - 2 V(S, T, \sigma, r, X) + V(S - \Delta S, T, \sigma, r, X)}{(\Delta S)^2}
$$

In [25]:
def get_d1_d2(S, X, T, t, r, sigma):
    """
    Return the Black-Scholes auxiliary terms d1 and d2.

    :param S: underlying price
    :param X: option's strike price
    :param T: option's maturity date (in years)
    :param t: current time (in years); the remaining life of the option is T - t
    :param r: interest rate
    :param sigma: underlying volatility
    :return: tuple (d1, d2)
    """
    remaining = T - t
    log_moneyness = np.log(S / X)
    drift = (r + sigma * sigma / 2.) * remaining
    d1 = (log_moneyness + drift) / (sigma * np.sqrt(remaining))
    d2 = d1 - sigma * np.sqrt(remaining)
    return d1, d2


def black_scholes(S, X, T, t, r, sigma, o_type: str = "C"):
    """
    Compute a European option price with the Black-Scholes model.

    :param S: underlying price
    :param X: option's strike price
    :param T: option's maturity date (in years)
    :param t: current time (in years); the remaining life of the option is T - t
    :param r: interest rate as a decimal fraction (it is used directly in exp(-r * (T - t)))
    :param sigma: underlying volatility
    :param o_type: option type, "C" for a call option and "P" for a put option
    :return: the Black-Scholes option price, as a NumPy float (or array for array inputs)
    :raises ValueError: if ``o_type`` is neither "C" nor "P"
    """
    # Reject unknown types up front; previously anything other than "C" was
    # silently priced as a put.
    if o_type not in ("C", "P"):
        raise ValueError(f'o_type must be "C" or "P", got {o_type!r}')

    d1, d2 = get_d1_d2(S, X, T, t, r, sigma)
    discounted_strike = X * np.exp(-r * (T - t))
    if o_type == "C":
        return S * stats.norm.cdf(d1) - discounted_strike * stats.norm.cdf(d2)
    return discounted_strike * stats.norm.cdf(-d2) - S * stats.norm.cdf(-d1)

In [26]:
def delta_fdm_bs(S, X, sigma, tau, r, delta_S, o_type: str = 'C'):
  """Forward-difference Black-Scholes delta: price change per unit bump ``delta_S`` in S."""
  bumped = black_scholes(S + delta_S, X, tau, 0, r, sigma, o_type)
  base = black_scholes(S, X, tau, 0, r, sigma, o_type)
  return (bumped - base) / delta_S

def theta_fdm_bs(S, X, sigma, tau, r, delta_tau, o_type: str = 'C'):
  """Forward-difference sensitivity of the Black-Scholes price to time to expiration.

  The time to expiration ``tau`` is bumped upwards by ``delta_tau``.
  """
  bumped = black_scholes(S, X, tau + delta_tau, 0, r, sigma, o_type)
  base = black_scholes(S, X, tau, 0, r, sigma, o_type)
  return (bumped - base) / delta_tau

def vega_fdm_bs(S, X, sigma, tau, r, delta_sigma, o_type: str = 'C'):
  """Forward-difference Black-Scholes vega: price change per unit bump ``delta_sigma`` in volatility."""
  bumped = black_scholes(S, X, tau, 0, r, sigma + delta_sigma, o_type)
  base = black_scholes(S, X, tau, 0, r, sigma, o_type)
  return (bumped - base) / delta_sigma

def gamma_fdm_bs(S, X, sigma, tau, r, delta_S, o_type: str = 'C'):
  """Central second-difference Black-Scholes gamma, using prices at S + delta_S, S and S - delta_S."""
  up = black_scholes(S + delta_S, X, tau, 0, r, sigma, o_type)
  mid = black_scholes(S, X, tau, 0, r, sigma, o_type)
  down = black_scholes(S - delta_S, X, tau, 0, r, sigma, o_type)
  return (up - 2 * mid + down) / (delta_S ** 2)

But can we use the FDM to get the Greeks directly from the neural net? That's simple: we just have to replace the BS model with our neural net pricer.

In [27]:
def _net_price(S, X, kappa, rho, theta, xi, v0, tau, r, o_type: str = 'C'):
  """Price a single option with the trained network, in original price units.

  Reads the notebook globals ``df_cols``, ``input_sc``, ``output_sc``, ``model``
  and ``device``.

  :param o_type: 'C' sets the call indicator, 'P' the put indicator; any other
      value sets both indicators to 0
  :return: array of shape (1, 1) holding the un-scaled network price
  """
  c = 1 if o_type == 'C' else 0
  p = 1 if o_type == 'P' else 0
  # The value order must follow df_cols, the columns the input scaler was fitted on.
  input_df = pd.DataFrame(
      np.array([[S, X, kappa, rho, theta, xi, v0, r, tau, c, p]]), columns=df_cols)
  net_input = torch.tensor(input_sc.transform(input_df), dtype=torch.float32).to(device)
  # Finite differences only need forward values, so skip building an autograd graph.
  with torch.no_grad():
    scaled_price = model(net_input)
  return output_sc.inverse_transform(scaled_price.cpu().numpy())


def delta_fdm_net(S, X, kappa, rho, theta, xi, v0, tau, r, delta_S, o_type: str = 'C'):
  """Forward-difference delta of the network price: bump S by ``delta_S``.

  :return: array of shape (1, 1)
  """
  bumped = _net_price(S + delta_S, X, kappa, rho, theta, xi, v0, tau, r, o_type)
  base = _net_price(S, X, kappa, rho, theta, xi, v0, tau, r, o_type)
  return (bumped - base) / delta_S


def theta_fdm_net(S, X, kappa, rho, theta, xi, v0, tau, r, delta_tau, o_type: str = 'C'):
  """Forward-difference sensitivity of the network price to time to expiration.

  ``tau`` is bumped upwards by ``delta_tau``. The ``theta`` argument is a model
  input passed through to the network, not the Greek computed here.

  :return: array of shape (1, 1)
  """
  bumped = _net_price(S, X, kappa, rho, theta, xi, v0, tau + delta_tau, r, o_type)
  base = _net_price(S, X, kappa, rho, theta, xi, v0, tau, r, o_type)
  return (bumped - base) / delta_tau


def gamma_fdm_net(S, X, kappa, rho, theta, xi, v0, tau, r, delta_S, o_type: str = 'C'):
  """Central second-difference gamma of the network price, using S + delta_S, S and S - delta_S.

  :return: array of shape (1, 1)
  """
  up = _net_price(S + delta_S, X, kappa, rho, theta, xi, v0, tau, r, o_type)
  mid = _net_price(S, X, kappa, rho, theta, xi, v0, tau, r, o_type)
  down = _net_price(S - delta_S, X, kappa, rho, theta, xi, v0, tau, r, o_type)
  return (up - 2 * mid + down) / (delta_S ** 2)

### Example 1

In [28]:
# Inputs for a put with strike 165 on an underlying at 92; the values mirror
# row 0 of the test table displayed earlier (rounded).
S = 92
X = 165
rho = -0.744619
kappa = 1.689353 
theta = 0.207761 
xi = 0.196682 
v0 = 0.179774
tau = 0.966828 
r = 0.091636 
# Bump sizes for the finite differences
delta_s = 0.1
# NOTE(review): delta_sigma is not used in this cell.
delta_sigma = 0.01
# NOTE(review): 0.0027 is roughly 1/365, presumably a one-day bump — confirm.
delta_tau = 0.0027
type_ = 'P'

print('Underlying price: ', S)
print('Strike price: ', X)
print('Time to Expiration: ', tau)
print('Interest rate: ', r)
print('Option type: ', type_,'\n')

print('ANN\'s Delta: ', delta_fdm_net(S, X, kappa, rho, theta, xi, v0, tau, r, delta_s, type_))
print('-------------')
# NOTE(review): theta is divided by 364 here but printed unscaled in Example 2;
# confirm the intended units (per day vs. per year) and whether 365 was meant.
print('ANN\'s Theta: ', theta_fdm_net(S, X, kappa, rho, theta, xi, v0, tau, r, delta_tau, type_) / 364)
print('-------------')
print('ANN\'s Gamma: ', gamma_fdm_net(S, X, kappa, rho, theta, xi, v0, tau, r, delta_s, type_))

Underlying price:  92
Strike price:  165
Time to Expiration:  0.966828
Interest rate:  0.091636
Option type:  P 

ANN's Delta:  [[-1.0305786]]
-------------
ANN's Theta:  [[0.00670716]]
-------------
ANN's Gamma:  [[0.18615723]]


### Example 2

In [29]:
# Inputs for a call with strike 110 on an underlying at 100
S = 100
X = 110
rho = -0.723259
kappa = 0.549236
theta = 0.234988
xi = 0.453115
v0 = 0.458942
tau = 0.910310
r = 0.01
# Bump sizes for the finite differences
delta_s = 0.1
# NOTE(review): delta_sigma is not used in this cell.
delta_sigma = 0.01
# NOTE(review): 0.0027 is roughly 1/365, presumably a one-day bump — confirm.
delta_tau = 0.0027
type_ = 'C'

print('Underlying price: ', S)
print('Strike price: ', X)
print('Time to Expiration: ', tau)
print('Interest rate: ', r)
print('Option type: ', type_,'\n')

print('ANN\'s Delta: ', delta_fdm_net(S, X, kappa, rho, theta, xi, v0, tau, r, delta_s, type_))
print('-------------')
# NOTE(review): unlike Example 1, theta is printed without the "/ 364" scaling,
# so the two theta figures are not in the same units.
print('ANN\'s Theta: ', theta_fdm_net(S, X, kappa, rho, theta, xi, v0, tau, r, delta_tau, type_))
print('-------------')
print('ANN\'s Gamma: ', gamma_fdm_net(S, X, kappa, rho, theta, xi, v0, tau, r, delta_s, type_))

Underlying price:  100
Strike price:  110
Time to Expiration:  0.91031
Interest rate:  0.01
Option type:  C 

ANN's Delta:  [[0.39126396]]
-------------
ANN's Theta:  [[1.0006516]]
-------------
ANN's Gamma:  [[-0.08869171]]


### Autodiff results

In [30]:
# Differentiate the network output for the first test row with respect to its inputs.
# Both the inputs and the output are in standardised units here, so the entries are
# gradients between scaled quantities, ordered like the input columns.
# NOTE(review): to read them as Greeks in original units they would still need to be
# rescaled with the scalers' scale factors — confirm before comparing with the FDM values.
model.zero_grad()
inp_g = X_test[0].detach().clone().requires_grad_(True)
test_out_g = model(inp_g)
(input_grad,) = torch.autograd.grad(test_out_g, inp_g, retain_graph=True)
grad_ns = input_grad.data
grad_ns

tensor([-0.3645,  1.7209, -0.0351,  0.0071,  0.0349, -0.0231, -0.0373, -0.0702,
         0.0377,  0.1268, -0.1870], device='cuda:0')