# Greeks 

The Greeks are the quantities representing the sensitivity of the price of options to a change of the underlying parameters.
The Greeks in the Black–Scholes model are relatively easy to calculate, a desirable property of financial models, and are very useful for derivatives traders, especially those who seek to hedge their portfolios from adverse changes in market conditions.

The most important Greeks are:

Delta
$$
\Delta = \frac{\partial V}{\partial S}
$$


Vega
$$
\mathcal{V} = \frac{\partial V}{\partial \sigma}
$$

Theta
$$
\Theta = \frac{\partial V}{\partial \tau}
$$

Gamma

$$
\Gamma = \frac{\partial^2 V}{\partial S^2}
$$

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from torch.autograd import Variable
from torch.utils.data import Dataset

In [2]:
# Seed every random number generator used below so that runs are repeatable
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

In [3]:
# Locations of the two synthetic option-price CSV files, relative to this notebook
t_synthetic_opt_path = "../data/trinomial_synthetic_options.csv"
b_synthetic_opt_path = "../data/binom_synthetic_options.csv"

In [4]:
def reduce_mem_usage(df):
    """Downcast the columns of ``df`` to the narrowest dtype that holds their values.

    The frame is modified in place and also returned.

    * ``object`` columns become ``category``.
    * Signed-integer NumPy columns become the smallest of int8/16/32/64 whose
      range contains the column's min and max (bounds are inclusive).
    * Float NumPy columns become float16 or float32 when the value range fits,
      otherwise float64. Only the *range* is checked, so precision is lost
      when a column is narrowed to float16/float32.
    * Every other dtype (bool, unsigned int, datetime, category, pandas
      extension dtypes, ...) is left untouched, which also makes the function
      safe to call twice on the same frame.

    :param df: DataFrame to shrink
    :return: the same DataFrame object, with downcast columns
    """
    signed_int_types = (np.int8, np.int16, np.int32, np.int64)
    narrow_float_types = (np.float16, np.float32)

    for col in df.columns:
        col_type = df[col].dtype

        if col_type == object:
            df[col] = df[col].astype('category')
            continue

        # Only plain NumPy signed ints ('i') and floats ('f') have a meaningful
        # numeric downcast; min()/max() on e.g. an unordered category raises.
        if not isinstance(col_type, np.dtype) or col_type.kind not in 'if':
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if col_type.kind == 'i':
            for candidate in signed_int_types:
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            for candidate in narrow_float_types:
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Range does not fit (or is NaN/inf): fall back to full precision
                df[col] = df[col].astype(np.float64)

    return df

In [5]:
# Each dataset goes through the same pipeline: read the CSV (its first column is
# the index), shrink the dtypes, then shuffle the rows with a fixed seed.
b_synthetic_opt = shuffle(
    reduce_mem_usage(pd.read_csv(b_synthetic_opt_path, index_col=0)),
    random_state=0,
)
t_synthetic_opt = shuffle(
    reduce_mem_usage(pd.read_csv(t_synthetic_opt_path, index_col=0)),
    random_state=0,
)

  mask |= (ar1 == a)


In [6]:
# Stack both datasets, shuffle the combined rows with a fixed seed and give the
# result a fresh 0..n-1 index (the shuffled index is discarded, not kept as a column).
stacked_options = pd.concat([b_synthetic_opt, t_synthetic_opt], ignore_index=True)
synthetic_options = shuffle(stacked_options, random_state=0).reset_index(drop=True)

In [7]:
# Display the combined, shuffled dataset
synthetic_options

Unnamed: 0,Price,Strike,Type,Vol,Interest Rate,Time to Expiration,Option Price
0,94.0,76.0,C,0.300049,0.070007,0.700195,23.218750
1,93.0,114.0,C,0.899902,0.010002,0.700195,21.140625
2,98.0,97.0,C,0.300049,0.049988,0.500000,9.953125
3,90.0,119.0,P,0.899902,0.049988,0.899902,47.750000
4,84.0,114.0,P,0.700195,0.070007,0.899902,39.593750
...,...,...,...,...,...,...,...
3223035,117.0,80.0,C,0.300049,0.070007,0.700195,41.250000
3223036,88.0,97.0,C,0.500000,0.010002,0.099976,2.419922
3223037,100.0,65.0,C,0.500000,0.049988,0.010002,35.031250
3223038,99.0,100.0,C,0.300049,0.070007,0.700195,11.703125


## Preprocessing

In [8]:
# One-hot encode the categorical columns. With an empty prefix and separator the
# dummy columns are named after the category values themselves.
synthetic_options = pd.get_dummies(synthetic_options, prefix='', prefix_sep='')

In [9]:
# Feature columns: every column except the regression target
df_cols = synthetic_options.columns.drop('Option Price')
df_cols

Index(['Price', 'Strike', 'Vol', 'Interest Rate', 'Time to Expiration', 'C',
       'P'],
      dtype='object')

In [10]:
# Standardise features and target with separate scalers.
# NOTE(review): both scalers are fitted on the full dataset *before* the split,
# so validation/test statistics leak into the scaling. Changing this would
# alter the inputs the saved checkpoint expects - confirm before touching.
input_sc = StandardScaler()
output_sc = StandardScaler()
feature_frame = synthetic_options.drop('Option Price', axis=1)
target_column = synthetic_options['Option Price'].values.reshape(-1, 1)
input_data = input_sc.fit_transform(feature_frame)
output_data = output_sc.fit_transform(target_column)

# 80% train / 10% validation / remaining rows test, taken in row order
train_size = 0.8
val_size = 0.1

n_samples = len(input_data)
last_train_idx = int(np.round(n_samples * train_size))
last_val_idx = last_train_idx + int(np.round(n_samples * val_size))

train_rows = slice(0, last_train_idx)
val_rows = slice(last_train_idx, last_val_idx)
test_rows = slice(last_val_idx, None)

X_train, X_val, X_test = (input_data[rows] for rows in (train_rows, val_rows, test_rows))
y_train, y_val, y_test = (output_data[rows] for rows in (train_rows, val_rows, test_rows))

In [11]:
# Convert the NumPy splits to float32 tensors. The former
# `Variable(torch.Tensor(...))` wrapper is dropped: `Variable` is deprecated and
# simply returns the tensor it is given, so the resulting values are the same.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

y_train = torch.tensor(y_train, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

## Model

In [12]:
# Run on the first GPU when one is visible to PyTorch, otherwise on the CPU
CUDA = torch.cuda.is_available()
if CUDA:
    device = 'cuda:0'
else:
    device = 'cpu'

In [13]:
class ResBlock(nn.Module):
  """Residual wrapper: the output is ``module(x) + x`` (a skip connection).

  The wrapped module must return a tensor that can be added to its input.
  """

  def __init__(self, module):
    super().__init__()
    self.module = module

  def forward(self, x):
    transformed = self.module(x)
    return transformed + x

In [14]:
class HiddenLayer(nn.Module):
  """Square fully-connected layer followed by an activation.

  :param layer_size: number of input and output features of the linear layer
  :param act_fn: activation name, one of 'ReLU', 'LeakyReLU' or 'ELU'
  :raises ValueError: if ``act_fn`` is not a supported name (previously this
      left ``self.layer`` undefined and only failed later, inside ``forward``)
  """

  # Supported activation names mapped to their module classes
  _ACTIVATIONS = {'ReLU': nn.ReLU, 'LeakyReLU': nn.LeakyReLU, 'ELU': nn.ELU}

  def __init__(self, layer_size, act_fn):
    super(HiddenLayer, self).__init__()

    if act_fn not in self._ACTIVATIONS:
      raise ValueError(
          'Unsupported act_fn %r; expected one of %s'
          % (act_fn, sorted(self._ACTIVATIONS)))

    # The attribute name and Sequential layout are kept so that parameter
    # names (layer.0.weight / layer.0.bias) still match saved checkpoints.
    self.layer = nn.Sequential(
        nn.Linear(layer_size, layer_size),
        self._ACTIVATIONS[act_fn]())

  def forward(self, x):
    return self.layer(x)

In [15]:
class Net(nn.Module):
  """Fully-connected residual network.

  Layout: Linear(input_size -> hidden_size) + activation, then
  ``num_layers // 2`` residual blocks of two ``HiddenLayer``s each (an odd
  ``num_layers`` is rounded down), then Linear(hidden_size -> output_size).

  :param input_size: number of input features
  :param output_size: number of outputs
  :param hidden_size: width of every hidden layer
  :param num_layers: number of hidden layers, grouped in residual pairs
  :param act_fn: activation name, one of 'ReLU', 'LeakyReLU' or 'ELU'
  :raises ValueError: if ``act_fn`` is not a supported name (previously this
      surfaced as an AttributeError about a missing ``initial_layer``)
  """

  def __init__(self, input_size, output_size, hidden_size, num_layers, act_fn):
    super(Net, self).__init__()

    activations = {'ReLU': nn.ReLU, 'LeakyReLU': nn.LeakyReLU, 'ELU': nn.ELU}
    if act_fn not in activations:
      raise ValueError(
          'Unsupported act_fn %r; expected one of %s'
          % (act_fn, sorted(activations)))

    self.input_size = input_size
    self.output_size = output_size
    self.hidden_size = hidden_size

    # Attribute names (initial_layer, hidden_layers, net) are part of the
    # state-dict keys and must stay as they are for saved checkpoints to load.
    self.initial_layer = nn.Sequential(
        nn.Linear(self.input_size, self.hidden_size),
        activations[act_fn]())

    self.hidden_layers_list = [
        ResBlock(
            nn.Sequential(
                HiddenLayer(self.hidden_size, act_fn),
                HiddenLayer(self.hidden_size, act_fn)
            )
        )
        for _ in range(num_layers // 2)
    ]

    self.hidden_layers = nn.Sequential(*self.hidden_layers_list)

    self.net = nn.Sequential(
        self.initial_layer,
        self.hidden_layers,
        nn.Linear(self.hidden_size, self.output_size)
    )

  def forward(self, x):
    return self.net(x)

In [16]:
def init_weights(m, init_m: str):
  """Re-initialise every ``nn.Linear`` inside ``m`` in place.

  Weights are drawn with the chosen scheme and biases (when present) are set
  to 0.01. Modules that are not ``nn.Linear`` are left untouched.

  :param m: module whose sub-modules are visited via ``m.apply``
  :param init_m: one of 'uniform', 'normal', 'xavier uniform', 'xavier normal'.
      The historical misspelling 'xaiver uniform' is still accepted so that
      existing configurations keep working.
  :raises ValueError: if ``init_m`` is not a known scheme (previously an
      unknown name, including the correctly spelled 'xavier uniform', was
      silently ignored and the default initialisation was kept)
  """
  weight_initializers = {
      'uniform': torch.nn.init.uniform_,
      'normal': torch.nn.init.normal_,
      'xavier uniform': torch.nn.init.xavier_uniform_,
      'xaiver uniform': torch.nn.init.xavier_uniform_,  # legacy misspelling
      'xavier normal': torch.nn.init.xavier_normal_,
  }
  if init_m not in weight_initializers:
    raise ValueError(
        'Unknown init method %r; expected one of %s'
        % (init_m, sorted(weight_initializers)))
  fill_weight = weight_initializers[init_m]

  @torch.no_grad()
  def init_linear(module):
    if isinstance(module, nn.Linear):
      fill_weight(module.weight)
      # nn.Linear(..., bias=False) has no bias tensor to fill
      if module.bias is not None:
        module.bias.fill_(0.01)

  m.apply(init_linear)

In [17]:
# --- Architecture ---
input_size = 7
output_size = 1
hidden_size = 600
num_layers = 6
act_fn = 'LeakyReLU'
init_method = 'xaiver uniform'

# --- Training settings ---
batch_size = 1393
epochs = 2000
lr = 9.193458959442868e-05

# Build the network, then overwrite the default weights with the chosen scheme
model = Net(input_size, output_size, hidden_size, num_layers, act_fn)
init_weights(model, init_method)

# Mean squared error on the standardised option price
loss_fn = nn.MSELoss()

In [18]:
# Move every split onto the selected device, one feature/target pair at a time
X_train, y_train = X_train.to(device), y_train.to(device)
X_val, y_val = X_val.to(device), y_val.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

In [19]:
class OptDataset(Dataset):
  """Map-style dataset that pairs the i-th entry of ``X`` with the i-th entry of ``y``."""

  def __init__(self, X, y):
    self.X, self.y = X, y

  def __len__(self):
    # The size is taken from the feature container
    return len(self.X)

  def __getitem__(self, idx):
    features = self.X[idx]
    target = self.y[idx]
    return features, target

In [20]:
save_model_path = '../models/final_bintri_model.chkpt'

# Rebuild the architecture, then overwrite its weights with the saved ones.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
model = Net(input_size, output_size, hidden_size, num_layers, act_fn)
saved_state = torch.load(save_model_path, map_location=device)
model.load_state_dict(saved_state)
model = model.to(device)

In [21]:
test_size = 30

# Price the first `test_size` held-out rows; gradients are not needed here
with torch.no_grad():
    scaled_predictions = model(X_test[:test_size])
scaled_targets = y_test[:test_size]

# Undo the target standardisation so both arrays are back in price units
test_out = output_sc.inverse_transform(scaled_predictions.cpu().detach().numpy())
real_out = output_sc.inverse_transform(scaled_targets.cpu().detach().numpy())

In [22]:
# Take a private copy of the first test row that tracks gradients, and run it
# through the network so the output can later be differentiated w.r.t. the input
model.zero_grad()
inp_g = X_test[0].detach().clone().requires_grad_()
test_out_g = model(inp_g)

In [23]:
cols = ['Price', 'Strike', 'Vol', 'Interest Rate', 'Time to Expiration', 'C', 'P']

# Bring the scaled test inputs back to their original units and put the
# predicted and reference prices next to them
unscaled_inputs = input_sc.inverse_transform(X_test[0:test_size].cpu().detach().numpy())
test_options = pd.DataFrame(unscaled_inputs, columns=cols).assign(
    Prediction=test_out,
    Real=real_out,
)

In [24]:
# Display the sampled test options with predicted and reference prices
test_options

Unnamed: 0,Price,Strike,Vol,Interest Rate,Time to Expiration,C,P,Prediction,Real
0,79.012611,89.001038,0.300038,0.090027,0.700247,0.0,1.0,11.165807,11.955178
1,120.008972,138.989365,0.100124,0.049989,0.099802,1.0,0.0,0.033564,0.009182
2,94.997108,133.992172,0.899779,0.049989,0.49999,1.0,0.0,13.6476,13.661748
3,102.000168,101.993736,0.300038,0.010006,0.49999,1.0,0.0,8.870663,8.843196
4,99.999916,92.998787,0.950074,0.019984,0.21005,0.0,1.0,13.225761,13.146127
5,99.999916,81.988579,0.549789,0.080021,0.710059,1.0,0.0,29.887131,29.878733
6,92.996857,125.996658,0.300038,0.030004,0.700247,0.0,1.0,32.77396,33.374008
7,98.999062,110.999954,0.700287,0.030004,0.300095,0.0,1.0,22.061502,22.094215
8,99.999916,61.000366,0.799752,0.090027,0.49999,1.0,0.0,45.529526,45.51165
9,121.012009,159.01091,0.499995,0.090027,0.49999,1.0,0.0,7.420802,7.456037


# Get the Greeks from the Neural Net

One method for obtaining an option's Greeks from any pricing model is the finite-difference method.

In numerical analysis, finite-difference methods (FDM) are a class of numerical techniques for solving differential equations by approximating derivatives with finite differences. Both the spatial domain and time interval (if applicable) are discretized, or broken into a finite number of steps, and the value of the solution at these discrete points is approximated by solving algebraic equations containing finite differences and values from nearby points.

Finite difference methods convert ordinary differential equations (ODE) or partial differential equations (PDE), which may be nonlinear, into a system of linear equations that can be solved by matrix algebra techniques. 

The essence of the method is that we will approximate the partial derivative representing the particular sensitivity of interest.

For example, the delta of an option is the derivative of the option's value with respect to the underlying price, $\frac{\partial V}{\partial S}$. If we calculate two option prices, one at $S$ and the other at $S + \Delta S$, subtract them and divide by $\Delta S$, we obtain a ***forward difference approximation*** to the derivative: 

$$
\frac{\partial V}{\partial S} \approx \frac{V(S + \Delta S, T, \sigma, r, X) - V(S, T, \sigma, r, X)}{\Delta S}
$$

The same reasoning can be applied to all the first-order derivatives of an option's price.

$\Gamma$, on the other hand, is a second-order derivative, so the previous formula cannot be used to approximate this Greek. It can be shown that $\Gamma$ can be approximated with the following central-difference formula:

$$
\frac{\partial^2 V}{\partial S^2} \approx \frac{V(S + \Delta S, T, \sigma, r, X) - 2 V(S, T, \sigma, r, X) + V(S - \Delta S, T, \sigma, r, X)}{(\Delta S)^2}
$$

In [25]:
def get_d1_d2(S, X, T, t, r, sigma):
    """
    Compute the d1 and d2 terms of the Black-Scholes formula.

    Only the difference ``T - t`` (the remaining time to expiry) enters the
    computation.

    :param S: underlying price
    :param X: option's strike price
    :param T: expiry time (in years)
    :param t: current time (in years)
    :param r: interest rate
    :param sigma: underlying volatility
    :return: (d1, d2)
    """
    time_left = T - t
    vol_sqrt_time = sigma * np.sqrt(time_left)
    drift = (r + sigma * sigma / 2.) * time_left

    d1 = (np.log(S / X) + drift) / vol_sqrt_time
    d2 = d1 - vol_sqrt_time
    return d1, d2


def black_scholes(S, X, T, t, r, sigma, o_type: str = "C") -> np.single:
    """
    Price a European option with the Black-Scholes closed-form formula.

    :param S: underlying price
    :param X: option's strike price
    :param T: expiry time (in years); only ``T - t`` is used
    :param t: current time (in years)
    :param r: interest rate, continuously compounded (enters as ``exp(-r * (T - t))``)
    :param sigma: underlying volatility
    :param o_type: "C" prices a call; any other value is priced as a put
    :return: the Black-Scholes option price
    """
    d1, d2 = get_d1_d2(S, X, T, t, r, sigma)
    # Strike discounted back from expiry to the current time
    discounted_strike = X * np.exp(-r * (T - t))

    if o_type != "C":
        return discounted_strike * stats.norm.cdf(-d2, 0, 1) - S * stats.norm.cdf(-d1, 0, 1)
    return S * stats.norm.cdf(d1, 0, 1) - discounted_strike * stats.norm.cdf(d2, 0, 1)

In [26]:
def delta_fdm_bs(S, X, sigma, tau, r, delta_S, o_type: str = 'C'):
  """Forward-difference delta: change in Black-Scholes price per unit bump ``delta_S`` in ``S``."""
  bumped_price = black_scholes(S + delta_S, X, tau, 0, r, sigma, o_type)
  base_price = black_scholes(S, X, tau, 0, r, sigma, o_type)
  return (bumped_price - base_price) / delta_S

def theta_fdm_bs(S, X, sigma, tau, r, delta_tau, o_type: str = 'C'):
  """Forward-difference derivative of the Black-Scholes price w.r.t. time to expiration ``tau``."""
  bumped_price = black_scholes(S, X, tau + delta_tau, 0, r, sigma, o_type)
  base_price = black_scholes(S, X, tau, 0, r, sigma, o_type)
  return (bumped_price - base_price) / delta_tau

def vega_fdm_bs(S, X, sigma, tau, r, delta_sigma, o_type: str = 'C'):
  """Forward-difference vega: change in Black-Scholes price per unit bump ``delta_sigma`` in volatility."""
  bumped_price = black_scholes(S, X, tau, 0, r, sigma + delta_sigma, o_type)
  base_price = black_scholes(S, X, tau, 0, r, sigma, o_type)
  return (bumped_price - base_price) / delta_sigma

def gamma_fdm_bs(S, X, sigma, tau, r, delta_S, o_type: str = 'C'):
  """Central second-difference gamma of the Black-Scholes price w.r.t. ``S``."""
  price_up = black_scholes(S + delta_S, X, tau, 0, r, sigma, o_type)
  price_mid = black_scholes(S, X, tau, 0, r, sigma, o_type)
  price_down = black_scholes(S - delta_S, X, tau, 0, r, sigma, o_type)
  return (price_up - 2 * price_mid + price_down) / (delta_S ** 2)

But can we use the FDM to get the Greeks directly from the neural net? Yes: we simply replace the Black–Scholes model with our neural-net pricer.

In [29]:
def _net_price(S, X, sigma, tau, r, o_type):
  """Price one option with the trained network.

  Builds a single-row feature frame in the column order of ``df_cols``
  (one-hot 'C'/'P' flags derived from ``o_type``), standardises it with
  ``input_sc``, runs ``model`` without recording gradients and maps the output
  back to price units with ``output_sc``.

  :return: NumPy array of shape (1, 1) holding the predicted price
  """
  c = 1 if o_type == 'C' else 0
  p = 1 if o_type == 'P' else 0
  features = pd.DataFrame(np.array([[S, X, sigma, r, tau, c, p]]), columns=df_cols)
  net_input = torch.Tensor(input_sc.transform(features)).to(device)
  # Inference only: no autograd graph is needed for a finite difference
  with torch.no_grad():
    scaled_price = model(net_input)
  return output_sc.inverse_transform(scaled_price.cpu().numpy())


def delta_fdm_net(S, X, sigma, tau, r, delta_S, o_type: str = 'C'):
  """Forward-difference delta of the network price w.r.t. ``S``; returns a (1, 1) array."""
  bumped = _net_price(S + delta_S, X, sigma, tau, r, o_type)
  base = _net_price(S, X, sigma, tau, r, o_type)
  return (bumped - base) / delta_S


def theta_fdm_net(S, X, sigma, tau, r, delta_tau, o_type: str = 'C'):
  """Forward-difference derivative of the network price w.r.t. ``tau``; returns a (1, 1) array."""
  bumped = _net_price(S, X, sigma, tau + delta_tau, r, o_type)
  base = _net_price(S, X, sigma, tau, r, o_type)
  return (bumped - base) / delta_tau


def vega_fdm_net(S, X, sigma, tau, r, delta_sigma, o_type: str = 'C'):
  """Forward-difference vega of the network price w.r.t. ``sigma``; returns a (1, 1) array."""
  bumped = _net_price(S, X, sigma + delta_sigma, tau, r, o_type)
  base = _net_price(S, X, sigma, tau, r, o_type)
  return (bumped - base) / delta_sigma


def gamma_fdm_net(S, X, sigma, tau, r, delta_S, o_type: str = 'C'):
  """Central second-difference gamma of the network price w.r.t. ``S``; returns a (1, 1) array.

  NOTE(review): with a piecewise-linear activation such as ReLU or LeakyReLU
  the network's second derivative is zero almost everywhere, so this estimate
  can be noisy and depends strongly on ``delta_S``.
  """
  price_up = _net_price(S + delta_S, X, sigma, tau, r, o_type)
  price_mid = _net_price(S, X, sigma, tau, r, o_type)
  price_down = _net_price(S - delta_S, X, sigma, tau, r, o_type)
  return (price_up - 2 * price_mid + price_down) / (delta_S ** 2)

### Example 1

In [37]:
# Contract and finite-difference step sizes for this example
S = 100
X = 110
vol = 0.39
tau = 0.910310
r = 0.01
delta_s = 0.1
delta_sigma = 0.01
delta_tau = 0.0027
type_ = 'C'

for label, value in (('Underlying price: ', S),
                     ('Strike price: ', X),
                     ('Volatility: ', vol),
                     ('Time to Expiration: ', tau),
                     ('Interest rate: ', r)):
    print(label, value)
print('Option type: ', type_, '\n')

# One row per Greek: (BS label, ANN label, BS function, ANN function, step, divisor).
# Theta is divided by 364 and vega by 100 before printing.
# NOTE(review): 364 looks like it was meant to be 365 (days per year) - confirm.
comparisons = (
    ('BS Delta: ', "ANN's Delta: ", delta_fdm_bs, delta_fdm_net, delta_s, None),
    ('BS Theta: ', "ANN's Theta: ", theta_fdm_bs, theta_fdm_net, delta_tau, 364),
    ('BS Vega: ', "ANN's Vega: ", vega_fdm_bs, vega_fdm_net, delta_sigma, 100),
    ('BS Gamma: ', "ANN's Gamma: ", gamma_fdm_bs, gamma_fdm_net, delta_s, None),
)

for row, (bs_label, net_label, bs_greek, net_greek, step, divisor) in enumerate(comparisons):
    if row:
        print('-------------')
    for label, greek in ((bs_label, bs_greek), (net_label, net_greek)):
        value = greek(S, X, vol, tau, r, step, type_)
        if divisor is not None:
            value = value / divisor
        print(label, value)

Underlying price:  100
Strike price:  110
Volatility:  0.39
Time to Expiration:  0.91031
Interest rate:  0.01
Option type:  C 

BS Delta:  0.4823389002504541
ANN's Delta:  [[0.48894882]]
-------------
BS Theta:  0.023372964785468603
ANN's Theta:  [[0.02222329]]
-------------
BS Vega:  0.38032043130596094
ANN's Vega:  [[0.3822813]]
-------------
BS Gamma:  0.010710223327947686
ANN's Gamma:  [[-0.00520611]]


### Example 2

In [31]:
# Contract and finite-difference step sizes for this example
S = 79.012611
X = 89.001038
vol = 0.300038
tau = 0.700247
r = 0.090027
delta_s = 0.1
delta_sigma = 0.01
delta_tau = 0.0027
type_ = 'P'

for label, value in (('Underlying price: ', S),
                     ('Strike price: ', X),
                     ('Volatility: ', vol),
                     ('Time to Expiration: ', tau),
                     ('Interest rate: ', r)):
    print(label, value)
print('Option type: ', type_, '\n')

# One row per Greek: (BS label, ANN label, BS function, ANN function, step, divisor).
# Theta is divided by 364 and vega by 100 before printing.
# NOTE(review): 364 looks like it was meant to be 365 (days per year) - confirm.
comparisons = (
    ('BS Delta: ', "ANN's Delta: ", delta_fdm_bs, delta_fdm_net, delta_s, None),
    ('BS Theta: ', "ANN's Theta: ", theta_fdm_bs, theta_fdm_net, delta_tau, 364),
    ('BS Vega: ', "ANN's Vega: ", vega_fdm_bs, vega_fdm_net, delta_sigma, 100),
    ('BS Gamma: ', "ANN's Gamma: ", gamma_fdm_bs, gamma_fdm_net, delta_s, None),
)

for row, (bs_label, net_label, bs_greek, net_greek, step, divisor) in enumerate(comparisons):
    if row:
        print('-------------')
    for label, greek in ((bs_label, bs_greek), (net_label, net_greek)):
        value = greek(S, X, vol, tau, r, step, type_)
        if divisor is not None:
            value = value / divisor
        print(label, value)

Underlying price:  79.012611
Strike price:  89.001038
Volatility:  0.300038
Time to Expiration:  0.700247
Interest rate:  0.090027
Option type:  P 

BS Delta:  -0.5378358133078365
ANN's Delta:  [[-0.6069088]]
-------------
BS Theta:  0.0022929478639916265
ANN's Theta:  [[0.00218429]]
-------------
BS Vega:  0.26266418868629415
ANN's Vega:  [[0.23565292]]
-------------
BS Gamma:  0.020014582818816958
ANN's Gamma:  [[0.07305145]]


### Autodiff results

In [32]:
# Differentiate the network output w.r.t. a gradient-tracking copy of the first test row.
# NOTE: both the input and the output are standardised here, so these are
# sensitivities in scaled units rather than Greeks in price units.
model.zero_grad()
inp_g = X_test[0].detach().clone().requires_grad_()
test_out_g = model(inp_g)
(input_grad,) = torch.autograd.grad(test_out_g, inp_g, retain_graph=True)
grad_ns = input_grad.data
grad_ns

tensor([-0.3918,  1.1929,  0.3682, -0.0448,  0.0126,  0.0629,  0.0440],
       device='cuda:0')