# Generating Option Data using the Heston Model with Least Squares Monte Carlo

In [1]:
!pip install --upgrade tensorflow

Requirement already up-to-date: tensorflow in /home/paolo/.local/lib/python3.8/site-packages (2.8.0)


In [2]:
!pip install --upgrade tf-quant-finance

Requirement already up-to-date: tf-quant-finance in /home/paolo/.local/lib/python3.8/site-packages (0.0.1.dev31)


In [3]:
!pip install scikit-optimize



In [9]:
#@title **Imports** { display-mode: "form" }

from typing import Tuple

import numpy as np

import pandas as pd

# Import for Tensorflow Quant Finance
import tf_quant_finance as tff 

# Shortcut alias
pde = tff.math.pde

from IPython.core.pylabtools import figsize
figsize(21, 14)  # better graph size for Colab 

from tf_quant_finance.experimental.lsm_algorithm import lsm
from tf_quant_finance.experimental.lsm_algorithm import payoff
from typing import Tuple

from skopt.space import Space
from skopt.sampler import Lhs

ModuleNotFoundError: No module named 'tf_quant_finance'

# Heston model 

Unlike geometric Brownian motion, which assumes that the volatility is constant, the Heston model is a stochastic volatility model: the variance itself follows a random process.

The Heston model assumes that $S_t$, the price of the asset, is determined by a stochastic process:

$$
dS_t = \mu S_t dt + \sqrt{v_t} S_t dW_t^S, S_{t_0} = S_0,
$$

$$
dv_t = \kappa (\theta - v_t) dt + \gamma \sqrt{v_t} dW_t^v, v_{t_0} = v_0
$$

$$
dW_t^S dW_t^v = \rho dt
$$

with $v_t$ the instantaneous variance, and $W_t^S$, $W_t^v$ two Wiener processes with correlation coefficient $\rho$. The second equation models a mean-reverting process for the variance, where $\kappa$ is the reversion speed, $\theta$ is the long-run average variance, $\gamma$ is the volatility of the variance (called `xi` in the code below) and $v_0$ is the initial variance.

In [5]:
def generate_heston_path(S, T, r, kappa, theta, v_0, rho, xi,
                         steps, n_paths, return_vol: bool = False,
                         seed=None):
    """
    Simulate Heston price paths on an equally spaced time grid.

    The price is advanced with a log-Euler step and the variance with an Euler
    step whose result is reflected at zero (absolute value), so the variance
    fed into the next square root is never negative.

    :param S: Underlying price at time t=0
    :param T: Time to expiration (in years)
    :param r: Risk-free interest rate, used as the drift of the price
    :param kappa: Rate of mean reversion of the variance
    :param theta: Long-run average variance
    :param v_0: Variance at time t=0 (its square root scales the price shock)
    :param rho: Instantaneous correlation of the two Wiener processes
    :param xi: Volatility of volatility
    :param steps: Number of time steps of the simulation
    :param n_paths: Number of simulated paths
    :param return_vol: If True, also return the simulated variance paths
    :param seed: Optional seed for a private random generator. When None
        (default) the global ``np.random`` state is used, exactly as before,
        so ``np.random.seed(...)`` set by the caller is still honoured.
    :return: Array of prices with shape (n_paths, steps); column ``t`` holds
        the price after ``t + 1`` steps (the initial price is not stored).
        With ``return_vol=True`` a tuple ``(prices, variances)`` of two such
        arrays is returned.
    """
    dt = T / steps
    size = (n_paths, steps)
    prices = np.zeros(size)
    sigs = np.zeros(size)
    S_t = S
    v_t = v_0

    # Both the np.random module and a RandomState expose multivariate_normal,
    # so the unseeded call keeps drawing from the global legacy stream.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Loop invariants: the shocks of every step share the same distribution.
    mu = np.array([0, 0])
    cov = np.array([[1, rho],
                    [rho, 1]])
    sqrt_dt = np.sqrt(dt)

    for t in range(steps):
        # Column 0 drives the price, column 1 the variance; scaling by
        # sqrt(dt) turns unit normals into Brownian increments.
        WT = rng.multivariate_normal(mu, cov=cov, size=n_paths) * sqrt_dt
        # The price step must use the variance from the start of the interval,
        # so it is computed before v_t is overwritten.
        S_t = S_t * (np.exp((r - v_t / 2) * dt + np.sqrt(v_t) * WT[:, 0]))
        v_t = np.abs(v_t + kappa * (theta - v_t) * dt + xi * np.sqrt(v_t) * WT[:, 1])
        prices[:, t] = S_t
        sigs[:, t] = v_t

    if return_vol:
        return prices, sigs

    return prices

In [6]:
S = 100 # Underlying (spot) price at t=0; starting point of every simulated path
N = 50 # Number of Monte Carlo paths simulated for each sampled parameter set

dtype = np.float64 # Floating-point type handed to the payoff function and the LSM pricer

In [10]:
def params_ranges(S,
                  strike_step: float = 1.0,
                  vol_range: Tuple[float, float, float] = (0.02, 1.02, 0.02),
                  interest_range: Tuple[float, float, float] = (0.002, 0.1, 0.01),
                  tau_range: Tuple[float, float, float] = (0.15, 1.1, 0.02)) -> Tuple[
    np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Generate ranges for strike prices, volatility, interest rates and time to expiration

    The strike grid starts at ``S // 2`` and never exceeds ``S + S // 2``; the
    upper bound is included whenever it lies on the grid. The other three grids
    are half-open ``np.arange`` ranges, so their ``end`` value is excluded.

    :param S: the underlying price
    :param strike_step: spacing between two consecutive strikes
    :param vol_range: (start_vol, end_vol, step_vol) for the volatility range
    :param interest_range: (start_rate, end_rate, step_rate) for the interest rate range
    :param tau_range: (start_tau, end_tau, step_tau) for the time to expiration range
    :return: (strike_range, volatility_range, interest_rates_range, time_to_expiration_range)
    """
    half = S // 2
    lower, upper = half, S + half
    # Count the grid points explicitly instead of padding the end by 1: the
    # padding only works for unit steps and lets e.g. a 0.5 step overshoot the
    # upper bound. The tiny tolerance absorbs floating-point error in the ratio.
    n_strikes = int(np.floor((upper - lower) / strike_step + 1e-9)) + 1
    strikes = lower + strike_step * np.arange(max(n_strikes, 0))

    vols = np.arange(vol_range[0], vol_range[1], vol_range[2])
    interests = np.arange(interest_range[0], interest_range[1], interest_range[2])
    taus = np.arange(tau_range[0], tau_range[1], tau_range[2])

    return strikes, vols, interests, taus

In [11]:
# Build the parameter grids around the configured spot price S rather than a
# repeated literal, so the strike grid follows S if it is ever changed.
s_range, vol_range, r_range, t_range = params_ranges(
    S,
    vol_range=(0.1, 1.05, 0.05),
    interest_range=(0.01, 0.11, 0.01),
    tau_range=(0.1, 1.2, 0.1))

The sampling technique used is ***Latin hypercube sampling (LHS)***, which generates near-random samples of the parameter values from a multidimensional distribution.

In our case the variables (dimension of the hyperspace) are 7, namely:

- interest rate $r$
- time to expiration $\tau$
- correlation $\rho$
- reversion speed $\kappa$
- the volatility of the volatility $\xi$
- the long-run average variance $\theta$
- the initial variance $v_0$


In [12]:
# Sampling domain: one (low, high) interval per model/contract parameter.
# The order of the dimensions is the order in which each sample is unpacked
# when the paths are generated: r, t, rho, kappa, xi, theta, v0.
space = Space([
               (0.01, 0.10), # interest rate
               (0.1, 1.1), # time to expiration
               (-0.9, 0.0), # correlation
               (0.0, 2.0), # reversion speed
               (0.0, 0.5), # volatility of volatility
               (0.0, 0.5),  # long average variance
               (0.05, 0.50), # initial variance v_0
               ])
lhs = Lhs(lhs_type="classic", criterion=None)
# A fixed random_state makes the 3000 sampled parameter sets identical on every
# run; without it the data set changes each time the notebook is executed.
x = lhs.generate(space.dimensions, 3000, random_state=42)

NameError: name 'Space' is not defined

After generating the samples, we generate the Heston paths for each of these points with the following list comprehension.

In [None]:
# One entry per sampled parameter set: the N simulated price paths (one time
# step per day, 365 days per year) followed by the parameters that produced
# them, in the order (paths, r, t, rho, kappa, xi, theta, v0).
paths_list = [
    (
        generate_heston_path(S, T=tau, r=rate, kappa=kappa, theta=theta,
                             v_0=v0, rho=rho, xi=xi,
                             steps=int(365 * tau), n_paths=N),
        rate, tau, rho, kappa, xi, theta, v0,
    )
    for rate, tau, rho, kappa, xi, theta, v0 in x
]

In [None]:
# Empty frame that fixes the column layout of the final data set; the option
# chains computed further down are appended to it.
option_data = pd.DataFrame(
    data=None,
    columns=[
        'Price', 'Strike', 'Type',
        'Kappa', 'Rho', 'Theta', 'Xi', 'V_0',
        'Interest Rate', 'Time to Expiration',
        'Option Price',
    ],
)

The option generation loop calculates an option chain for each set of paths generated in the previous step and appends it to the option dataframe. Each option chain is generated so that the moneyness values ($m = S/K$ for a call and $m = K/S$ for a put) are between $0.5$ and $1.5$, so in this particular case the strikes are between $50 \$$ and $150 \$$.

In [None]:
# Degree-2 polynomial basis for the LSM regression and a put payoff built
# from every strike in s_range.
basis_fn = lsm.make_polynomial_basis(2)
payoff_fn = payoff.make_basket_put_payoff(s_range, dtype=dtype)

chain_columns = ['Price',
                 'Strike',
                 'Type',
                 'Kappa',
                 'Rho',
                 'Theta',
                 'Xi',
                 'V_0',
                 'Interest Rate',
                 'Time to Expiration',
                 'Option Price']

# Chains are collected here and concatenated once after the loop; growing the
# frame inside the loop re-copies all earlier rows on every iteration.
opt_chains = []

for path, r ,t, rho, k, xi, theta, v0 in paths_list:
  # Option price
  # Add the trailing axis before handing the paths to least_square_mc.
  path = np.expand_dims(path, -1)
  # Take the number of time steps from the simulated paths themselves, so it
  # cannot drift from the value used when they were generated.
  steps = path.shape[1]
  dt = t / steps
  exercise_times = np.arange(1, steps, 1)
  # Discount every exercise tick with this sample's own rate r over the time
  # elapsed since tick 0 (tick * dt years). The previous fixed 1.5% *per tick*
  # ignored r and, with daily ticks, discounted far too strongly.
  # NOTE(review): the stored paths begin one step after t=0, so tick 0 is
  # treated as the valuation date here - confirm this is the intended convention.
  discount_factors = np.exp(-r * dt * exercise_times)
  opt_prices = lsm.least_square_mc(
        path, exercise_times, payoff_fn, basis_fn,
        discount_factors=discount_factors, dtype=dtype).numpy()
  opt_chain = pd.DataFrame(columns=chain_columns)
  # The first array-valued assignment sets the number of rows; the scalar
  # assignments below are broadcast to every strike.
  opt_chain['Strike'] = s_range
  opt_chain['Option Price'] = opt_prices
  opt_chain['Type'] = 'P'
  opt_chain['Price'] = S
  opt_chain['Interest Rate'] = r
  opt_chain['Time to Expiration'] = t
  opt_chain['Kappa'] = k
  opt_chain['Rho'] = rho
  opt_chain['V_0'] = v0
  opt_chain['Theta'] = theta
  opt_chain['Xi'] = xi

  opt_chains.append(opt_chain)

# Same result as appending chain by chain: existing rows first, then the new
# chains in loop order, with a fresh 0..n-1 index.
option_data = pd.concat([option_data, *opt_chains], ignore_index=True)

In [None]:
# Show the assembled option data set as the cell output.
option_data

In [None]:
# Persist the synthetic put data set as CSV; the path is relative to the
# current working directory.
# NOTE(review): no `index=` argument is passed, so the writer's default decides
# whether the row index is stored as an extra column - confirm that is wanted.
option_data.to_csv('../data/heston_mc_synthetic_puts.csv')