# Generating training/testing data

In this notebook, we generate the training/testing data by randomly sampling the Heston parameters. First, we import the required packages:

In [2]:
import os
import time

import numpy as np
import QuantLib as ql
import torch
import torch.nn as nn
import torch.nn.functional as F

import HestonUtils

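The function below generates data whose Heston parameters are sampled uniformly from either a narrow or a wide range, keeping only draws that satisfy the Feller condition $2\kappa\bar{v} > \sigma^2$. The observed parameters are sampled the same way in both cases: $S$, $r$, and $q$ are drawn uniformly from fixed ranges, $T$ is a uniformly drawn whole number of days (up to two years) expressed in years, and the strike is fixed at $K = 1$, so that $S/K$ is the moneyness.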

In [3]:
def generateData(M=1000, data_type='NARROW', SAVE_DATA=False):
    '''
    Generate M randomly sampled Heston inputs together with their QuantLib prices.

    M: number of data points to generate
    data_type: any string containing 'NARROW' selects the narrow Heston sampling range;
               every other value selects the wide range. The string is also used verbatim
               in the names of the saved files.
    SAVE_DATA: True if we want to save the data in ./data (useful if generating large-sized data),
               False otherwise. The directory is created if it does not exist yet.

    Returns a tuple (nn_parameters, h_prices):
    nn_parameters: array of shape (M, 9) with columns
                   [S/K, T, v_0, kappa, v_bar, sigma, rho, r, q]
    h_prices: array holding one HestonUtils.QuantlibHestonPrice result per row of nn_parameters

    Sampling uses the global np.random state, so call np.random.seed beforehand
    for reproducible data.
    '''
    # observed parameters: S, K, T, r, q
    Ss = np.random.uniform(low=0.5, high=1.5, size=M)
    Ks = np.ones(M)  # strike is fixed at 1, so S/K below is the moneyness
    # maturity: a whole number of days between 1 and 730, expressed in years
    Ts = np.random.randint(low=1, high=365*2+1, size=M) / 365.
    rs = np.random.uniform(low=0, high=0.05, size=M)
    qs = np.random.uniform(low=0, high=0.05, size=M)

    # observe moneyness: S/K
    observed_params = np.array([Ss/Ks, Ts, rs, qs]).T

    # Heston parameters: v_0, kappa, v_bar, sigma, rho
    if 'NARROW' in data_type:
        bounds = ((0, 0.5),
                  (0, 3),
                  (0, 0.5),
                  (0, 1),
                  (-0.9, 0))
    else: # WIDE
        bounds = ((0, 1),
                  (0, 10),
                  (0, 1),
                  (0, 2),
                  (-1, 0))
    bounds = np.array(bounds)
    lower, upper = bounds[:, 0], bounds[:, 1]

    heston_params = np.zeros((M, 5))
    for m in range(M):
        candidate = np.random.uniform(low=lower, high=upper)

        # Feller condition must be satisfied (2*kappa*v_bar > sigma^2):
        # redraw the whole parameter vector until it is
        while 2*candidate[1]*candidate[2] <= candidate[3]**2:
            candidate = np.random.uniform(low=lower, high=upper)

        heston_params[m] = candidate

    # nn_parameters is what we will feed into our neural network
    nn_parameters = np.hstack((observed_params[:, :2], heston_params, observed_params[:, 2:]))
    # ql_parameters is equivalent to nn_parameters but in a form that QuantLib takes
    ql_parameters = HestonUtils.convertNNtoQLparams(nn_parameters)

    h_prices = np.array([HestonUtils.QuantlibHestonPrice(*ql_parameters[i]) for i in range(M)])

    if SAVE_DATA:
        # Pricing can be expensive; make sure the target directory exists so the
        # results are not lost to a FileNotFoundError at the very last step.
        os.makedirs('data', exist_ok=True)
        np.save(f'data/Heston_params_{data_type}.npy', nn_parameters)
        np.save(f'data/Heston_target_{data_type}.npy', h_prices)

    return nn_parameters, h_prices

An example of what the data will look like:

In [4]:
# Fix the global NumPy seed so the sample shown below is reproducible.
np.random.seed(1)
some_data = generateData(M=10, data_type='NARROW', SAVE_DATA=False)
# some_data is (nn_parameters, h_prices); print each on its own line.
print(*some_data, sep='\n')

[[ 0.917022    0.69863014  0.14924765  1.33840352  0.11106227  0.07336417
  -0.47768533  0.02286024  0.04148017]
 [ 1.22032449  0.98082192  0.04808613  2.7101105   0.05974523  0.52479938
  -0.8247393   0.02153493  0.0136525 ]
 [ 0.50011437  1.28493151  0.45843067  2.73134515  0.14946506  0.58438912
  -0.39067918  0.04695639  0.00296216]
 [ 0.80233257  0.69315068  0.30696916  2.86960697  0.13048949  0.23101542
  -0.41989636  0.03891946  0.0335264 ]
 [ 0.64675589  1.34520548  0.47496907  1.47917878  0.27030025  0.7654851
  -0.85918884  0.03579853  0.02965328]
 [ 0.59233859  1.83287671  0.22399009  2.67640762  0.18879217  0.53842469
  -0.31293101  0.04013788  0.0335827 ]
 [ 0.68626021  1.09315068  0.18063051  1.71302568  0.31891824  0.12631489
  -0.27881587  0.00464004  0.02058939]
 [ 0.84556073  1.54246575  0.3238747   1.06181727  0.38161653  0.35653172
  -0.22249048  0.02590763  0.00987754]
 [ 0.89676747  1.59178082  0.44067091  0.03500758  0.24905453  0.07379201
  -0.19174367  0.043251