In [61]:
import sys
import os

# Project root = parent of the current working directory
# (os.pardir is the ".." path component, resolved against the cwd).
project_root = os.path.abspath(os.pardir)

# Folder that receives the generated dataset: <project_root>/data/raw.
# Create it, including missing parents, and do nothing if it already exists.
data_path = os.path.join(project_root, "data", "raw")
os.makedirs(data_path, exist_ok=True)

# Notebook for generating dataset

In [62]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from utils.bs_formula import black_scholes_price

In [63]:
# Seed NumPy's global (legacy) RNG so that a fresh top-to-bottom run of the
# notebook draws exactly the same samples.
np.random.seed(42)

# Number of rows to generate
N = 300_000

# Row buffer: a plain, initially empty list (nothing is preallocated) to which
# the generation loop appends one row per sample.
data = list()

The model is trained and optimized to perform accurately within the following input ranges. They can be changed at any time if necessary; the dataset must then be regenerated by re-running the cells below.

In [64]:
# Sampling ranges: one (low, high) pair per model input. The generation loop
# unpacks each pair into np.random.uniform, so values fall in [low, high).
# Names follow the arguments of black_scholes_price (presumably spot, strike,
# time to maturity, risk-free rate and volatility -- confirm the expected
# units against utils.bs_formula).
S_range, K_range = (50, 150), (50, 150)
T_range = (0.05, 2.0)
r_range = (0.0, 0.1)
sigma_range = (0.05, 0.6)

Iteratively generate the data

In [65]:
# Start from an empty buffer on every execution of this cell. Appending to a
# list created in an earlier cell would make a re-run stack another N rows on
# top of the previous ones, so the saved CSV would silently grow to 2N, 3N, ...
# NOTE: re-running only this cell continues the global RNG stream; re-run the
# seed cell first to reproduce the exact same samples.
data = []

for _ in tqdm(range(N)):
    # Draw one parameter set; the order of the draws is kept fixed so that a
    # seeded run always produces the same dataset.
    S = np.random.uniform(*S_range)
    K = np.random.uniform(*K_range)
    T = np.random.uniform(*T_range)
    r = np.random.uniform(*r_range)
    sigma = np.random.uniform(*sigma_range)
    option_type = np.random.choice(['call', 'put'])

    # Target value computed by the project's pricing helper
    price = black_scholes_price(S, K, T, r, sigma, option_type)

    # Numeric encoding of the option type for the dataset: call -> 0, put -> 1
    type_code = 0 if option_type == 'call' else 1

    data.append([S, K, T, r, sigma, type_code, price])

# To DataFrame (one row per sample, columns in the order the rows were built)
columns = ['S', 'K', 'T', 'r', 'sigma', 'type', 'price']
df = pd.DataFrame(data, columns=columns)

# Guard against leftover rows from hidden notebook state
assert len(df) == N, f"expected {N} rows, got {len(df)}"

# Save as CSV (without the index column)
df.to_csv(os.path.join(data_path, "bs_dataset.csv"), index=False)

100%|██████████████████████████████████████████████████████████████████████████████████| 300000/300000 [00:18<00:00, 16653.69it/s]


Information regarding the dataset

In [66]:
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() emits a stray "None" line after the summary.
df.info()

# Last expression of the cell: shown by the notebook as a rendered table
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300000 entries, 0 to 299999
Data columns (total 7 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   S       300000 non-null  float64
 1   K       300000 non-null  float64
 2   T       300000 non-null  float64
 3   r       300000 non-null  float64
 4   sigma   300000 non-null  float64
 5   type    300000 non-null  int64  
 6   price   300000 non-null  float64
dtypes: float64(6), int64(1)
memory usage: 16.0 MB
None
            S           K         T         r     sigma  type         price
0   87.454012  145.071431  1.477388  0.059866  0.135810     0  3.236445e-02
1   59.997492   95.924889  0.700732  0.014287  0.407989     0  1.055673e+00
2  146.990985  133.244264  0.464061  0.018182  0.150872     1  1.109715e+00
3  111.165316   50.706631  0.094972  0.052477  0.269924     1  6.970430e-22
4   79.214465   86.636184  0.939336  0.078518  0.159821     0  4.323768e+00
