# **Data generation**

In [1]:
import numpy as np
from scipy.stats import norm
from datetime import datetime
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split

In [2]:
def black_model(F, K, T, sigma, option_type='call'):
	# Parameters
	d1 = (np.log(F / K) + (sigma**2 / 2) * T) / (sigma * np.sqrt(T))
	d2 = d1 - sigma * np.sqrt(T)

	# Discount factor (assuming risk-free rate is 0)
	DF_T = 1

	if option_type == 'call':
		return DF_T * (F * norm.cdf(d1) - K * norm.cdf(d2))
	elif option_type == 'put':
		return DF_T * (K * norm.cdf(-d2) - F * norm.cdf(-d1))
	else:
		raise ValueError("option_type must be 'call' or 'put'")

def generate_data(num_samples, S=1):
	# Generate random parameters
	K = np.random.uniform(1, 2.5, num_samples)
	T = np.random.uniform(0.004, 4, num_samples)
	sigma = np.random.uniform(0.1, 0.5, num_samples)


	call_prices = black_model(S, K, T, sigma, option_type='call')

	# Prepare input data matrix X
	d1 = (np.log(S/K) + sigma**2*T/2) / (sigma * np.sqrt(T))
	X = np.vstack((K, T, sigma, np.log(S/K), np.sqrt(T), T*sigma**2)).T
	y = call_prices

	return X, y

In [3]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [None]:
def generate_static_test_data(num_test_samples=100000, file_name='static_test_data.pt'):
	global scaler

	# Generate test data
	X_test, y_test = generate_data(num_samples=num_test_samples)

	# Apply the same scaling as used for training
	X_test_scaled = scaler.transform(X_test)  # Use scaler fitted on training data

	# Convert to tensors
	X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float64)
	y_test_tensor = torch.tensor(y_test, dtype=torch.float64).unsqueeze(1)

	# Save the test data
	torch.save((X_test_tensor, y_test_tensor), file_name)

def load_static_test_data(file_name='static_test_data.pt'):
	# Load test data from the file
	X_test_tensor, y_test_tensor = torch.load(file_name)
	return X_test_tensor, y_test_tensor

In [None]:
def generate_and_prepare_training_data(num_train_samples=1000000, batch_size=128):
	global scaler

	# Generate training data
	X_train, y_train = generate_data(num_samples=num_train_samples)

	# Fit the scaler on the training data
	X_train_scaled = scaler.fit_transform(X_train)  # Fit and scale training data

	# Convert to tensors
	X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float64)
	y_train_tensor = torch.tensor(y_train, dtype=torch.float64).unsqueeze(1)

	# Create DataLoader
	train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
	train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

	return train_loader

# **Train**

## Model definition

In [6]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
from torch import nn
import torch.nn.functional as F

In [91]:
class FeedforwardNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(FeedforwardNN, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, hidden_size, dtype=torch.float64),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size, dtype=torch.float64),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size, dtype=torch.float64)
        )
        self.name = "FFN model"

    def forward(self, x):
        x = x.to(torch.float64)  # Ensure input is float64
        return self.model(x)

In [90]:
class ResidualBlock(nn.Module):
    def __init__(self, dim):
        super(ResidualBlock, self).__init__()
        self.layer = nn.Sequential(
            nn.Linear(dim, dim, dtype=torch.float64),
            nn.ReLU(),
            nn.Linear(dim, dim, dtype=torch.float64)
        )

    def forward(self, x):
        x = x.to(torch.float64)  # Ensure input is float64
        return x + self.layer(x)

class ResNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_blocks):
        super(ResNet, self).__init__()
        self.input_layer = nn.Linear(input_size, hidden_size, dtype=torch.float64)
        self.blocks = nn.Sequential(*[ResidualBlock(hidden_size) for _ in range(num_blocks)])
        self.output_layer = nn.Linear(hidden_size, output_size, dtype=torch.float64)
        self.name = "ResNet model"

    def forward(self, x):
        x = x.to(torch.float64)  # Ensure input is float64
        x = torch.relu(self.input_layer(x))
        x = self.blocks(x)
        return self.output_layer(x)

In [89]:
class SIRENLayer(nn.Module):
    def __init__(self, input_size, out_size, omega=30.0, is_first=False):
        super().__init__()
        self.input_size = input_size
        self.out_size = out_size
        self.is_first = is_first
        self.omega = omega

        # Linear layer
        self.linear = nn.Linear(input_size, out_size, dtype=torch.float64)

        # Initialize weights
        self.init_weights()

    def init_weights(self):
        with torch.no_grad():
            if self.is_first:
                self.linear.weight.uniform_(-1 / self.input_size, 1 / self.input_size)
            else:
                self.linear.weight.uniform_(-np.sqrt(6 / self.input_size) / self.omega,
                                             np.sqrt(6 / self.input_size) / self.omega)

    def forward(self, x):
        x = x.to(torch.float64)  # Ensure input is float64
        return torch.sin(self.omega * self.linear(x))

class SIREN(nn.Module):
    def __init__(self, input_size, hidden_size, hidden_layers, out_size, omega=30.0):
        super().__init__()

        # First layer
        self.layers = [SIRENLayer(input_size, hidden_size, omega=omega, is_first=True)]

        # Hidden layers
        for _ in range(hidden_layers):
            self.layers.append(SIRENLayer(hidden_size, hidden_size, omega=omega))

        # Final layer
        self.layers.append(nn.Linear(hidden_size, out_size, dtype=torch.float64))

        # Combine into a sequential module
        self.net = nn.Sequential(*self.layers)
        self.name = "SIREN model"

    def forward(self, x):
        x = x.to(torch.float64)  # Ensure input is float64
        return self.net(x)

In [88]:
class CNN(nn.Module):
    def __init__(self, input_channels, output_size):
        super(CNN, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(input_channels, 16, kernel_size=3, padding=1, dtype=torch.float64),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, padding=1, dtype=torch.float64),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(32 * 8 * 8, output_size, dtype=torch.float64)  # Assuming input is 8x8 grid
        )
        self.name = "CNN model"

    def forward(self, x):
        x = x.to(torch.float64)  # Ensure input is float64
        return self.model(x)

In [83]:
class TransformerModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_heads, num_layers, output_size):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Linear(input_size, hidden_size, dtype=torch.float64)
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_size, nhead=num_heads, dtype=torch.float64
        )
        self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.output_layer = nn.Linear(hidden_size, output_size, dtype=torch.float64)
        self.name = "Transformer model"

    def forward(self, x):
        x = x.to(torch.float64)  # Ensure input is float64
        x = self.embedding(x).unsqueeze(1)  # Add sequence dimension
        x = self.transformer(x)
        return self.output_layer(x.squeeze(1))

In [84]:
class Autoencoder(nn.Module):
    def __init__(self, input_size, hidden_size, latent_dim):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size, dtype=torch.float64),
            nn.ReLU(),
            nn.Linear(hidden_size, latent_dim, dtype=torch.float64)
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_size, dtype=torch.float64),
            nn.ReLU(),
            nn.Linear(hidden_size, input_size, dtype=torch.float64)
        )
        self.name = "Autoencoder model"

    def forward(self, x):
        x = x.to(torch.float64)  # Ensure input is float64
        latent = self.encoder(x)
        return self.decoder(latent)

In [87]:
class PINN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(PINN, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, hidden_size, dtype=torch.float64),
            nn.Tanh(),
            nn.Linear(hidden_size, hidden_size, dtype=torch.float64),
            nn.Tanh(),
            nn.Linear(hidden_size, output_size, dtype=torch.float64)
        )
        self.name = "PINN model"

    def forward(self, x):
        x = x.to(torch.float64)  # Ensure input is float64
        return self.model(x)

In [82]:
class MDN(nn.Module):
    def __init__(self, input_size, hidden_size, num_components, output_size):
        super(MDN, self).__init__()
        self.hidden_layer = nn.Linear(input_size, hidden_size, dtype=torch.float64)
        self.alpha_layer = nn.Linear(hidden_size, num_components, dtype=torch.float64)  # Mixing coefficients
        self.mu_layer = nn.Linear(hidden_size, num_components * output_size, dtype=torch.float64)  # Means
        self.sigma_layer = nn.Linear(hidden_size, num_components * output_size, dtype=torch.float64)  # Variances
        self.name = "MDN model"

    def forward(self, x):
        x = x.to(torch.float64)  # Ensure input is float64
        x = torch.relu(self.hidden_layer(x))
        alphas = torch.softmax(self.alpha_layer(x), dim=-1)
        mus = self.mu_layer(x)
        sigmas = torch.exp(self.sigma_layer(x))  # Ensure positivity
        return alphas, mus, sigmas

In [None]:
class KernelAttentionLayer(nn.Module):
	def __init__(self, input_size, output_size):
		super(KernelAttentionLayer, self).__init__()
		self.query_layer = nn.Linear(input_size, output_size, dtype=torch.float64)
		self.key_layer = nn.Linear(input_size, output_size, dtype=torch.float64)
		self.value_layer = nn.Linear(input_size, output_size, dtype=torch.float64)
		self.scale = 1.0 / np.sqrt(output_size)

	def forward(self, x):
		Q = self.query_layer(x)
		K = self.key_layer(x)
		V = self.value_layer(x)

		attention_weights = torch.softmax(Q @ K.T * self.scale, dim=-1)
		output = attention_weights @ V

		return output

In [None]:
class BlackScholesNet(nn.Module):
	def __init__(self, input_size, hidden_size):
		super(BlackScholesNet, self).__init__()
		self.kernel_attention = KernelAttentionLayer(input_size, hidden_size)
		self.fc1 = nn.Linear(hidden_size, hidden_size, dtype=torch.float64)
		self.fc2 = nn.Linear(hidden_size, 1, dtype=torch.float64)
		self.name = 'KAN Black Scholes'

	def forward(self, x):
		x = self.kernel_attention(x)
		x = torch.tanh(self.fc1(x))
		output = self.fc2(x)

		return output

In [89]:
class BlackScholesNet(nn.Module):
	def __init__(self, input_size=1, hidden_size=128, output_size=1, dropout_p = 0.33):
		super(BlackScholesNet, self).__init__()
		self.fc1 = nn.Linear(input_size, hidden_size, dtype=torch.float64)
		self.bn1 = nn.BatchNorm1d(hidden_size, dtype=torch.float64)  # Batch Normalization
		self.fc2 = nn.Linear(hidden_size, hidden_size, dtype=torch.float64)
		self.bn2 = nn.BatchNorm1d(hidden_size, dtype=torch.float64)  # Batch Normalization
		self.fc3 = nn.Linear(hidden_size, output_size, dtype=torch.float64)
		self.dropout = nn.Dropout(p=dropout_p)  # Dropout for regularization
		self.name = 'Black Model'

	def forward(self, x, K):
		x = F.tanh(self.bn1(self.fc1(x)))  # Tanh and Batch Normalization
		x = self.dropout(x)  # Dropout
		x = F.tanh(self.bn2(self.fc2(x)))  # Tanh and Batch Normalization
		x = self.fc3(x)
		x1, x2 = x[:, [0]], x[:, [1]]
		return F.sigmoid(x1) - K * F.sigmoid(x2)

In [14]:
class BlackScholesNet(nn.Module):
	def __init__(self, input_size=1, hidden_size=128, output_size=1, dropout_p = 0.33):
		super(BlackScholesNet, self).__init__()
		self.fc1 = nn.Linear(input_size, hidden_size, dtype=torch.float64)
		self.bn1 = nn.BatchNorm1d(hidden_size, dtype=torch.float64)  # Batch Normalization
		self.fc2 = nn.Linear(hidden_size, hidden_size, dtype=torch.float64)
		self.bn2 = nn.BatchNorm1d(hidden_size, dtype=torch.float64)  # Batch Normalization
		self.fc3 = nn.Linear(hidden_size, output_size, dtype=torch.float64)
		self.dropout = nn.Dropout(p=dropout_p)  # Dropout for regularization
		self.name = 'Black Model without activation'

	def forward(self, x):
		x = F.tanh(self.bn1(self.fc1(x)))  # Tanh and Batch Normalization
		x = self.dropout(x)  # Dropout
		x = F.tanh(self.bn2(self.fc2(x)))  # Tanh and Batch Normalization
		x = self.fc3(x)
		return x

In [27]:
class BlackScholesNet(nn.Module):
	def __init__(self, input_size=1, hidden_size=128, output_size=1, dropout_p=0.3):
		super(BlackScholesNet, self).__init__()
		self.fc1 = nn.Linear(input_size, hidden_size, dtype=torch.float64)
		self.bn1 = nn.BatchNorm1d(hidden_size, dtype=torch.float64)
		self.fc2 = nn.Linear(hidden_size, hidden_size, dtype=torch.float64)
		self.bn2 = nn.BatchNorm1d(hidden_size, dtype=torch.float64)
		self.fc_out = nn.Linear(hidden_size, output_size, dtype=torch.float64)
		self.dropout = nn.Dropout(p=dropout_p)
		self.name = 'Black ResNet Model'

	def forward(self, x):
		residual = self.fc1(x)  # Transform residual to match shape of x
		x = F.relu(self.bn1(self.fc1(x)))
		x = self.dropout(x)
		x = F.relu(self.bn2(self.fc2(x)))
		x = x + residual  # Adding the skip connection after matching dimensions
		x = self.fc_out(x)
		return x

## Generate test data

In [None]:
num_test_samples = 2000
batch_size = 256

In [56]:
# generate_static_test_data(num_test_samples)

In [None]:
X_test_tensor, y_test_tensor = load_static_test_data()

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## Finding optimal hyperparameters

### Define the device

In [42]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')

### Run the search

In [270]:
train_loader = generate_and_prepare_training_data(10000, batch_size)

In [271]:
X_test_tensor, y_test_tensor = load_static_test_data('static_test_data_s.pt')

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

  X_test_tensor, y_test_tensor = torch.load(file_name)


In [None]:
import optuna
import torch
import torch.nn.functional as F

def objective(trial):
	# Define hyperparameters to be optimized
	input_size = 4
	hidden_size = 128
	dropout_p = trial.suggest_float('dropout_p', 0.1, 0.5)
	lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)

	model = BlackScholesNet(input_size=input_size, hidden_size=hidden_size, output_size=1)
	model.dropout.p = dropout_p

	optimizer = torch.optim.NAdam(model.parameters(), lr=lr)

	scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)

	num_epochs = 100
	for epoch in range(num_epochs):
		model.train()
		epoch_mse = 0
		epoch_mae = 0
		epoch_mre = 0

		for X_batch, y_batch in train_loader:
			X_batch, y_batch = X_batch.to(device), y_batch.to(device)

			optimizer.zero_grad()

			outputs = model(X_batch)
			# outputs = (outputs[:, 0] + outputs[:, 1]) / 2
			y = y_batch

			# mse_loss = F.mse_loss(outputs, y)
			mae_loss = F.l1_loss(outputs, y)
			# relative_errors = torch.abs(outputs - y) / (y + 1e-8)
			# mre_loss = relative_errors.mean()

			mae_loss.backward()
			optimizer.step()

			# epoch_mse += mse_loss.item()
			epoch_mae += mae_loss.item()
			# epoch_mre += mre_loss.item()

		avg_epoch_mae = epoch_mae / len(train_loader)
		scheduler.step(avg_epoch_mae)

	test_losses = 0.
	test_maes = 0.
	test_max_aes = 0.
	test_mres = 0.
	test_max_res = 0.

	model.eval()

	with torch.inference_mode():
		for X_batch, y_batch in test_loader:
			X_batch, y_batch = X_batch.to(device), y_batch.to(device)

			# Forward pass
			outputs = model(X_batch)
			# outputs = (outputs[:, 0] + outputs[:, 1]) / 2

			y = y_batch

			# Mean Absolute Error (MAE)
			abs_errors = torch.abs(outputs - y)
			test_maes += abs_errors.sum().item()

	# avg_test_loss = test_losses / len(test_loader.dataset)
	avg_test_mae = test_maes / len(test_loader.dataset)
	# avg_test_mre = test_mres / len(test_loader.dataset)

	# Return the average MAE as the objective to be minimized
	return avg_test_mae

  from .autonotebook import tqdm as notebook_tqdm


In [273]:
# Create Optuna study and optimize
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=500)

# Print best parameters and best value
print("Best hyperparameters:", study.best_params)
print("Best MAE:", study.best_value)

[I 2024-11-13 02:13:30,923] A new study created in memory with name: no-name-4d12a20f-78bb-4a37-b8de-b1d08383787d
[I 2024-11-13 02:13:49,594] Trial 0 finished with value: 0.008538148411389517 and parameters: {'dropout_p': 0.29188936206204374, 'lr': 3.6650461245806255e-05}. Best is trial 0 with value: 0.008538148411389517.
[I 2024-11-13 02:14:08,364] Trial 1 finished with value: 0.01606481424222981 and parameters: {'dropout_p': 0.4927096150720369, 'lr': 2.0102768896209213e-05}. Best is trial 0 with value: 0.008538148411389517.
[I 2024-11-13 02:14:27,561] Trial 2 finished with value: 0.006898602722963613 and parameters: {'dropout_p': 0.2903698277645128, 'lr': 0.0002805596088872334}. Best is trial 2 with value: 0.006898602722963613.
[I 2024-11-13 02:14:46,363] Trial 3 finished with value: 0.001978334345428275 and parameters: {'dropout_p': 0.32277925052099243, 'lr': 0.009128901482677115}. Best is trial 3 with value: 0.001978334345428275.
[I 2024-11-13 02:15:05,228] Trial 4 finished with va

Best hyperparameters: {'dropout_p': 0.10830718268190546, 'lr': 0.09638266753852863}
Best MAE: 0.0005869729237254167


Best hyperparameters: {'dropout_p': 0.10830718268190546, 'lr': 0.09638266753852863}
Best MAE: 0.0005869729237254167

Best hyperparameters: {'dropout_p': 0.20862014926048447, 'lr': 0.0002448376394581503}
Best MAE: 0.00049047276004533371

## Prepare the training data

In [15]:
num_train_samples = 100000
batch_size = 128

In [16]:
train_loader = generate_and_prepare_training_data(num_train_samples, batch_size)
n_features = train_loader.dataset.tensors[0].shape[1]

## Model initialization

In [39]:
dropout_p = 0.10830718268190546
lr = 0.09638266753852863

In [104]:
models = []
optimizers = []
schedulers = []

In [105]:
models.append( BlackScholesNet(input_size=n_features, output_size=1, hidden_size=128, dropout_p=dropout_p))
models.append( FeedforwardNN(input_size=n_features, output_size=1, hidden_size=128))
models.append( ResNet(input_size=n_features, output_size=1, hidden_size=128, num_blocks=4))
models.append( SIREN(input_size=n_features, out_size=1, hidden_size=128, hidden_layers=4))
# models.append( CNN(input_channels=n_features, output_size=1))
models.append( TransformerModel(input_size=n_features, output_size=1, hidden_size=128, num_heads=4, num_layers=4))
models.append( Autoencoder(input_size=n_features, hidden_size=128, latent_dim=4))
models.append( PINN(input_size=n_features, output_size=1, hidden_size=128))
# models.append( MDN(input_size=n_features, output_size=1, hidden_size=128, num_components=64))

In [106]:
for model in models:
	optimizer = optim.NAdam(model.parameters(), lr=lr)
	scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)
	optimizers.append(optimizer)
	schedulers.append(scheduler)

In [92]:
# model.load_state_dict(torch.load('models\\Black Model_mae.pth'))

In [107]:
for model in models:
	model.to(device)

Best hyperparameters: {'hidden_size': 256, 'dropout_p': 0.16165214075232218, 'lr': 0.0024867405570057574, 'batch_size': 64, 'optimizer': 'NAdam'}
Best MAE: 0.003318011008932989

Best hyperparameters: {'hidden_size': 128, 'dropout_p': 0.3974039374569882, 'lr': 0.012408717790861197, 'batch_size': 128, 'optimizer': 'NAdam'}
Best MAE: 0.008668262018621945

## Training loop

### Parameters

In [44]:
num_epochs = 100
num_stages = 1

In [45]:
def weights_init(m):
	if isinstance(m, nn.Linear):
		nn.init.xavier_uniform_(m.weight)
		if m.bias is not None:
			nn.init.constant_(m.bias, 0)

In [108]:
for model in models:
	model.apply(weights_init)

### Huber Loss

In [241]:
def huber_loss(y_pred, y_true, delta=1.0):
	error = y_true - y_pred
	is_small_error = torch.abs(error) <= delta
	small_error_loss = 0.5 * error**2
	large_error_loss = delta * (torch.abs(error) - 0.5 * delta)

	return torch.where(is_small_error, small_error_loss, large_error_loss).mean()

In [None]:
for stage in range(num_stages):
	print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ***** Stage [{stage+1}/{num_stages}] {'*'*150}")

	for epoch in range(num_epochs):
		epoch_huber = 0.
		epoch_mae = 0.
		epoch_mre = 0.
		epoch_mse = 0.

		for X_batch, y_batch in train_loader:
			X_batch, y_batch = X_batch.to(device), y_batch.to(device)
			# X_batch = X_batch.unsqueeze(1)  # Add a dimension for sequence length
			model.train()

			outputs = model(X_batch)
			# outputs = (outputs[:, 0] + outputs[:, 1] ) / 2

			y = y_batch

			# Calculate losses
			hub_loss = huber_loss(outputs, y, 0.02)
			mse_loss = F.mse_loss(outputs, y)
			mae_loss = F.l1_loss(outputs, y)
			mask = y >= 1e-10
			y_m = y[mask]
			relative_errors = torch.abs(outputs[mask] - y_m ) / y_m
			mre_loss = relative_errors.mean()

			# Backpropagation
			optimizer.zero_grad()
			hub_loss.backward()
			optimizer.step()

			# Accumulate losses
			epoch_mse += mse_loss.item()
			epoch_mae += mae_loss.item()
			epoch_mre += mre_loss.item()
			epoch_huber += hub_loss

		print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ----- Epoch [{epoch+1}/{num_epochs}] {'-'*150}")

		avg_epoch_huber = epoch_huber / len(train_loader)
		avg_epoch_mse = epoch_mse / len(train_loader)
		avg_epoch_mae = epoch_mae / len(train_loader)
		avg_epoch_mre = epoch_mre / len(train_loader)

		print(f"{model.name:<50} | Huber loss: {avg_epoch_huber:<25} | MSE: {avg_epoch_mse:<25} | MAE: {avg_epoch_mae:<25} | MRE: {avg_epoch_mre:<25} |")

		scheduler.step(avg_epoch_huber)

[ 01:57:17 ] ***** Stage [1/1] ******************************************************************************************************************************************************
[ 01:57:19 ] ----- Epoch [1/100] ------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model without activation                     | Huber loss: 0.0006164860572768361     | MSE: 0.0028085483186428215     | MAE: 0.03959589209837654       | MRE: 1238187.8357990088        |
[ 01:57:22 ] ----- Epoch [2/100] ------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model without activation                     | Huber loss: 0.00014561837257630244    | MSE: 0.0003577709197416124     | MAE: 0.014340423312230238      | MRE: 476061.4230647505         |
[ 01:57:24 ] ----- Epoch [3/100] ---------------------------------

### Log-cosh loss

In [None]:
for stage in range(num_stages):
	print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ***** Stage [{stage+1}/{num_stages}] {'*'*150}")

	for epoch in range(num_epochs):
		cnt = 0
		epoch_lcosh = 0.
		epoch_mae = 0.
		epoch_mre = 0.
		epoch_mse = 0.

		for X_batch, y_batch in train_loader:
			X_batch, y_batch = X_batch.to(device), y_batch.to(device)

			model.train()

			outputs = model(X_batch, X_batch[:, 0])
			outputs = (outputs[:, 0] + outputs[:, 1] ) / 2
			y = y_batch[:, 0]

			# Calculate losses
			lcosh_loss = torch.mean(torch.log(torch.cosh(outputs-y)))
			mse_loss = F.mse_loss(outputs, y)
			mae_loss = F.l1_loss(outputs, y)
			mask = y >= 1e-10
			y_m = y[mask]
			relative_errors = torch.abs(outputs[mask] - y_m ) / y_m
			mre_loss = relative_errors.sum()

			cnt += len(relative_errors)

			# Backpropagation
			optimizer.zero_grad()
			lcosh_loss.backward()
			optimizer.step()

			# Accumulate losses
			epoch_mse += mse_loss.item()
			epoch_mae += mae_loss.item()
			epoch_mre += mre_loss.item()
			epoch_lcosh += hub_loss

		print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ----- Epoch [{epoch+1}/{num_epochs}] {'-'*150}")

		avg_epoch_lcosh = epoch_lcosh / len(train_loader)
		avg_epoch_mse = epoch_mse / len(train_loader)
		avg_epoch_mae = epoch_mae / len(train_loader)
		avg_epoch_mre = epoch_mre / cnt

		print(f"{model.name:<50} | Huber loss: {avg_epoch_lcosh:<25} | MSE: {avg_epoch_mse:<25} | MAE: {avg_epoch_mae:<25} | MRE: {avg_epoch_mre:<25} |")

		scheduler.step(avg_epoch_mae)

[ 18:51:10 ] ***** Stage [1/1] ******************************************************************************************************************************************************
[ 18:51:30 ] ----- Epoch [1/30] ------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model                                        | Huber loss: 2.805671036771152e-05     | MSE: 0.01731447072122037       | MAE: 0.089106594409645         | MRE: 3058633.5212305137        |
[ 18:51:50 ] ----- Epoch [2/30] ------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model                                        | Huber loss: 2.805671036771152e-05     | MSE: 0.0010450003118724588     | MAE: 0.023971658696214888      | MRE: 771303.588995485          |
[ 18:52:09 ] ----- Epoch [3/30] ------------------------------------

### MSE Only

In [None]:
for epoch in range(num_epochs):
	# Initialize epoch metrics for each model
	epoch_mae = [0.] * len(models)
	epoch_mre = [0.] * len(models)
	epoch_mse = [0.] * len(models)

	for X_batch, y_batch in train_loader:
		X_batch, y_batch = X_batch.to(device), y_batch.to(device)

		# Loop over each model
		for i, model in enumerate(models):
			model.train()

			# Forward pass
			outputs = model(X_batch)
			outputs = outputs[:, 0]
			y = y_batch[:, 0]

			# Calculate losses
			mse_loss = F.mse_loss(outputs, y)
			mae_loss = F.l1_loss(outputs, y)
			mask = y >= 1e-10
			y_m = y[mask]
			relative_errors = torch.abs(outputs[mask] - y_m ) / y_m
			mre_loss = relative_errors.mean()

			# Backpropagation
			optimizers[i].zero_grad()
			mse_loss.backward()
			optimizers[i].step()

			# Accumulate losses for this model
			epoch_mse[i] += mse_loss.item()
			epoch_mae[i] += mae_loss.item()
			epoch_mre[i] += mre_loss.item()

	print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ----- Epoch [{epoch+1}/{num_epochs}] {'-'*150}")
	# Average metrics for each model
	for i in range(len(models)):
		avg_epoch_mse = epoch_mse / len(train_loader)
		avg_epoch_mae = epoch_mae / len(train_loader)
		avg_epoch_mre = epoch_mre / len(train_loader)

		print(f"{models.name:<50} | MSE: {avg_epoch_mse:<25} | MAE: {avg_epoch_mae:<25} | MRE: {avg_epoch_mre:<25} |")

		# Scheduler step
		schedulers[i].step(avg_epoch_mse)

Black Model without activation is training
FFN model is training
ResNet model is training
SIREN model is training
Transformer model is training
Autoencoder model is training
PINN model is training
Black Model without activation is training
FFN model is training
ResNet model is training
SIREN model is training
Transformer model is training
Autoencoder model is training
PINN model is training
Black Model without activation is training
FFN model is training
ResNet model is training
SIREN model is training
Transformer model is training
Autoencoder model is training
PINN model is training
Black Model without activation is training
FFN model is training
ResNet model is training
SIREN model is training
Transformer model is training
Autoencoder model is training
PINN model is training
Black Model without activation is training
FFN model is training
ResNet model is training
SIREN model is training
Transformer model is training
Autoencoder model is training
PINN model is training
Black Model wit

KeyboardInterrupt: 

### MAE Only

In [None]:
for stage in range(num_stages):
	print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ***** Stage [{stage+1}/{num_stages}] {'*'*150}")

	for epoch in range(num_epochs):
		epoch_mae = 0.
		epoch_mre = 0.
		epoch_mse = 0.

		for X_batch, y_batch in train_loader:
			X_batch, y_batch = X_batch.to(device), y_batch.to(device)

			model.train()

			outputs = model(X_batch)
			# outputs = (outputs[:, 0] + outputs[:, 1] ) / 2
			outputs=outputs[:, 0]
			y = y_batch[:, 0]

			# Calculate losses
			mse_loss = F.mse_loss(outputs, y)
			mae_loss = F.l1_loss(outputs, y)
			mask = y >= 1e-10
			y_m = y[mask]
			relative_errors = torch.abs(outputs[mask] - y_m ) / y_m
			mre_loss = relative_errors.mean()

			# Backpropagation
			optimizer.zero_grad()
			mae_loss.backward()
			optimizer.step()

			# Accumulate losses
			epoch_mse += mse_loss.item()
			epoch_mae += mae_loss.item()
			epoch_mre += mre_loss.item()

		print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ----- Epoch [{epoch+1}/{num_epochs}] {'-'*150}")

		avg_epoch_mse = epoch_mse / len(train_loader)
		avg_epoch_mae = epoch_mae / len(train_loader)
		avg_epoch_mre = epoch_mre / len(train_loader)

		print(f"{model.name:<50} | MSE: {avg_epoch_mse:<25} | MAE: {avg_epoch_mae:<25} | MRE: {avg_epoch_mre:<25} |")

		scheduler.step(avg_epoch_mae)

[ 02:36:41 ] ***** Stage [1/1] ******************************************************************************************************************************************************
[ 02:36:43 ] ----- Epoch [1/100] ------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model without activation                     | MSE: 0.2033266645362455        | MAE: 0.1933763983580105        | MRE: 6175101.603659511         |
[ 02:36:45 ] ----- Epoch [2/100] ------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model without activation                     | MSE: 0.04957884548806898       | MAE: 0.1329738964021373        | MRE: 4241745.057257743         |
[ 02:36:47 ] ----- Epoch [3/100] -----------------------------------------------------------------------------------------------------------------

### MRE Only

In [None]:
# for epoch in range(num_epochs):
# 	# Initialize epoch metrics for each model
# 	epoch_mae = [0.] * len(models)
# 	epoch_mre = [0.] * len(models)
# 	epoch_mse = [0.] * len(models)

# 	for X_batch, y_batch in train_loader:
# 		X_batch, y_batch = X_batch.to(device), y_batch.to(device)

# 		# Loop over each model
# 		for i, model in enumerate(models):
# 			model.train()

# 			# Forward pass
# 			outputs = model(X_batch, X_batch[:, 0])
# 			outputs = outputs[:, 0] + outputs[:, 1] / X_batch[:, 0]
# 			y = y_batch[:, 0]

# 			# Calculate losses
# 			mse_loss = F.mse_loss(outputs, y)
# 			mae_loss = F.l1_loss(outputs, y)
# 			relative_errors = torch.abs(outputs - y) / (y + 1e-8)
# 			mre_loss = relative_errors.mean()

# 			# Backpropagation
# 			optimizers.zero_grad()
# 			mre_loss.backward()
# 			optimizers.step()

# 			# Accumulate losses for this model
# 			epoch_mse += mse_loss.item()
# 			epoch_mae += mae_loss.item()
# 			epoch_mre += mre_loss.item()

# 	print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ----- Epoch [{epoch+1}/{num_epochs}] {'-'*150}")
# 	# Average metrics for each model
# 	for i in range(len(models)):
# 		avg_epoch_mse = epoch_mse / len(train_loader)
# 		avg_epoch_mae = epoch_mae / len(train_loader)
# 		avg_epoch_mre = epoch_mre / len(train_loader)

# 		print(f"{models.name:<50} | MSE: {avg_epoch_mse:<25} | MAE: {avg_epoch_mae:<25} | MRE: {avg_epoch_mre:<25} |")

# 		# Scheduler step
# 		schedulers.step(avg_epoch_mre)


### Combined loss

In [None]:
# for epoch in range(num_epochs):
# 	# Initialize epoch metrics for each model
# 	epoch_mae = [0.] * len(models)
# 	epoch_mre = [0.] * len(models)
# 	epoch_mse = [0.] * len(models)

# 	for X_batch, y_batch in train_loader:
# 		X_batch, y_batch = X_batch.to(device), y_batch.to(device)

# 		# Loop over each model
# 		for i, model in enumerate(models):
# 			model.train()

# 			# Forward pass
# 			outputs = model(X_batch, X_batch[:, 0])
# 			outputs = outputs[:, 0] + outputs[:, 1] / X_batch[:, 0]
# 			y = y_batch[:, 0]

# 			# Calculate losses
# 			mse_loss = F.mse_loss(outputs, y)
# 			mae_loss = F.l1_loss(outputs, y)
# 			relative_errors = torch.abs(outputs - y) / (y + 1e-8)
# 			mre_loss = relative_errors.mean()

# 			# Composite loss
# 			composite_loss = (weights_mse * mse_loss +
# 							  weights_mae * mae_loss +
# 							  weights_mre * mre_loss)

# 			# Backpropagation
# 			optimizers.zero_grad()
# 			composite_loss.backward()
# 			optimizers.step()

# 			# Accumulate losses for this model
# 			epoch_mse += mse_loss.item()
# 			epoch_mae += mae_loss.item()
# 			epoch_mre += mre_loss.item()

# 	print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ----- Epoch [{epoch+1}/{num_epochs}] {'-'*150}")
# 	# Compute average metrics for each model
# 	for i in range(len(models)):
# 		avg_epoch_mse = epoch_mse / len(train_loader)
# 		avg_epoch_mae = epoch_mae / len(train_loader)
# 		avg_epoch_mre = epoch_mre / len(train_loader)

# 		print(f"{models.name:<50} | MSE: {avg_epoch_mse:<25} | MAE: {avg_epoch_mae:<25} | MRE: {avg_epoch_mre:<25} |")

# 		# Update weights for loss adjustment
# 		weights_mse = avg_epoch_mse / target_loss
# 		weights_mae = avg_epoch_mae / target_loss
# 		weights_mre = avg_epoch_mre / target_loss

# 		# Scheduler step
# 		schedulers.step(composite_loss)


### Saving model

In [46]:
model_path = f"models/{model.name}_mae_52.pth"

In [47]:
torch.save(model.state_dict(), model_path)

# **Evaluation**

### Loading a model

In [40]:
models_path = 'models'
models = []

In [None]:
import os

for model_name in [m for m in os.listdir(models_path) if m.endswith('pth')]:
	print(model_name)
	# model = BlackScholesNet(input_size=5, hidden_size=128, output_size=2)
	# model.load_state_dict(torch.load(os.path.join(models_path, model_name)))
	# model.to(device)

	# models.append(model)

In [None]:
model = BlackScholesNet(input_size=5, hidden_size=128, output_size=2)
model.load_state_dict(torch.load(f"models\\black.pth"))
model.to(device)

### Generate test data

In [135]:
num_test_samples = 20000
batch_size = 128

In [136]:
generate_static_test_data(num_test_samples, 'static_test_data_m.pt')

In [137]:
X_test_tensor, y_test_tensor = load_static_test_data('static_test_data_m.pt')

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

  X_test_tensor, y_test_tensor = torch.load(file_name)


### Validation

In [None]:
# Initialize metrics for each model
test_losses = 0.
test_maes = 0.
test_max_aes = 0.
test_mres = 0.
test_max_res = 0.
cnt = 0

model.eval()

with torch.inference_mode():
	for X_batch, y_batch in test_loader:
		X_batch, y_batch = X_batch.to(device), y_batch.to(device)

		# Forward pass
		outputs = model(X_batch)
		# outputs = (outputs[:, 0] + outputs[:, 1]) / 2
		outputs = outputs[:, 0]

		y = y_batch[:, 0]

		# Mean Squared Error (MSE)
		mse_loss = F.mse_loss(outputs, y)
		test_losses += mse_loss.item()

		# Mean Absolute Error (MAE)
		abs_errors = torch.abs(outputs - y)
		test_maes += abs_errors.sum().item()

		# Maximum Absolute Error (Max AE)
		max_ae = abs_errors.max().item()
		test_max_aes = max(test_max_aes, max_ae)

		# Mean Relative Error (MRE)
		mask = y >= 1e-10
		y_m = y[mask]
		relative_errors = torch.abs(outputs[mask] - y_m ) / y_m

		# Calculate MRE
		test_mres += relative_errors.sum().item()
		cnt += len(relative_errors)

		# Calculate max relative error
		test_max_res = max(test_max_res, relative_errors.max().item())


# Calculate the average metrics over all test samples for each model
avg_test_loss = test_losses / len(test_loader.dataset)
avg_test_mae = test_maes / len(test_loader.dataset)
avg_test_mre = test_mres / cnt

print('-'*250)
print(f"{model.name:<50} Results | MSE: {avg_test_loss:<25} | MAE: {avg_test_mae:<25} | Max AE: {test_max_aes:<25} | MRE: {avg_test_mre:<25} | Max RE: {test_max_res:<25}")


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model without activation                     Results | MSE: 4.2055868646587654e-07    | MAE: 0.00451268110162455       | Max AE: 0.08146280012130169       | MRE: 10940.277957825232        | Max RE: 9272840.882569332        


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model without activation					 Results | MSE: 6.318817860136378e-08	 | MAE: 0.0009265426649857153	 | Max AE: 0.06433858529715808	   | MRE: 5802.172805252652		 | Max RE: 4261342.999367215  
128 hid_sz m size of ds

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model without activation					 Results | MSE: 1.6209543008076999e-07	| MAE: 0.0024346850476961745	 | Max AE: 0.07303780266972237	   | MRE: 5075.648672021843		 | Max RE: 3539802.196658173		
64 hid_sz

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black ResNet Model								 Results | MSE: 1.0580289371034923e-07	| MAE: 0.0025658051734388544	 | Max AE: 0.05611778693486996	   | MRE: 186458.27062391344		| Max RE: 58482321.75801978   

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black ResNet Model								 Results | MSE: 2.8335606296519785e-08	| MAE: 0.0014160330767518587	 | Max AE: 0.007916218192563523	  | MRE: 104935.5520285946		 | Max RE: 30905757.920977533  

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model without activation					 Results | MSE: 1.124798628428752e-07	 | MAE: 0.0024631195551487453	 | Max AE: 0.037993997379657585	  | MRE: 13487.431753687495		| Max RE: 3748383.199971202		


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 1.860992115070968e-09	 | MAE: 0.0004510874317589473	 | Max AE: 0.015712605802571444	  | MRE: 3039.6012895961358		| Max RE: 24053090.766225673	   


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 4.3176854823214384e-08	| MAE: 0.002548539893560493	  | Max AE: 0.02216988133309572	   | MRE: 52930.97271546223		 | Max RE: 74994568.97747828		


weight decay

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 2.3365065048548754e-08	| MAE: 0.0019228472067950922	 | Max AE: 0.020785850080788537	  | MRE: 19540.95551665145		 | Max RE: 38428277.40361217		


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Deep Model								   Results | MSE: 1.2292241128713081e-08	| MAE: 0.0014149500863335678	 | Max AE: 0.026046665725385276	  | MRE: 15973.972288640323		| Max RE: 13644321.567340782	   


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black CNN Model									Results | MSE: 1.6229420763649162e-08	| MAE: 0.0016403877602419335	 | Max AE: 0.02096367339459762	   | MRE: 39130.069827259045		| Max RE: 49456677.41935457		


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black ResNet Model								 Results | MSE: 1.357335095783607e-07	 | MAE: 0.004738837337220532	  | Max AE: 0.021764388623186998	  | MRE: 147254.71198362616		| Max RE: 146963541.83840838	   


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 1.6345862189413596e-07	| MAE: 0.005929486075345317	  | Max AE: 0.024445273630216757	  | MRE: 308086.0669714369		 | Max RE: 102909205.05644394	   


hub no activation

### tests

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 6.311032924102108e-08	 | MAE: 0.0024049330370174466	 | Max AE: 0.0486327963291493		| MRE: 0.17257685070773462	   | Max RE: 13.89843139038193		


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 4.767372257527874e-09	 | MAE: 0.0008842677848565959	 | Max AE: 0.010480693476281389	  | MRE: 8610.273830920263		 | Max RE: 24765156.665712476   
</br>
delta 0.02

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 1.7072262899170862e-08	| MAE: 0.0014622379697655967	 | Max AE: 0.012304333512939192	  | MRE: 19133.356655940905		| Max RE: 32084991.04810058		
</br>
Huber loss delta = 0.01


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 6.5584848876120205e-09	| MAE: 0.0010204205638990932	 | Max AE: 0.01436321507469207	   | MRE: 21704.53521771374		 | Max RE: 39553088.01572265
</br>
Huber loss delta = 1.0

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 9.577022434796093e-09	 | MAE: 0.001128070044492659	  | Max AE: 0.01750700016971668	   | MRE: 0.0					   | Max RE: 0.0   
with grad norm

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 3.437281101835667e-08	 | MAE: 0.0014345937355705574	 | Max AE: 0.026702327077854637	  | MRE: 0.0					   | Max RE: 0.0		  

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 4.870335576499605e-09	 | MAE: 0.0006644182282886656	 | Max AE: 0.02035231469468729	   | MRE: 0.0					   | Max RE: 0.0	 

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 3.0745519796730916e-09	| MAE: 0.0006669217488797097	 | Max AE: 0.01045388565348454	   | MRE: 0.0					   | Max RE: 0.0	

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 5.394526925641141e-09	 | MAE: 0.0007469986503670395	 | Max AE: 0.024494726553518364	  | MRE: 0.0					   | Max RE: 0.0   

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 9.944437335789435e-10	 | MAE: 0.0002790903619311921	 | Max AE: 0.020291466710218475	  | MRE: 0.0					   | Max RE: 5.520420072604463e+31	

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Base Model										 Results | MSE: 3.700808076204782e-09	 | MAE: 0.0006494232504191063	 | Max AE: 0.02520344930434637	   | MRE: 207.39712310149338		| Max RE: 115508.51118180675 

### Greeks

In [None]:
import torch
from torch.distributions import Normal

def d1(K, T, sigma, F):
	return (torch.log(F / K) + (0.5 * sigma**2) * T) / (sigma * torch.sqrt(T))

def d2(d1, T, sigma):
	return d1 - sigma * torch.sqrt(T)

def delta(d1, F=1, option_type='call'):
	normal_dist = Normal(0, 1)

	if option_type == 'call':
		return normal_dist.cdf(d1)
	elif option_type == 'put':
		return normal_dist.cdf(d1) - 1
	else:
		raise ValueError("Option type must be 'call' or 'put'")

def gamma(T, sigma, d1, F=1):
	normal_dist = Normal(0, 1)
	pdf_d1 = torch.exp(normal_dist.log_prob(d1))

	return pdf_d1 / (F * sigma * torch.sqrt(T))

def theta(K, T, sigma, d1, d2, F=1, r=0, option_type='call'):
	normal_dist = Normal(0, 1)
	pdf_d1 = torch.exp(normal_dist.log_prob(d1))

	if option_type == 'call':
		return (-F * pdf_d1 * sigma / (2 * torch.sqrt(T)) - r * K * torch.exp(-r * T) * normal_dist.cdf(d2))
	elif option_type == 'put':
		return (-F * pdf_d1 * sigma / (2 * torch.sqrt(T)) + r * K * torch.exp(-r * T) * normal_dist.cdf(-d2))
	else:
		raise ValueError("Option type must be 'call' or 'put'")

def vega(T, d1, F=1):
	normal_dist = Normal(0, 1)
	pdf_d1 = torch.exp(normal_dist.log_prob(d1))

	return F * pdf_d1 * torch.sqrt(T)

def greeks(K, T, sigma, F=1, r=0, option_type='call'):
	dp = d1(K, T, sigma, F)
	dm = d2(dp, T, sigma)

	return delta(dp, F, option_type), gamma(T, sigma, dp, F), theta(K, T, sigma, dp, dm, F, r, option_type), vega(T, dp, F)


In [224]:
num_samples = 20000
K = np.random.uniform(1, 2.5, num_samples)
T = np.random.uniform(0.004, 4, num_samples)
sigma = np.random.uniform(0.1, 0.5, num_samples)

# Подготовка данных
X = np.vstack((K, T, np.log(K), sigma * np.sqrt(T))).T
# X = scaler.fit_transform(X)
X_tensor = torch.tensor(X, dtype=torch.float64, requires_grad=True)

prices = torch.tensor(black_model(1, K, T, sigma), dtype=torch.float64)

In [225]:
greeks_result = greeks(X_tensor[:, 0],X_tensor[:, 1], X_tensor[:, 3] / torch.sqrt(X_tensor[:, 1]))

In [263]:
# Входные данные для модели
y = model(X_tensor)
# y = (y[:, 0] + y[:, 1] ) / 2
y = y[:, 0]

abs_errors = torch.abs(y - prices)

# Mean Squared Error (MSE)
mse = F.mse_loss(y, prices).item()

# Mean Absolute Error (MAE)
mae = F.l1_loss(y, prices)

# Mean Relative Error (MRE)
mask = prices >= 1e-10
y_m = prices[mask]
relative_errors = torch.abs(y[mask] - y_m ) / y_m
mre = relative_errors.mean().item()
max_mre = relative_errors.max().item()

print(model.name)
print(f"Mean Squared Error (MSE): {mse}")
print(f"Mean Absolute Error (MAE): {mae.item()}")
print(f"Max Absolute Error (MAE): {abs_errors.max().item()}")
print(f"Mean Relative Error (MRE): {mre}")
print(f"Max Relative Error (MRE): {max_mre}")

Optimized Black Scholes Net
Mean Squared Error (MSE): 0.00019129475982090213
Mean Absolute Error (MAE): 0.008392075799808637
Max Absolute Error (MAE): 0.06874600830798266
Mean Relative Error (MRE): 7803.016820621607
Max Relative Error (MRE): 3614406.206458734


In [227]:
import torch
import numpy as np
import random

# Фиксируем seed для повторяемости
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [None]:
model.zero_grad()  # Сброс градиентов модели

y = model(X_tensor)
# y = (y[:, 0] + y[:, 1] ) / 2

y.backward(torch.ones_like(y), retain_graph=True)
K_grad = X_tensor.grad[:, 0].clone()
T_grad = X_tensor.grad[:, 1].clone()
sigma_grad = (X_tensor.grad[:, 3] / torch.sqrt(X_tensor[:, 1])).clone()

X_tensor.grad.zero_()

y = model(X_tensor)
# y = (y[:, 0] + y[:, 1] ) / 2

y.backward(torch.ones_like(y), retain_graph=True)
delta_grad = X_tensor.grad[:, 0].clone().requires_grad_(True)

X_tensor.grad.zero_()

# Вычисление второго градиента (гамма)
y = model(X_tensor)
#y = (y[:, 0] + y[:, 1] ) / 2

y.backward(torch.ones_like(y), retain_graph=True)
delta_grad.backward(torch.ones_like(delta_grad), retain_graph=True)
gamma_grad = X_tensor.grad[:, 0].clone()

delta_loss = F.mse_loss(delta_grad, greeks_result[0]).item()
gamma_loss = F.mse_loss(gamma_grad, greeks_result[1]).item()
theta_loss = F.mse_loss(T_grad, greeks_result[2]).item()
vega_loss = F.mse_loss(sigma_grad, greeks_result[3]).item()

print("Losses for current model:")
print('Delta: ', delta_loss)
print('Gamma: ', gamma_loss)
print('Theta: ', theta_loss)
print('Vega: ', vega_loss)

print(model.name)

Losses for current model:
Delta:  0.042062057584169525
Gamma:  0.4063527065205174
Theta:  0.0013791814731462268
Vega:  0.0542447592479735
Optimized Black Scholes Net


In [None]:
# Losses for current model:
# Delta:  0.03840783685662016
# Gamma:  0.3949252876906595
# Theta:  0.0013525013536893962
# Vega:  0.05303354700018913
# Black Model without activation
# 4 features 100 epochs 100k size of train

In [None]:
# Losses for current model:
# Delta:  0.07002200651184401
# Gamma:  0.530716834946635
# Theta:  0.0013875311415749527
# Vega:  0.06860741523103477
# Black Model without activation
# 4 features 100 epochs 10k size of train

In [None]:
# Losses for current model:
# Delta:  0.06471299801971224
# Gamma:  0.44321279099703076
# Theta:  0.004059831016096307
# Vega:  0.10431985297626094
# Black ResNet Model
# 64 hidden size and 30 epochs

In [None]:
# Losses for current model:
# Delta:  0.07888154450938009
# Gamma:  0.4840537113065917
# Theta:  0.0014586058836390592
# Vega:  0.07845643866992827
# Black ResNet Model

In [None]:
# Losses for current model:
# Delta:  0.09284443305834009
# Gamma:  0.5210671480727084
# Theta:  0.0013278433010317388
# Vega:  0.07444485311843482
# Black Model without activation

In [None]:
# Losses for current model:
# Delta:  0.023927056489781124
# Black Model without activation

In [None]:
# Losses for current model:
# Delta:  0.04097502044021693
# Black Model

In [None]:
# Losses for current model:
# Delta:  0.23041457848259805
# Black Model
# weight decay

In [None]:
# Losses for current model:
# Delta:  0.09470602026672634
# Gamma:  0.4605984598995799
# Black Model

In [None]:
# Losses for current model:
# Delta:  0.07544267106592446
# Gamma:  0.4395989266865998
# Black Deep Model

In [None]:
# Losses for the RNN model:
# Delta Loss:  0.08094750341367543
# Gamma Loss:  0.4721712001137761

In [None]:
# Losses for current model:
# Delta:  0.03421519741554771
# Gamma:  0.37361078598119973
# Black ResNet Model

In [None]:
# Losses for current model:
# Delta:  0.02984523250068021
# Gamma:  0.32884577927187386 hub no act