# **Data generation**

In [1]:
import numpy as np
from scipy.stats import norm
from datetime import datetime
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split

  cpu = _conversion_method_template(device=torch.device("cpu"))


In [2]:
def black_model(F, K, T, sigma, option_type='call'):
	# Parameters
	d1 = (np.log(F / K) + (sigma**2 / 2) * T) / (sigma * np.sqrt(T))
	d2 = d1 - sigma * np.sqrt(T)
	
	# Discount factor (assuming risk-free rate is 0)
	DF_T = 1
	
	if option_type == 'call':
		return DF_T * (F * norm.cdf(d1) - K * norm.cdf(d2))
	elif option_type == 'put':
		return DF_T * (K * norm.cdf(-d2) - F * norm.cdf(-d1))
	else:
		raise ValueError("option_type must be 'call' or 'put'")

def generate_data(num_samples, S=1):
	# Generate random parameters
	K = np.random.uniform(1, 2.5, num_samples)
	T = np.random.uniform(0.004, 4, num_samples)
	sigma = np.random.uniform(0.1, 0.5, num_samples)

	call_prices = black_model(S, K, T, sigma, option_type='call')
	
	# Prepare input data matrix X
	X = np.vstack((K, T, np.log(K), sigma * np.sqrt(T), sigma**2 * T)).T
	y = call_prices
	
	return X, y

In [3]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [4]:
def generate_static_test_data(num_test_samples=100000, file_name='static_test_data.pt'):
    X_test, y_test = generate_data(num_samples=num_test_samples)
    
    # Преобразуем в тензоры
    X_test_tensor = torch.tensor(X_test, dtype=torch.float64)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float64).unsqueeze(1)

    # Сохраняем данные
    torch.save((X_test_tensor, y_test_tensor), file_name)

def load_static_test_data(file_name='static_test_data.pt'):
	# Загружаем данные из файла
	X_test_tensor, y_test_tensor = torch.load(file_name)

	return X_test_tensor, y_test_tensor

In [5]:
def generate_and_prepare_training_data(num_train_samples=1000000, batch_size=128):
    X_train, y_train = generate_data(num_samples=num_train_samples)

    X_train_tensor = torch.tensor(X_train, dtype=torch.float64)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float64).unsqueeze(1)

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    return train_loader

# **Train**

## Model definition

In [6]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
from torch import nn
import torch.nn.functional as F

In [28]:
class KernelAttentionLayer(nn.Module):
    def __init__(self, input_size, output_size):
        super(KernelAttentionLayer, self).__init__()
        self.query_layer = nn.Linear(input_size, output_size, dtype=torch.float64)
        self.key_layer = nn.Linear(input_size, output_size, dtype=torch.float64)
        self.value_layer = nn.Linear(input_size, output_size, dtype=torch.float64)
        self.scale = 1.0 / np.sqrt(output_size)
    
    def forward(self, x):
        Q = self.query_layer(x)
        K = self.key_layer(x)
        V = self.value_layer(x)
        
        attention_weights = torch.softmax(Q @ K.T * self.scale, dim=-1)
        output = attention_weights @ V
        
        return output

In [35]:
class BlackScholesNet(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(BlackScholesNet, self).__init__()
        self.kernel_attention = KernelAttentionLayer(input_size, hidden_size)
        self.fc1 = nn.Linear(hidden_size, hidden_size, dtype=torch.float64)
        self.fc2 = nn.Linear(hidden_size, 1, dtype=torch.float64)
        self.name = 'KAN Black Scholes'
    
    def forward(self, x):	
        x = self.kernel_attention(x)
        x = torch.tanh(self.fc1(x))
        output = self.fc2(x)
        
        return output

In [254]:
class BlackScholesNet(nn.Module):
	def __init__(self, input_size=1, hidden_size=128, output_size=1, dropout_p = 0.33):
		super(BlackScholesNet, self).__init__()
		self.fc1 = nn.Linear(input_size, hidden_size, dtype=torch.float64)
		self.bn1 = nn.BatchNorm1d(hidden_size, dtype=torch.float64)  # Batch Normalization
		self.fc2 = nn.Linear(hidden_size, hidden_size, dtype=torch.float64)
		self.bn2 = nn.BatchNorm1d(hidden_size, dtype=torch.float64)  # Batch Normalization
		self.fc3 = nn.Linear(hidden_size, output_size, dtype=torch.float64)
		self.dropout = nn.Dropout(p=dropout_p)  # Dropout for regularization
		self.name = 'Black Model'

	def forward(self, x, K):
		x = F.tanh(self.bn1(self.fc1(x)))  # Tanh and Batch Normalization
		x = self.dropout(x)  # Dropout
		x = F.tanh(self.bn2(self.fc2(x)))  # Tanh and Batch Normalization
		x = self.fc3(x)
		x1, x2 = x[:, [0]], x[:, [1]]
		return F.sigmoid(x1) - K * F.sigmoid(x2)

In [178]:
class BlackScholesNet(nn.Module):
	def __init__(self, input_size=1, hidden_size=128, output_size=1, dropout_p = 0.33):
		super(BlackScholesNet, self).__init__()
		self.fc1 = nn.Linear(input_size, hidden_size, dtype=torch.float64)
		self.bn1 = nn.BatchNorm1d(hidden_size, dtype=torch.float64)  # Batch Normalization
		self.fc2 = nn.Linear(hidden_size, hidden_size, dtype=torch.float64)
		self.bn2 = nn.BatchNorm1d(hidden_size, dtype=torch.float64)  # Batch Normalization
		self.fc3 = nn.Linear(hidden_size, output_size, dtype=torch.float64)
		self.dropout = nn.Dropout(p=dropout_p)  # Dropout for regularization
		self.name = 'Black Model without activation'

	def forward(self, x, K):
		x = F.tanh(self.bn1(self.fc1(x)))  # Tanh and Batch Normalization
		x = self.dropout(x)  # Dropout
		x = F.tanh(self.bn2(self.fc2(x)))  # Tanh and Batch Normalization
		x = self.fc3(x)
		return x
		# x1, x2 = x[:, [0]], x[:, [1]]
		# return F.sigmoid(x1) - K * F.sigmoid(x2)

In [138]:
class BlackScholesNet(nn.Module):
	def __init__(self, input_size=1, hidden_size=128, output_size=1, dropout_p=0.3):
		super(BlackScholesNet, self).__init__()
		self.fc1 = nn.Linear(input_size, hidden_size, dtype=torch.float64)
		self.bn1 = nn.BatchNorm1d(hidden_size, dtype=torch.float64)
		self.fc2 = nn.Linear(hidden_size, hidden_size, dtype=torch.float64)
		self.bn2 = nn.BatchNorm1d(hidden_size, dtype=torch.float64)
		self.fc_out = nn.Linear(hidden_size, output_size, dtype=torch.float64)
		self.dropout = nn.Dropout(p=dropout_p)
		self.name = 'Black ResNet Model'

	def forward(self, x):
		residual = self.fc1(x)  # Transform residual to match shape of x
		x = F.relu(self.bn1(self.fc1(x)))
		x = self.dropout(x)
		x = F.relu(self.bn2(self.fc2(x)))
		x = x + residual  # Adding the skip connection after matching dimensions
		x = self.fc_out(x)
		return x

In [218]:
class BlackScholesNet(nn.Module):
	def __init__(self, input_size=1, hidden_size=128, output_size=1, num_layers=2, dropout_p=0.3):
		super(BlackScholesNet, self).__init__()
		self.rnn = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True, dtype=torch.float64)
		self.fc = nn.Linear(hidden_size, output_size, dtype=torch.float64)
		self.dropout = nn.Dropout(p=dropout_p)
		self.name = 'Black RNN Model'

	def forward(self, x):
		x, _ = self.rnn(x)
		x = self.fc(x[:, -1, :])  # Take output of the last time step
		return  F.tanh(x)

## Generate test data

In [55]:
num_test_samples = 500000
batch_size = 256

In [56]:
# generate_static_test_data(num_test_samples)

In [None]:
X_test_tensor, y_test_tensor = load_static_test_data()

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## Finding optimal hyperparameters

### Define the device

In [10]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')

### Run the search

In [79]:
train_loader = generate_and_prepare_training_data(1000000, batch_size)

In [None]:
X_test_tensor, y_test_tensor = load_static_test_data()

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [86]:
import optuna
import torch
import torch.nn.functional as F

def objective(trial):
	# Define hyperparameters to be optimized
	input_size = 5
	hidden_size = 128
	# dropout_p = trial.suggest_float('dropout_p', 0.1, 0.5)
	# lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
	dropout_p = 0.20862014926048447
	lr = 0.0002448376394581503
	
	model = BlackScholesNet(input_size=input_size, hidden_size=hidden_size, output_size=2)
	model.dropout.p = dropout_p

	optimizer = torch.optim.NAdam(model.parameters(), lr=lr)
	
	scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)

	num_epochs = 30
	for epoch in range(num_epochs):
		model.train()
		epoch_mse = 0
		epoch_mae = 0
		epoch_mre = 0
		
		for X_batch, y_batch in train_loader:
			X_batch, y_batch = X_batch.to(device), y_batch.to(device)
			
			optimizer.zero_grad()
			
			outputs = model(X_batch, X_batch[:, 0])
			outputs = (outputs[:, 0] + outputs[:, 1]) / 2
			y = y_batch[:, 0]
			
			# mse_loss = F.mse_loss(outputs, y)
			mae_loss = F.l1_loss(outputs, y)
			# relative_errors = torch.abs(outputs - y) / (y + 1e-8)
			# mre_loss = relative_errors.mean()
			
			mae_loss.backward()
			optimizer.step()
			
			# epoch_mse += mse_loss.item()
			epoch_mae += mae_loss.item()
			# epoch_mre += mre_loss.item()
		
		avg_epoch_mae = epoch_mae / len(train_loader)
		scheduler.step(avg_epoch_mae)
		
	test_losses = 0.
	test_maes = 0.
	test_max_aes = 0.
	test_mres = 0.
	test_max_res = 0.

	model.eval()

	with torch.inference_mode():
		for X_batch, y_batch in test_loader:
			X_batch, y_batch = X_batch.to(device), y_batch.to(device)

			# Forward pass
			outputs = model(X_batch, X_batch[:, 0])
			outputs = (outputs[:, 0] + outputs[:, 1]) / 2
			
			y = y_batch[:, 0]

			# Mean Squared Error (MSE)
			# mse_loss = F.mse_loss(outputs, y)
			# test_losses += mse_loss.item()

			# Mean Absolute Error (MAE)
			abs_errors = torch.abs(outputs - y)
			test_maes += abs_errors.sum().item()

			# # Maximum Absolute Error (Max AE)
			# max_ae = abs_errors.max().item()
			# test_max_aes = max(test_max_aes, max_ae)

			# # Mean Relative Error (MRE)
			# mask = y == 0
			# zero_price_mre = abs_errors[mask]
			# price_mre = abs_errors[~mask]

			# # Avoid division by zero for non-zero y values
			# nonzero_y = y[~mask]
			# price_mre = price_mre / nonzero_y if nonzero_y.numel() > 0 else price_mre

			# # Calculate MRE
			# total_mre = zero_price_mre.sum() + price_mre.sum()
			# test_mres += total_mre.item() if zero_price_mre.numel() > 0 or price_mre.numel() > 0 else 0

			# # Handle empty tensors and `inf` values for Max RE
			# zero_price_max = zero_price_mre.max() if zero_price_mre.numel() > 0 else 0
			# price_max = price_mre.max() if price_mre.numel() > 0 else 0

			# # Calculate max relative error
			# max_re = max(zero_price_max.item(), price_max.item())
			# test_max_res = max(test_max_res, max_re)

	# avg_test_loss = test_losses / len(test_loader.dataset)
	avg_test_mae = test_maes / len(test_loader.dataset)
	# avg_test_mre = test_mres / len(test_loader.dataset)
			
	# Return the average MAE as the objective to be minimized
	return avg_test_mae

In [None]:
# Create Optuna study and optimize
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

# Print best parameters and best value
print("Best hyperparameters:", study.best_params)
print("Best MAE:", study.best_value)

Best hyperparameters: {'dropout_p': 0.20862014926048447, 'lr': 0.0002448376394581503}
Best MAE: 0.00049047276004533371

## Model initialization

In [41]:
dropout_p = 0.20862014926048447
lr = 0.004448376394581503

In [42]:
model = BlackScholesNet(input_size=5, hidden_size=64)
# model.dropout.p = dropout_p
optimizer = optim.NAdam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)

In [92]:
# model.load_state_dict(torch.load('models\\Black Model_mae.pth'))

In [43]:
model.to(device)

BlackScholesNet(
  (kernel_attention): KernelAttentionLayer(
    (query_layer): Linear(in_features=5, out_features=64, bias=True)
    (key_layer): Linear(in_features=5, out_features=64, bias=True)
    (value_layer): Linear(in_features=5, out_features=64, bias=True)
  )
  (fc1): Linear(in_features=64, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
)

Best hyperparameters: {'hidden_size': 256, 'dropout_p': 0.16165214075232218, 'lr': 0.0024867405570057574, 'batch_size': 64, 'optimizer': 'NAdam'}
Best MAE: 0.003318011008932989

Best hyperparameters: {'hidden_size': 128, 'dropout_p': 0.3974039374569882, 'lr': 0.012408717790861197, 'batch_size': 128, 'optimizer': 'NAdam'}
Best MAE: 0.008668262018621945

## Prepare the training data

In [18]:
num_train_samples = 1000000
batch_size = 128

In [19]:
train_loader = generate_and_prepare_training_data(num_train_samples, batch_size)

## Training loop

### Parameters

In [20]:
num_epochs = 100
num_stages = 1

In [21]:
def weights_init(m):
	if isinstance(m, nn.Linear):
		nn.init.xavier_uniform_(m.weight)
		if m.bias is not None:
			nn.init.constant_(m.bias, 0)

In [44]:
model.apply(weights_init)

BlackScholesNet(
  (kernel_attention): KernelAttentionLayer(
    (query_layer): Linear(in_features=5, out_features=64, bias=True)
    (key_layer): Linear(in_features=5, out_features=64, bias=True)
    (value_layer): Linear(in_features=5, out_features=64, bias=True)
  )
  (fc1): Linear(in_features=64, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
)

### Huber Loss

In [117]:
def huber_loss(y_pred, y_true, delta=1.0):
	error = y_true - y_pred
	is_small_error = torch.abs(error) <= delta
	small_error_loss = 0.5 * error**2
	large_error_loss = delta * (torch.abs(error) - 0.5 * delta)

	return torch.where(is_small_error, small_error_loss, large_error_loss).mean()

In [None]:
for stage in range(num_stages):
	print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ***** Stage [{stage+1}/{num_stages}] {'*'*150}")
	
	for epoch in range(num_epochs):
		epoch_huber = 0.
		epoch_mae = 0.
		epoch_mre = 0.
		epoch_mse = 0.
		
		for X_batch, y_batch in train_loader:
			X_batch, y_batch = X_batch.to(device), y_batch.to(device)
			# X_batch = X_batch.unsqueeze(1)  # Add a dimension for sequence length
			model.train()
			
			outputs = model(X_batch, X_batch[:, 0])
			outputs = (outputs[:, 0] + outputs[:, 1] ) / 2
			# outputs = outputs[:, 0]
			y = y_batch[:, 0]
			
			# Calculate losses
			hub_loss = huber_loss(outputs, y, 0.02)
			mse_loss = F.mse_loss(outputs, y)
			mae_loss = F.l1_loss(outputs, y)
			mask = y >= 1e-10
			y_m = y[mask]
			relative_errors = torch.abs(outputs[mask] - y_m ) / y_m
			mre_loss = relative_errors.mean()
			
			# Backpropagation
			optimizer.zero_grad()
			hub_loss.backward()
			optimizer.step()
			
			# Accumulate losses
			epoch_mse += mse_loss.item()
			epoch_mae += mae_loss.item()
			epoch_mre += mre_loss.item()
			epoch_huber += hub_loss
		
		print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ----- Epoch [{epoch+1}/{num_epochs}] {'-'*150}")

		avg_epoch_huber = epoch_huber / len(train_loader)
		avg_epoch_mse = epoch_mse / len(train_loader)
		avg_epoch_mae = epoch_mae / len(train_loader)
		avg_epoch_mre = epoch_mre / len(train_loader)
		
		print(f"{model.name:<50} | Huber loss: {avg_epoch_huber:<25} | MSE: {avg_epoch_mse:<25} | MAE: {avg_epoch_mae:<25} | MRE: {avg_epoch_mre:<25} |")
		
		scheduler.step(avg_epoch_huber)

In [253]:
import torch
from torch.distributions import Normal

def d1(K, T, sigma, F):
	return (torch.log(F / K) + (0.5 * sigma**2) * T) / (sigma * torch.sqrt(T))

def d2(d1, T, sigma):
	return d1 - sigma * torch.sqrt(T)

def delta(d1, F=1, option_type='call'):
	normal_dist = Normal(0, 1)

	if option_type == 'call':
		return normal_dist.cdf(d1)
	elif option_type == 'put':
		return normal_dist.cdf(d1) - 1
	else:
		raise ValueError("Option type must be 'call' or 'put'")

def gamma(T, sigma, d1, F=1):
	normal_dist = Normal(0, 1)
	pdf_d1 = torch.exp(normal_dist.log_prob(d1)) 

	return pdf_d1 / (F * sigma * torch.sqrt(T))

def theta(K, T, sigma, d1, d2, F=1, r=0, option_type='call'):
	normal_dist = Normal(0, 1)
	pdf_d1 = torch.exp(normal_dist.log_prob(d1)) 

	if option_type == 'call':
		return (-F * pdf_d1 * sigma / (2 * torch.sqrt(T)) - r * K * torch.exp(-r * T) * normal_dist.cdf(d2))
	elif option_type == 'put':
		return (-F * pdf_d1 * sigma / (2 * torch.sqrt(T)) + r * K * torch.exp(-r * T) * normal_dist.cdf(-d2))
	else:
		raise ValueError("Option type must be 'call' or 'put'")

def vega(T, d1, F=1):
	normal_dist = Normal(0, 1)
	pdf_d1 = torch.exp(normal_dist.log_prob(d1))
	
	return F * pdf_d1 * torch.sqrt(T)

def greeks(K, T, sigma, F=1, r=0, option_type='call'):
	dp = d1(K, T, sigma, F)
	dm = d2(dp, T, sigma)
	
	return delta(dp, F, option_type), gamma(T, sigma, dp, F), theta(K, T, sigma, dp, dm, F, r, option_type), vega(T, dp, F)

In [None]:
for stage in range(num_stages):
	print(f"[ {datetime.now().strftime('%H:%M:%S')} ] ***** Stage [{stage+1}/{num_stages}] {'*'*150}")
	
	for epoch in range(num_epochs):
		cnt = 0
		epoch_huber = 0.
		epoch_mae = 0.
		epoch_mre = 0.
		epoch_mse = 0.
		epoch_delta_loss = 0.
		epoch_gamma_loss = 0.
		
		for X_batch, y_batch in train_loader:
			X_batch, y_batch = X_batch.to(device), y_batch.to(device)

			X_batch = X_batch.clone().detach().requires_grad_(True)#torch.tensor(X_batch, dtype=torch.float64, requires_grad=True)
			y_batch = y_batch.clone().detach().requires_grad_(True)#torch.tensor(y_batch, dtype=torch.float64, requires_grad=True)

			model.train()
			
			outputs = model(X_batch, X_batch[:, 0])
			outputs = (outputs[:, 0] + outputs[:, 1]) / 2
			y = y_batch[:, 0]
			
			# Вычисление основных потерь
			hub_loss = huber_loss(outputs, y, 0.02)
			mse_loss = F.mse_loss(outputs, y)
			mae_loss = F.l1_loss(outputs, y)
			mask = y >= 1e-10
			y_m = y[mask]
			relative_errors = torch.abs(outputs[mask] - y_m) / y_m
			mre_loss = relative_errors.mean()
			
			# Обнуление градиентов перед вычислением грека
			optimizer.zero_grad()

			# --- Вычисление дельты и гаммы ---
			deltas, gammas, thetas, vegas = greeks(X_batch[:, 0], X_batch[:, 1], X_batch[:, 3] / torch.sqrt(X_batch[:, 1]))

			outputs.backward(torch.ones_like(outputs), retain_graph=True)
			K_grad = X_batch.grad[:, 0].clone()  

			X_batch.grad.zero_()

			y = model(X_batch, X_batch[:, 0])
			y = (y[:, 0] + y[:, 1] ) / 2
			# y = y[:, 0]
			y.backward(torch.ones_like(y), retain_graph=True)
			delta_grad = X_batch.grad[:, 0].clone().requires_grad_(True)

			X_batch.grad.zero_()

			# Вычисление второго градиента (гамма)
			y = model(X_batch, X_batch[:, 0])
			y = (y[:, 0] + y[:, 1] ) / 2
			# y = y[:, 0]
			y.backward(torch.ones_like(y), retain_graph=True)
			delta_grad.backward(torch.ones_like(delta_grad), retain_graph=True)
			gamma_grad = X_tensor.grad[:, 0].clone()

			# --- Вычисление потерь по дельте ---
			outputs.backward(torch.ones_like(outputs), retain_graph=True)  # Рассчитываем дельту
			delta_pred = X_batch.grad[:, 0].clone().requires_grad_(True)  # Градиент по K — это дельта
			delta_loss = F.mse_loss(delta_pred, deltas)

			# Обнуление градиентов для следующего шага
			X_batch.grad.zero_()

			outputs = model(X_batch, X_batch[:, 0])
			outputs = (outputs[:, 0] + outputs[:, 1]) / 2

			# --- Вычисление потерь по гамме ---
			outputs.backward(torch.ones_like(delta_pred), retain_graph=True)  # Рассчитываем гамму
			delta_pred.backward(torch.ones_like(delta_pred), retain_graph=True)
			gamma_pred = X_batch.grad[:, 0].clone().detach()  # Градиент дельты — это гамма
			gamma_loss = F.mse_loss(gamma_pred, gammas)

			# Назад по градиенту для обновления параметров
			delta_loss.backward()
			optimizer.step()
			
			# Аккумулируем потери для статистики
			epoch_mse += mse_loss.item()
			epoch_mae += mae_loss.item()
			epoch_mre += mre_loss.item()
			epoch_huber += hub_loss.item()
			epoch_delta_loss += delta_loss.item()
			epoch_gamma_loss += gamma_loss.item()

		# Логируем результаты для текущей эпохи
		print(f"[ {datetime.now().strftime('%H:%M:%S')} ] ----- Epoch [{epoch+1}/{num_epochs}] {'-'*150}")
		avg_epoch_huber = epoch_huber / len(train_loader)
		avg_epoch_mse = epoch_mse / len(train_loader)
		avg_epoch_mae = epoch_mae / len(train_loader)
		avg_epoch_mre = epoch_mre / len(train_loader)
		avg_delta_loss = epoch_delta_loss / len(train_loader)
		avg_gamma_loss = epoch_gamma_loss / len(train_loader)

		print(f"{model.name:<25} | MSE: {avg_epoch_mse:<25} | MAE: {avg_epoch_mae:<25} | MRE: {avg_epoch_mre:<25} | Delta Loss: {avg_delta_loss:<25} | Gamma Loss: {avg_gamma_loss:<25} |")
		
		scheduler.step(avg_delta_loss)

In [None]:
for stage in range(num_stages):
	print(f"[ {datetime.now().strftime('%H:%M:%S')} ] ***** Stage [{stage+1}/{num_stages}] {'*'*150}")
	
	for epoch in range(num_epochs):
		epoch_huber, epoch_mae, epoch_mre = 0., 0., 0.
		epoch_mse, epoch_delta_loss, epoch_gamma_loss = 0., 0., 0.
		
		for X_batch, y_batch in train_loader:
			X_batch, y_batch = X_batch.to(device), y_batch.to(device)
						
			# Включение градиентов для X_batch
			X_batch = X_batch.clone().detach().requires_grad_(True)
			y_batch = y_batch.clone().detach().requires_grad_(True)

			model.train()
			outputs = model(X_batch, X_batch[:, 0])
			outputs = (outputs[:, 0] + outputs[:, 1]) / 2
			y = y_batch[:, 0]
			
			# Основные потери
			hub_loss = huber_loss(outputs, y, 0.02)
			mse_loss = F.mse_loss(outputs, y)
			mae_loss = F.l1_loss(outputs, y)
			
			# Вычисление относительных ошибок
			mask = y >= 1e-10
			y_m = y[mask]
			relative_errors = torch.abs(outputs[mask] - y_m) / y_m
			mre_loss = relative_errors.mean()

			# --- Вычисление дельты ---
			deltas, gammas, thetas, vegas = greeks(X_batch[:, 0], X_batch[:, 1], X_batch[:, 3] / torch.sqrt(X_batch[:, 1]))

			# Рассчитываем градиент по страйку (дельту)
			outputs.backward(torch.ones_like(outputs), retain_graph=True)
			delta_grad = X_batch.grad[:, 0].clone().requires_grad_(True)  # Сохраняем градиенты

			# # Рассчитываем второй градиент (гамму)
			# delta_grad.backward(torch.ones_like(delta_grad), retain_graph=True)
			# gamma_grad = X_batch.grad[:, 0].clone().detach()

			# Вычисление потерь по дельте и гамме
			delta_loss = F.mse_loss(delta_grad, deltas)
			# gamma_loss = F.mse_loss(gamma_grad, gammas)

			# Назад по градиенту для обновления параметров
			optimizer.zero_grad()  # Обнуление градиентов перед шагом
			delta_loss.backward()
			optimizer.step()

			# Аккумулируем потери для статистики
			epoch_huber += hub_loss.item()
			epoch_mse += mse_loss.item()
			epoch_mae += mae_loss.item()
			epoch_mre += mre_loss.item()
			epoch_delta_loss += delta_loss.item()
			# epoch_gamma_loss += gamma_loss.item()

		# Логирование
		avg_epoch_huber = epoch_huber / len(train_loader)
		avg_epoch_mse = epoch_mse / len(train_loader)
		avg_epoch_mae = epoch_mae / len(train_loader)
		avg_epoch_mre = epoch_mre / len(train_loader)
		avg_delta_loss = epoch_delta_loss / len(train_loader)
		avg_gamma_loss = 0#epoch_gamma_loss / len(train_loader)

		print(f"[ {datetime.now().strftime('%H:%M:%S')} ] ----- Epoch [{epoch+1}/{num_epochs}] {'-'*150}")
		print(f"{model.name:<25} | MSE: {avg_epoch_mse:<25} | MAE: {avg_epoch_mae:<25} | MRE: {avg_epoch_mre:<25} | Delta Loss: {avg_delta_loss:<25} | Gamma Loss: {avg_gamma_loss:<25} |")
		
		scheduler.step(avg_delta_loss)


### Log-cosh loss

In [32]:
for stage in range(num_stages):
	print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ***** Stage [{stage+1}/{num_stages}] {'*'*150}")
	
	for epoch in range(num_epochs):
		cnt = 0
		epoch_lcosh = 0.
		epoch_mae = 0.
		epoch_mre = 0.
		epoch_mse = 0.
		
		for X_batch, y_batch in train_loader:
			X_batch, y_batch = X_batch.to(device), y_batch.to(device)

			model.train()
			
			outputs = model(X_batch, X_batch[:, 0])
			outputs = (outputs[:, 0] + outputs[:, 1] ) / 2
			y = y_batch[:, 0]
			
			# Calculate losses
			lcosh_loss = torch.mean(torch.log(torch.cosh(outputs-y)))
			mse_loss = F.mse_loss(outputs, y)
			mae_loss = F.l1_loss(outputs, y)
			mask = y >= 1e-10
			y_m = y[mask]
			relative_errors = torch.abs(outputs[mask] - y_m ) / y_m
			mre_loss = relative_errors.sum()

			cnt += len(relative_errors)
			
			# Backpropagation
			optimizer.zero_grad()
			lcosh_loss.backward()
			optimizer.step()
			
			# Accumulate losses
			epoch_mse += mse_loss.item()
			epoch_mae += mae_loss.item()
			epoch_mre += mre_loss.item()
			epoch_lcosh += hub_loss
		
		print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ----- Epoch [{epoch+1}/{num_epochs}] {'-'*150}")

		avg_epoch_lcosh = epoch_lcosh / len(train_loader)
		avg_epoch_mse = epoch_mse / len(train_loader)
		avg_epoch_mae = epoch_mae / len(train_loader)
		avg_epoch_mre = epoch_mre / cnt
		
		print(f"{model.name:<50} | Huber loss: {avg_epoch_lcosh:<25} | MSE: {avg_epoch_mse:<25} | MAE: {avg_epoch_mae:<25} | MRE: {avg_epoch_mre:<25} |")
		
		scheduler.step(avg_epoch_mae)

[ 18:51:10 ] ***** Stage [1/1] ******************************************************************************************************************************************************
[ 18:51:30 ] ----- Epoch [1/30] ------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model                                        | Huber loss: 2.805671036771152e-05     | MSE: 0.01731447072122037       | MAE: 0.089106594409645         | MRE: 3058633.5212305137        |
[ 18:51:50 ] ----- Epoch [2/30] ------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model                                        | Huber loss: 2.805671036771152e-05     | MSE: 0.0010450003118724588     | MAE: 0.023971658696214888      | MRE: 771303.588995485          |
[ 18:52:09 ] ----- Epoch [3/30] ------------------------------------

### MSE Only

In [96]:
# for epoch in range(num_epochs):
# 	# Initialize epoch metrics for each model
# 	epoch_mae = [0.] * len(models)
# 	epoch_mre = [0.] * len(models)
# 	epoch_mse = [0.] * len(models)
	
# 	for X_batch, y_batch in train_loader:
# 		X_batch, y_batch = X_batch.to(device), y_batch.to(device)
		
# 		# Loop over each model
# 		for i, model in enumerate(models):
# 			model.train()
			
# 			# Forward pass
# 			outputs = model(X_batch, X_batch[:, 0])
# 			outputs = (outputs[:, 0] + outputs[:, 1] ) / 2
# 			y = y_batch[:, 0]
			
# 			# Calculate losses
# 			mse_loss = F.mse_loss(outputs, y)
# 			mae_loss = F.l1_loss(outputs, y)
# 			relative_errors = torch.abs(outputs - y) / (y + 1e-8)
# 			mre_loss = relative_errors.mean()
			
# 			# Backpropagation
# 			optimizers.zero_grad()
# 			mse_loss.backward()
# 			optimizers.step()
			
# 			# Accumulate losses for this model
# 			epoch_mse += mse_loss.item()
# 			epoch_mae += mae_loss.item()
# 			epoch_mre += mre_loss.item()
	
# 	print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ----- Epoch [{epoch+1}/{num_epochs}] {'-'*150}")
# 	# Average metrics for each model
# 	for i in range(len(models)):
# 		avg_epoch_mse = epoch_mse / len(train_loader)
# 		avg_epoch_mae = epoch_mae / len(train_loader)
# 		avg_epoch_mre = epoch_mre / len(train_loader)
		
# 		print(f"{models.name:<50} | MSE: {avg_epoch_mse:<25} | MAE: {avg_epoch_mae:<25} | MRE: {avg_epoch_mre:<25} |")
		
# 		# Scheduler step
# 		schedulers.step(avg_epoch_mse)



### MAE Only

In [45]:
for stage in range(num_stages):
	print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ***** Stage [{stage+1}/{num_stages}] {'*'*150}")
	
	for epoch in range(num_epochs):
		epoch_mae = 0.
		epoch_mre = 0.
		epoch_mse = 0.
		
		for X_batch, y_batch in train_loader:
			X_batch, y_batch = X_batch.to(device), y_batch.to(device)

			model.train()
			
			outputs = model(X_batch)
			# outputs = (outputs[:, 0] + outputs[:, 1] ) / 2
			outputs=outputs[:, 0]
			y = y_batch[:, 0]
			
			# Calculate losses
			mse_loss = F.mse_loss(outputs, y)
			mae_loss = F.l1_loss(outputs, y)
			mask = y >= 1e-10
			y_m = y[mask]
			relative_errors = torch.abs(outputs[mask] - y_m ) / y_m
			mre_loss = relative_errors.mean()
			
			# Backpropagation
			optimizer.zero_grad()
			mae_loss.backward()
			optimizer.step()
			
			# Accumulate losses
			epoch_mse += mse_loss.item()
			epoch_mae += mae_loss.item()
			epoch_mre += mre_loss.item()
		
		print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ----- Epoch [{epoch+1}/{num_epochs}] {'-'*150}")

		avg_epoch_mse = epoch_mse / len(train_loader)
		avg_epoch_mae = epoch_mae / len(train_loader)
		avg_epoch_mre = epoch_mre / len(train_loader)
		
		print(f"{model.name:<50} | MSE: {avg_epoch_mse:<25} | MAE: {avg_epoch_mae:<25} | MRE: {avg_epoch_mre:<25} |")
		
		scheduler.step(avg_epoch_mae)

[ 19:38:53 ] ***** Stage [1/1] ******************************************************************************************************************************************************
[ 19:39:15 ] ----- Epoch [1/100] ------------------------------------------------------------------------------------------------------------------------------------------------------
KAN Black Scholes                                  | MSE: 0.0012387422443283591     | MAE: 0.016218780921354994      | MRE: 418882.8272023318         |
[ 19:39:36 ] ----- Epoch [2/100] ------------------------------------------------------------------------------------------------------------------------------------------------------
KAN Black Scholes                                  | MSE: 0.00014922346122831482    | MAE: 0.008199835669306535      | MRE: 207551.09525587354        |
[ 19:39:56 ] ----- Epoch [3/100] -----------------------------------------------------------------------------------------------------------------

KeyboardInterrupt: 

### MRE Only

In [179]:
# for epoch in range(num_epochs):
# 	# Initialize epoch metrics for each model
# 	epoch_mae = [0.] * len(models)
# 	epoch_mre = [0.] * len(models)
# 	epoch_mse = [0.] * len(models)
	
# 	for X_batch, y_batch in train_loader:
# 		X_batch, y_batch = X_batch.to(device), y_batch.to(device)
		
# 		# Loop over each model
# 		for i, model in enumerate(models):
# 			model.train()
			
# 			# Forward pass
# 			outputs = model(X_batch, X_batch[:, 0])
# 			outputs = outputs[:, 0] + outputs[:, 1] / X_batch[:, 0]
# 			y = y_batch[:, 0]
			
# 			# Calculate losses
# 			mse_loss = F.mse_loss(outputs, y)
# 			mae_loss = F.l1_loss(outputs, y)
# 			relative_errors = torch.abs(outputs - y) / (y + 1e-8)
# 			mre_loss = relative_errors.mean()
			
# 			# Backpropagation
# 			optimizers.zero_grad()
# 			mre_loss.backward()
# 			optimizers.step()
			
# 			# Accumulate losses for this model
# 			epoch_mse += mse_loss.item()
# 			epoch_mae += mae_loss.item()
# 			epoch_mre += mre_loss.item()
	
# 	print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ----- Epoch [{epoch+1}/{num_epochs}] {'-'*150}")
# 	# Average metrics for each model
# 	for i in range(len(models)):
# 		avg_epoch_mse = epoch_mse / len(train_loader)
# 		avg_epoch_mae = epoch_mae / len(train_loader)
# 		avg_epoch_mre = epoch_mre / len(train_loader)
		
# 		print(f"{models.name:<50} | MSE: {avg_epoch_mse:<25} | MAE: {avg_epoch_mae:<25} | MRE: {avg_epoch_mre:<25} |")
		
# 		# Scheduler step
# 		schedulers.step(avg_epoch_mre)



### Combined loss

In [180]:
# for epoch in range(num_epochs):
# 	# Initialize epoch metrics for each model
# 	epoch_mae = [0.] * len(models)
# 	epoch_mre = [0.] * len(models)
# 	epoch_mse = [0.] * len(models)
	
# 	for X_batch, y_batch in train_loader:
# 		X_batch, y_batch = X_batch.to(device), y_batch.to(device)
		
# 		# Loop over each model
# 		for i, model in enumerate(models):
# 			model.train()
			
# 			# Forward pass
# 			outputs = model(X_batch, X_batch[:, 0])
# 			outputs = outputs[:, 0] + outputs[:, 1] / X_batch[:, 0]
# 			y = y_batch[:, 0]
			
# 			# Calculate losses
# 			mse_loss = F.mse_loss(outputs, y)
# 			mae_loss = F.l1_loss(outputs, y)
# 			relative_errors = torch.abs(outputs - y) / (y + 1e-8)
# 			mre_loss = relative_errors.mean()
			
# 			# Composite loss
# 			composite_loss = (weights_mse * mse_loss + 
# 							  weights_mae * mae_loss + 
# 							  weights_mre * mre_loss)
			
# 			# Backpropagation
# 			optimizers.zero_grad()
# 			composite_loss.backward()
# 			optimizers.step()
			
# 			# Accumulate losses for this model
# 			epoch_mse += mse_loss.item()
# 			epoch_mae += mae_loss.item()
# 			epoch_mre += mre_loss.item()
	
# 	print(f"[ {datetime.now().strftime("%H:%M:%S")} ] ----- Epoch [{epoch+1}/{num_epochs}] {'-'*150}")
# 	# Compute average metrics for each model
# 	for i in range(len(models)):
# 		avg_epoch_mse = epoch_mse / len(train_loader)
# 		avg_epoch_mae = epoch_mae / len(train_loader)
# 		avg_epoch_mre = epoch_mre / len(train_loader)
		
# 		print(f"{models.name:<50} | MSE: {avg_epoch_mse:<25} | MAE: {avg_epoch_mae:<25} | MRE: {avg_epoch_mre:<25} |")
		
# 		# Update weights for loss adjustment
# 		weights_mse = avg_epoch_mse / target_loss
# 		weights_mae = avg_epoch_mae / target_loss
# 		weights_mre = avg_epoch_mre / target_loss
		
# 		# Scheduler step
# 		schedulers.step(composite_loss)



### Saving model

In [46]:
model_path = f"models/{model.name}_mae_52.pth"

In [47]:
torch.save(model.state_dict(), model_path)

# **Evaluation**

### Loading a model

In [40]:
models_path = 'models'
models = []

In [None]:
import os

for model_name in [m for m in os.listdir(models_path) if m.endswith('pth')]:
	print(model_name)
	# model = BlackScholesNet(input_size=5, hidden_size=128, output_size=2)
	# model.load_state_dict(torch.load(os.path.join(models_path, model_name)))
	# model.to(device)

	# models.append(model)

In [None]:
model = BlackScholesNet(input_size=5, hidden_size=128, output_size=2)
model.load_state_dict(torch.load(f"models\\black.pth"))
model.to(device)

### Generate test data

In [49]:
num_test_samples = 10000000
batch_size = 256

In [188]:
# generate_static_test_data(scaler, num_test_samples, 'static_test_data_5.pt')

In [50]:
X_test_tensor, y_test_tensor = load_static_test_data('static_test_data_5.pt')

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

  X_test_tensor, y_test_tensor = torch.load(file_name)


### Validation

In [51]:
# Initialize metrics for each model
test_losses = 0.
test_maes = 0.
test_max_aes = 0.
test_mres = 0.
test_max_res = 0.
cnt = 0

model.eval()

with torch.inference_mode():
	for X_batch, y_batch in test_loader:
		X_batch, y_batch = X_batch.to(device), y_batch.to(device)
		# X_batch = X_batch.unsqueeze(1)  # Add a dimension for sequence length

		# Forward pass
		outputs = model(X_batch)
		# outputs = (outputs[:, 0] + outputs[:, 1]) / 2
		outputs = outputs[:, 0]
		
		y = y_batch[:, 0]

		# Mean Squared Error (MSE)
		mse_loss = F.mse_loss(outputs, y)
		test_losses += mse_loss.item()

		# Mean Absolute Error (MAE)
		abs_errors = torch.abs(outputs - y)
		test_maes += abs_errors.sum().item()

		# Maximum Absolute Error (Max AE)
		max_ae = abs_errors.max().item()
		test_max_aes = max(test_max_aes, max_ae)

		# Mean Relative Error (MRE)
		mask = y >= 1e-10
		y_m = y[mask]
		relative_errors = torch.abs(outputs[mask] - y_m ) / y_m

		# Calculate MRE
		test_mres += relative_errors.sum().item()
		cnt += len(relative_errors)

		# Calculate max relative error
		test_max_res = max(test_max_res, relative_errors.max().item())


# Calculate the average metrics over all test samples for each model
avg_test_loss = test_losses / len(test_loader.dataset)
avg_test_mae = test_maes / len(test_loader.dataset)
avg_test_mre = test_mres / cnt

print('-'*250)
print(f"{model.name:<50} Results | MSE: {avg_test_loss:<25} | MAE: {avg_test_mae:<25} | Max AE: {test_max_aes:<25} | MRE: {avg_test_mre:<25} | Max RE: {test_max_res:<25}")


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
KAN Black Scholes                                  Results | MSE: 0.00019835337972704247    | MAE: 0.15416928065790636       | Max AE: 0.43278967416525643       | MRE: 743518.5907223832         | Max RE: 241826836.28083214       


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model                                        Results | MSE: 1.860992115070968e-09     | MAE: 0.0004510874317589473     | Max AE: 0.015712605802571444      | MRE: 3039.6012895961358        | Max RE: 24053090.766225673       


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model                                        Results | MSE: 4.3176854823214384e-08    | MAE: 0.002548539893560493      | Max AE: 0.02216988133309572       | MRE: 52930.97271546223         | Max RE: 74994568.97747828        


weight decay

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 2.3365065048548754e-08	| MAE: 0.0019228472067950922	 | Max AE: 0.020785850080788537	  | MRE: 19540.95551665145		 | Max RE: 38428277.40361217		


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Deep Model								   Results | MSE: 1.2292241128713081e-08	| MAE: 0.0014149500863335678	 | Max AE: 0.026046665725385276	  | MRE: 15973.972288640323		| Max RE: 13644321.567340782	   


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black RNN Model									Results | MSE: 1.589732937737662e-11	 | MAE: 3.226697698242954e-05	 | Max AE: 0.001995025861357906	  | MRE: 1103.9241832972975		| Max RE: 15041726.760521403	   


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black RNN Model									Results | MSE: 4.777907308376809e-11	 | MAE: 7.51992950659468e-05	  | Max AE: 0.0023897778645683085	 | MRE: 2156.166863801692		 | Max RE: 16343429.899646066	   


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black RNN Model									Results | MSE: 5.004787422430029e-10	 | MAE: 0.00030045160178942263	| Max AE: 0.003092261246673578	  | MRE: 8728.737648890628		 | Max RE: 21332958.18205698		


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black CNN Model									Results | MSE: 1.6229420763649162e-08	| MAE: 0.0016403877602419335	 | Max AE: 0.02096367339459762	   | MRE: 39130.069827259045		| Max RE: 49456677.41935457		


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black ResNet Model								 Results | MSE: 1.357335095783607e-07	 | MAE: 0.004738837337220532	  | Max AE: 0.021764388623186998	  | MRE: 147254.71198362616		| Max RE: 146963541.83840838	   


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 1.6345862189413596e-07	| MAE: 0.005929486075345317	  | Max AE: 0.024445273630216757	  | MRE: 308086.0669714369		 | Max RE: 102909205.05644394	   


hub no activation

### tests

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 6.311032924102108e-08	 | MAE: 0.0024049330370174466	 | Max AE: 0.0486327963291493		| MRE: 0.17257685070773462	   | Max RE: 13.89843139038193		


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 4.767372257527874e-09	 | MAE: 0.0008842677848565959	 | Max AE: 0.010480693476281389	  | MRE: 8610.273830920263		 | Max RE: 24765156.665712476   
</br>
delta 0.02

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 1.7072262899170862e-08	| MAE: 0.0014622379697655967	 | Max AE: 0.012304333512939192	  | MRE: 19133.356655940905		| Max RE: 32084991.04810058		
</br>
Huber loss delta = 0.01


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 6.5584848876120205e-09	| MAE: 0.0010204205638990932	 | Max AE: 0.01436321507469207	   | MRE: 21704.53521771374		 | Max RE: 39553088.01572265
</br>
Huber loss delta = 1.0

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 9.577022434796093e-09	 | MAE: 0.001128070044492659	  | Max AE: 0.01750700016971668	   | MRE: 0.0					   | Max RE: 0.0   
with grad norm

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 3.437281101835667e-08	 | MAE: 0.0014345937355705574	 | Max AE: 0.026702327077854637	  | MRE: 0.0					   | Max RE: 0.0		  

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 4.870335576499605e-09	 | MAE: 0.0006644182282886656	 | Max AE: 0.02035231469468729	   | MRE: 0.0					   | Max RE: 0.0	 

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 3.0745519796730916e-09	| MAE: 0.0006669217488797097	 | Max AE: 0.01045388565348454	   | MRE: 0.0					   | Max RE: 0.0	

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 5.394526925641141e-09	 | MAE: 0.0007469986503670395	 | Max AE: 0.024494726553518364	  | MRE: 0.0					   | Max RE: 0.0   

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Black Model										Results | MSE: 9.944437335789435e-10	 | MAE: 0.0002790903619311921	 | Max AE: 0.020291466710218475	  | MRE: 0.0					   | Max RE: 5.520420072604463e+31	

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Base Model										 Results | MSE: 3.700808076204782e-09	 | MAE: 0.0006494232504191063	 | Max AE: 0.02520344930434637	   | MRE: 207.39712310149338		| Max RE: 115508.51118180675 

### Greeks

In [52]:
import torch
from torch.distributions import Normal

def d1(K, T, sigma, F):
	return (torch.log(F / K) + (0.5 * sigma**2) * T) / (sigma * torch.sqrt(T))

def d2(d1, T, sigma):
	return d1 - sigma * torch.sqrt(T)

def delta(d1, F=1, option_type='call'):
	normal_dist = Normal(0, 1)

	if option_type == 'call':
		return normal_dist.cdf(d1)
	elif option_type == 'put':
		return normal_dist.cdf(d1) - 1
	else:
		raise ValueError("Option type must be 'call' or 'put'")

def gamma(T, sigma, d1, F=1):
	normal_dist = Normal(0, 1)
	pdf_d1 = torch.exp(normal_dist.log_prob(d1)) 

	return pdf_d1 / (F * sigma * torch.sqrt(T))

def theta(K, T, sigma, d1, d2, F=1, r=0, option_type='call'):
	normal_dist = Normal(0, 1)
	pdf_d1 = torch.exp(normal_dist.log_prob(d1)) 

	if option_type == 'call':
		return (-F * pdf_d1 * sigma / (2 * torch.sqrt(T)) - r * K * torch.exp(-r * T) * normal_dist.cdf(d2))
	elif option_type == 'put':
		return (-F * pdf_d1 * sigma / (2 * torch.sqrt(T)) + r * K * torch.exp(-r * T) * normal_dist.cdf(-d2))
	else:
		raise ValueError("Option type must be 'call' or 'put'")

def vega(T, d1, F=1):
	normal_dist = Normal(0, 1)
	pdf_d1 = torch.exp(normal_dist.log_prob(d1))
	
	return F * pdf_d1 * torch.sqrt(T)

def greeks(K, T, sigma, F=1, r=0, option_type='call'):
	dp = d1(K, T, sigma, F)
	dm = d2(dp, T, sigma)
	
	return delta(dp, F, option_type), gamma(T, sigma, dp, F), theta(K, T, sigma, dp, dm, F, r, option_type), vega(T, dp, F)


In [53]:
num_samples = 10000
K = np.random.uniform(1, 2.5, num_samples)
T = np.random.uniform(0.004, 4, num_samples)
sigma = np.random.uniform(0.1, 0.5, num_samples)

# Подготовка данных
X = np.vstack((K, T, np.log(K), sigma * np.sqrt(T), sigma**2 * T)).T
# X = scaler.fit_transform(X)
X_tensor = torch.tensor(X, dtype=torch.float64, requires_grad=True)

prices = torch.tensor(black_model(1, K, T, sigma), dtype=torch.float64)

In [245]:
X

array([[2.1123328 , 0.17447376, 0.74779293, 0.17846916, 0.03185124],
       [2.32165281, 3.31470446, 0.84227935, 0.28785647, 0.08286135],
       [1.69476981, 1.0002331 , 0.52754693, 0.47864129, 0.22909749],
       ...,
       [2.38137775, 1.52928014, 0.86767921, 0.1947643 , 0.03793313],
       [1.30069654, 0.52201047, 0.26289992, 0.14615633, 0.02136167],
       [1.14877841, 3.78935869, 0.13869913, 0.91709289, 0.84105936]])

In [55]:
# Входные данные для модели
y = model(X_tensor)
# y = (y[:, 0] + y[:, 1] ) / 2
y = y[:, 0]

abs_errors = torch.abs(y - prices)

# Mean Squared Error (MSE)
mse = F.mse_loss(y, prices).item()

# Mean Absolute Error (MAE)
mae = F.l1_loss(y, prices)

# Mean Relative Error (MRE)
mask = prices >= 1e-10
y_m = prices[mask]
relative_errors = torch.abs(y[mask] - y_m ) / y_m
mre = relative_errors.mean().item()
max_mre = relative_errors.max().item()

print(model.name)
print(f"Mean Squared Error (MSE): {mse}")
print(f"Mean Absolute Error (MAE): {mae.item()}")
print(f"Max Absolute Error (MAE): {abs_errors.max().item()}")
print(f"Mean Relative Error (MRE): {mre}")
print(f"Max Relative Error (MRE): {max_mre}")

KAN Black Scholes
Mean Squared Error (MSE): 1.338610235951651e-06
Mean Absolute Error (MAE): 0.0004891456605618326
Max Absolute Error (MAE): 0.039436549035213875
Mean Relative Error (MRE): 2306.777969088586
Max Relative Error (MRE): 849233.3873387466


In [None]:
for actual in prices:
	for forecast in y:
		error = abs(forecast-actual)/actual
		if error>1e-10:
			print(f"{forecast} vs {actual} error = {error}")

In [57]:
X_test = torch.tensor(X, dtype=torch.float64)
greeks_result = greeks(X_tensor[:, 0],X_tensor[:, 1], X_tensor[:, 3] / torch.sqrt(X_tensor[:, 1]))

In [59]:
predicted_prices = model(X_tensor)
predicted_prices.backward(torch.ones_like(predicted_prices), retain_graph=True)
deltas = X_tensor.grad[:, 0].clone()
print(deltas)

X_tensor.grad.zero_()


tensor([ 0.8280, -0.4822,  0.7609,  ..., -0.4800, -0.5530,  0.4545],
       dtype=torch.float64)


tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]], dtype=torch.float64)

In [61]:
print(greeks_result[0])

print(F.mse_loss(deltas, greeks_result[0]))

tensor([3.1788e-01, 2.9324e-02, 2.1129e-01,  ..., 1.3367e-07, 1.4249e-01,
        6.2746e-02], dtype=torch.float64, grad_fn=<MulBackward0>)
tensor(0.3526, dtype=torch.float64, grad_fn=<MseLossBackward0>)


In [215]:
import torch
import numpy as np
import random

# Фиксируем seed для повторяемости
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [101]:
model.zero_grad()  # Сброс градиентов модели

y = model(X_tensor, X_tensor[:, 0])
# y = (y[:, 0] + y[:, 1] ) / 2
y = y[:, 0]

y.backward(torch.ones_like(y), retain_graph=True)
K_grad = X_tensor.grad[:, 0].clone() 
# T_grad = X_tensor.grad[:, 1].clone()  
# sigma_grad = (X_tensor.grad[:, 3] / torch.sqrt(X_tensor[:, 1])).clone()

X_tensor.grad.zero_()

y = model(X_tensor, X_tensor[:, 0])
# y = (y[:, 0] + y[:, 1] ) / 2
y = y[:, 0]
y.backward(torch.ones_like(y), retain_graph=True)
delta_grad = X_tensor.grad[:, 0].clone()

# X_tensor.grad.zero_()

# # Вычисление второго градиента (гамма)
# y = model(X_tensor, X_tensor[:, 0])
# y = (y[:, 0] + y[:, 1] ) / 2
# # y = y[:, 0]
# y.backward(torch.ones_like(y), retain_graph=True)
# delta_grad.backward(torch.ones_like(delta_grad), retain_graph=True)
# gamma_grad = X_tensor.grad[:, 0].clone()

delta_loss = F.mse_loss(delta_grad, greeks_result[0]).item()
# gamma_loss = F.mse_loss(gamma_grad, greeks_result[1]).item()
# theta_loss = F.mse_loss(T_grad, greeks_result[2]).item()
# vega_loss = F.mse_loss(sigma_grad, greeks_result[3]).item()

print("Losses for current model:")
print('Delta: ', delta_loss)
# print('Gamma: ', gamma_loss)
# print('Theta: ', theta_loss)
# print('Vega: ', vega_loss)

print(model.name)

Losses for current model:
Delta:  0.037595336416915356
Black Model without activation


In [None]:
# Losses for current model:
# Delta:  0.023927056489781124
# Black Model without activation

In [None]:
# Losses for current model:
# Delta:  0.04097502044021693
# Black Model

In [None]:
# Losses for current model:
# Delta:  0.23041457848259805
# Black Model
# weight decay

In [None]:
# Losses for current model:
# Delta:  0.09470602026672634
# Gamma:  0.4605984598995799
# Black Model

In [None]:
# Zero gradients before starting
model.zero_grad()

# Forward pass
y = model(X_tensor.unsqueeze(1))  # Add sequence dimension for LSTM
y = y[:, 0]  # Get the first output

# Delta Calculation (First Derivative)
y.backward(torch.ones_like(y), retain_graph=True)  # Backpropagate
K_grad = X_tensor.grad[:, 0].clone()  # Gradient w.r.t. K (Delta)
X_tensor.grad.zero_()  # Clear gradients for the next step

# Gamma Calculation (Second Derivative)
y = model(X_tensor.unsqueeze(1))  # Forward pass again
y = y[:, 0]
y.backward(torch.ones_like(y), retain_graph=True)
delta_grad = X_tensor.grad[:, 0].clone().requires_grad_(True)  # Delta gradient

X_tensor.grad.zero_()

# Now backpropagate Delta to calculate Gamma
delta_grad.backward(torch.ones_like(delta_grad), retain_graph=True)
gamma_grad = X_tensor.grad[:, 0].clone()  # Gamma

# Calculate MSE loss for Delta and Gamma against target Greeks
delta_loss = F.mse_loss(delta_grad, greeks_result[0]).item()  # Replace with actual target values for Delta
gamma_loss = F.mse_loss(gamma_grad, greeks_result[1]).item()  # Replace with actual target values for Gamma

print("Losses for the RNN model:")
print('Delta Loss: ', delta_loss)
print('Gamma Loss: ', gamma_loss)

In [None]:
# Losses for current model:
# Delta:  0.07544267106592446
# Gamma:  0.4395989266865998
# Black Deep Model

In [None]:
# Losses for the RNN model:
# Delta Loss:  0.08094750341367543
# Gamma Loss:  0.4721712001137761

In [None]:
# Losses for current model:
# Delta:  0.03421519741554771
# Gamma:  0.37361078598119973
# Black ResNet Model

In [None]:
# Losses for current model:
# Delta:  0.02984523250068021
# Gamma:  0.32884577927187386 hub no act 