<a href="https://colab.research.google.com/github/satyabratkumarsingh/option-portfolio-encoder-decoder/blob/main/Train_Set_Transformer_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [69]:
!pip install torch
!pip install comet_ml
!pip install tqdm
!pip install matplotlib



In [70]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [71]:
import os
def delete_file_from_drive(full_file_path):
  if os.path.exists(full_file_path):
      try:
          os.remove(full_file_path)
          print(f"File '{full_file_path}' successfully deleted from Google Drive.")
      except Exception as e:
          print(f"Error deleting file '{full_file_path}': {e}")
  else:
      print(f"File '{full_file_path}' not found at '{full_file_path}'.")


In [1]:
import random
import numpy as np
import torch
import itertools
from itertools import product
from torch.utils.data import Dataset, DataLoader
import gc # For garbage collection
import numpy as np
import yfinance as yf


# Parameters
MU = 0.05
T = 1.0 # Time to maturity
NOISE_STD = 0.005
MIN_PRICE_RANGE = 100
MAX_PRICE_RANGE = 500

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def calibrate_sigma_from_sp500(period="1y", ticker="SPY"):
    """
    Calibrate volatility (sigma) using S&P500 proxy (SPY ETF).
    Computes realized annualized volatility from historical data.

    Args:
        period (str): Period to download (e.g. "1y", "2y").
        ticker (str): Symbol to use, default SPY (ETF for S&P 500).

    Returns:
        float: Annualized volatility (sigma).
    """
    # Download daily adjusted close prices
    data = yf.download(ticker, period=period, interval="1d", auto_adjust=True)

    closes = data["Close"].dropna()

    # Compute log returns
    log_returns = np.log(closes / closes.shift(1)).dropna()

    # Daily volatility
    sigma_daily = log_returns.std()

    # Annualized volatility
    sigma_annual = sigma_daily * np.sqrt(252)

    # Convert safely to float
    return sigma_annual.item() if hasattr(sigma_annual, "item") else float(sigma_annual)

# Example: update SIGMA
SIGMA = calibrate_sigma_from_sp500()
print(f"Calibrated SP500 1Y Volatility (Sigma): {SIGMA:.4f}")


[*********************100%***********************]  1 of 1 completed

Calibrated SP500 1Y Volatility (Sigma): 0.1932





In [2]:

TFR_RATIO = 0.5

def generate_hybrid_strikes(S_0, option_types, rng, training_friendly_ratio=TFR_RATIO):
    n = len(option_types)
    K_prices = np.zeros(n, dtype=np.float32)

    for i in range(n):
        if rng.random() < training_friendly_ratio:  # 40% chance
            K_prices[i] = S_0 * rng.uniform(0.90, 1.20)  # Mixed ITM/OTM
        else:
            # Realistic strikes
            if option_types[i] == "call":
                K_prices[i] = S_0 * rng.uniform(0.95, 1.15)  # Mix of ITM/OTM calls
            else:
                K_prices[i] = S_0 * rng.uniform(0.85, 1.05)  # Mix of ITM/OTM puts

    return K_prices


def generate_option_prices_for_idx(idx, n, weights=None, training_friendly_ratio=TFR_RATIO):

    rng = np.random.default_rng(idx)

    torch.manual_seed(idx)
    if DEVICE.type == 'cuda':
        torch.cuda.manual_seed_all(idx)

    S_0 = rng.uniform(MIN_PRICE_RANGE, MAX_PRICE_RANGE)

    # Generate option types
    option_types = rng.choice(["call", "put"], size=n)
    option_types_numeric = np.where(option_types == "call", 1, 0).astype(np.float32)

    # Generate hybrid strike prices
    K_prices = generate_hybrid_strikes(S_0, option_types, rng, TFR_RATIO)

    # Generate or use weights
    if weights is None:
        weight_sets = generate_combinatorial_weights_manageable(n)
        weights_array = weight_sets[0]
    else:
        weights_array = np.array(weights, dtype=np.float32)

    return K_prices, option_types_numeric, S_0, weights_array



def generate_combinatorial_weights_manageable(n, base_weights=[-0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75]):
    weight_sets = []

    # Handle the case where n < 2
    if n < 2:
        weights = np.zeros(n, dtype=np.float32)
        if n == 1:
            # If only one position, assign a long position (1.0)
            weights[0] = 1.0
        weight_sets.append(weights)
        return weight_sets

    # Generate a single portfolio: either one long or one short, and the rest from combinatorics
    weights = np.zeros(n, dtype=np.float32)

    # Randomly choose if we want a long or short portfolio
    is_long = random.choice([True, False])

    if is_long:
        # Choose one position to be long (1.0)
        long_idx = random.randint(0, n - 1)
        weights[long_idx] = 1.0
    else:
        # Choose one position to be short (-1.0)
        short_idx = random.randint(0, n - 1)
        weights[short_idx] = -1.0

    # Fill remaining positions with combinatorial weights from base_weights
    remaining_positions = [i for i in range(n) if weights[i] == 0]  # Find positions not yet filled
    combinatorics = np.random.choice(base_weights, size=len(remaining_positions), replace=True)

    # Assign combinatorial weights to the remaining positions without normalization
    weights[remaining_positions] = combinatorics

    weight_sets.append(weights)

    return weight_sets

def compute_cashflow_delta(S_T, portfolio):
    """
    Computes the delta of each option in the portfolio w.r.t terminal stock price S_T.

    Args:
        S_T (Tensor): [B] or [B, 1] terminal stock prices
        portfolio (Tensor): [B, N, 3] with columns (K, type, weight)

    Returns:
        Tensor: [B, N] delta for each option
    """
    # FIXED: Ensure S_T has correct shape
    if S_T.dim() > 2:
        S_T = S_T.squeeze()  # Remove extra dimensions
    if S_T.dim() == 2 and S_T.shape[1] == 1:
        S_T = S_T.squeeze(-1)  # [B, 1] ‚Üí [B]

    strikes = portfolio[..., 0]  # [B, N]
    types = portfolio[..., 1]    # [B, N]

    # FIXED: Expand S_T to match portfolio shape [B, N]
    if S_T.dim() == 1:
        S_T_exp = S_T.unsqueeze(-1).expand(-1, strikes.shape[1])  # [B] ‚Üí [B, N]
    else:
        S_T_exp = S_T.expand(-1, strikes.shape[1])  # [B, 1] ‚Üí [B, N]

    # Call delta: 1 if S_T > K
    call_delta = ((types == 1) & (S_T_exp > strikes)).float()
    # Put delta: -1 if S_T < K
    put_delta = -((types == 0) & (S_T_exp < strikes)).float()

    delta_each = call_delta + put_delta  # [B, N]
    return delta_each

def compute_cashflow(portfolio, S_T):
    """
    Compute cashflow and portfolio-level derivative for a batch of portfolios.

    Args:
        portfolio (Tensor): [B, N, 3] tensor with (K, type, weight)
        S_T (Tensor): [B] or [B, 1] terminal stock prices

    Returns:
        cashflow (Tensor): [B] total weighted payoff per portfolio
        derivative (Tensor): [B] portfolio delta (sum of weighted option deltas)
    """
    # FIXED: Ensure S_T has correct shape
    if S_T.dim() > 2:
        S_T = S_T.squeeze()  # Remove extra dimensions
    if S_T.dim() == 2 and S_T.shape[1] == 1:
        S_T = S_T.squeeze(-1)  # [B, 1] ‚Üí [B]

    strikes = portfolio[..., 0]  # [B, N]
    types = portfolio[..., 1]    # [B, N]
    weights = portfolio[..., 2]  # [B, N]

    # FIXED: Expand S_T to match portfolio shape [B, N]
    if S_T.dim() == 1:
        S_T_exp = S_T.unsqueeze(-1).expand(-1, strikes.shape[1])  # [B] ‚Üí [B, N]
    else:
        S_T_exp = S_T.expand(-1, strikes.shape[1])  # [B, 1] ‚Üí [B, N]

    # Compute payoffs
    call_payoffs = torch.relu(S_T_exp - strikes)  # [B, N]
    put_payoffs = torch.relu(strikes - S_T_exp)   # [B, N]
    payoffs = torch.where(types == 1, call_payoffs, put_payoffs)  # [B, N]

    # Weighted cashflow
    weighted_payoffs = payoffs * weights  # [B, N]
    cashflow = weighted_payoffs.sum(dim=1)  # [B]

    # Compute derivative (delta per option)
    delta_each = compute_cashflow_delta(S_T, portfolio)  # [B, N]
    weighted_delta = delta_each * weights  # [B, N]

    # Portfolio-level derivative
    derivative = weighted_delta.sum(dim=1)  # [B]

    return cashflow.float(), derivative.float()

def compute_cashflow_vectorized(portfolio, S_T_batch):
    """
    Vectorized computation for multiple S_T scenarios.

    Args:
        portfolio (Tensor): [B, N, 3] tensor with (K, type, weight)
        S_T_batch (Tensor): [B, M] multiple terminal prices per portfolio

    Returns:
        cashflow (Tensor): [B, M] cashflows for each scenario
        derivative (Tensor): [B, M] derivatives for each scenario
    """
    B, N, _ = portfolio.shape
    B2, M = S_T_batch.shape
    assert B == B2, f"Batch size mismatch: portfolio {B}, S_T {B2}"

    # Expand tensors for vectorized computation
    portfolio_exp = portfolio.unsqueeze(2).expand(-1, -1, M, -1)  # [B, N, M, 3]
    S_T_exp = S_T_batch.unsqueeze(1).expand(-1, N, -1)  # [B, N, M]

    strikes = portfolio_exp[..., 0]  # [B, N, M]
    types = portfolio_exp[..., 1]    # [B, N, M]
    weights = portfolio_exp[..., 2]  # [B, N, M]

    # Compute payoffs for all scenarios
    call_payoffs = torch.relu(S_T_exp - strikes)  # [B, N, M]
    put_payoffs = torch.relu(strikes - S_T_exp)   # [B, N, M]
    payoffs = torch.where(types == 1, call_payoffs, put_payoffs)  # [B, N, M]

    # Compute deltas for all scenarios
    call_delta = ((types == 1) & (S_T_exp > strikes)).float()  # [B, N, M]
    put_delta = -((types == 0) & (S_T_exp < strikes)).float()  # [B, N, M]
    delta_each = call_delta + put_delta  # [B, N, M]

    # Aggregate across options (sum over N dimension)
    cashflow = (payoffs * weights).sum(dim=1)  # [B, M]
    derivative = (delta_each * weights).sum(dim=1)  # [B, M]

    return cashflow.float(), derivative.float()

In [3]:
import numpy as np
import torch
from torch.utils.data import Dataset

class OperatorDatasetStandardized(Dataset):
    def __init__(self, num_samples, num_samples_S_T,
                 K_scaler=None, S_T_scaler=None, cashflow_scaler=None,
                 is_fitting_mode=False, max_portfolio_size=100, min_portfolio_size=1):
        self.num_samples = num_samples
        self.num_samples_S_T = num_samples_S_T
        self.max_portfolio_size = max_portfolio_size
        self.min_portfolio_size = min_portfolio_size
        self.is_fitting_mode = is_fitting_mode

        if not is_fitting_mode and any(s is None for s in (K_scaler, S_T_scaler, cashflow_scaler)):
            raise ValueError("K_scaler, S_T_scaler, and cashflow_scaler must be provided in evaluation mode.")

        self.K_scaler = K_scaler
        self.S_T_scaler = S_T_scaler
        self.cashflow_scaler = cashflow_scaler

    def __len__(self):
        return self.num_samples

    def _simulate_terminal_prices(self, S_0):
        # Example: Geometric Brownian Motion with noise
        Z = torch.clamp(torch.randn(self.num_samples_S_T), -3, 3)
        drift = (MU - 0.5 * SIGMA**2) * T
        diffusion = SIGMA * torch.sqrt(torch.tensor(T, dtype=torch.float32))
        S_T = S_0 * torch.exp(drift + diffusion * Z)
        S_T += torch.randn_like(S_T) * (NOISE_STD * S_T)
        return S_T.float()

    def __getitem__(self, idx):
        portfolio_len = np.random.randint(self.min_portfolio_size, self.max_portfolio_size + 1)
        K, option_types, S_0, weights = generate_option_prices_for_idx(idx, portfolio_len)

        # Ensure float32
        K = torch.tensor(K, dtype=torch.float32)
        option_types = torch.tensor(option_types, dtype=torch.float32)
        weights = torch.tensor(weights, dtype=torch.float32)
        S_0 = torch.tensor(S_0, dtype=torch.float32)

        # Portfolio tensor
        portfolio = torch.stack([K, option_types, weights], dim=-1)
        pad_len = self.max_portfolio_size - portfolio_len
        if pad_len > 0:
            pad_tensor = torch.zeros(pad_len, 3, dtype=torch.float32)
            portfolio = torch.cat([portfolio, pad_tensor], dim=0)

        mask = torch.tensor([True]*portfolio_len + [False]*pad_len, dtype=torch.bool)

        # Terminal prices - shape [num_samples_S_T]
        S_T = self._simulate_terminal_prices(S_0)

        # Store denormalized versions
        portfolio_denorm = portfolio.clone()
        S_T_denorm = S_T.clone()

        # FIXED: Use vectorized computation
        # Expand portfolio for batch computation: [1, max_portfolio_size, 3]
        portfolio_batch = portfolio_denorm.unsqueeze(0)  # Add batch dimension
        # Expand S_T for batch computation: [1, num_samples_S_T]
        S_T_batch = S_T_denorm.unsqueeze(0)  # Add batch dimension

        # Use vectorized computation
        cashflow_denorm, derivative_denorm = compute_cashflow_vectorized(
            portfolio_batch, S_T_batch
        )

        # Remove batch dimension: [1, M] ‚Üí [M]
        cashflow_denorm = cashflow_denorm.squeeze(0).float()
        derivative_denorm = derivative_denorm.squeeze(0).float()

        if not self.is_fitting_mode:
            # Normalize only valid options
            valid_len = mask.sum().item()
            if valid_len > 0:
                K_norm = torch.tensor(
                    self.K_scaler.transform(K[:valid_len].unsqueeze(1)),
                    dtype=torch.float32
                ).squeeze()
                portfolio[:valid_len, 0] = K_norm

            # Normalize S_T: [M] ‚Üí [M, 1] ‚Üí transform ‚Üí [M]
            S_T = torch.tensor(
                self.S_T_scaler.transform(S_T_denorm.unsqueeze(1)),
                dtype=torch.float32
            ).squeeze()

            # Normalize cashflow: [M] ‚Üí [M, 1] ‚Üí transform ‚Üí [M]
            cashflow = torch.tensor(
                self.cashflow_scaler.transform(cashflow_denorm.unsqueeze(1)),
                dtype=torch.float32
            ).squeeze()

            # Scale derivative to match cashflow std
            derivative = derivative_denorm.clone()
            cf_std = cashflow.std().item()
            deriv_std = derivative.std().item()
            if deriv_std > 0:
                derivative = derivative * (cf_std / deriv_std)
        else:
            cashflow = cashflow_denorm.clone()
            derivative = derivative_denorm.clone()
            S_T = S_T_denorm.clone()

        return {
            "portfolio": portfolio.float(),          # [max_portfolio_size, 3]
            "mask": mask,                            # [max_portfolio_size]
            "S_T": S_T.float(),                      # [num_samples_S_T]
            "cashflow": cashflow.float(),            # [num_samples_S_T]
            "derivative": derivative.float(),        # [num_samples_S_T]
            "portfolio_denorm": portfolio_denorm.float(),
            "S_T_denorm": S_T_denorm.float(),
            "cashflow_denorm": cashflow_denorm.float(),
            "derivative_denorm": derivative_denorm.float()
        }



#Test with sample data

In [4]:
# --- Quick test run ---
dataset = OperatorDatasetStandardized(
    num_samples=2,
    num_samples_S_T=2,
    K_scaler=None, S_T_scaler=None, cashflow_scaler=None,
    max_portfolio_size=3,
    min_portfolio_size=1,
    is_fitting_mode=True
)

for i in range(len(dataset)):
    sample = dataset[i]  # dictionary returned
    print(f"\n=== Sample {i} ===")
    print("Portfolio features:\n", sample["portfolio"])
    print("Mask:", sample["mask"])
    print("S_T:", sample["S_T"])
    print("Cashflow:", sample["cashflow"])
    print("Derivative:", sample["derivative"])
    print("Portfolio (denorm):\n", sample["portfolio_denorm"])
    print("S_T (denorm):", sample["S_T_denorm"])
    print("Cashflow (denorm):", sample["cashflow_denorm"])
    print("Derivative (denorm):", sample["derivative_denorm"])



=== Sample 0 ===
Portfolio features:
 tensor([[ 1.5083e+03,  0.0000e+00,  2.5000e-01],
        [ 1.4125e+03,  1.0000e+00, -1.0000e+00],
        [ 1.3959e+03,  1.0000e+00, -7.5000e-01]])
Mask: tensor([True, True, True])
S_T: tensor([1812.2474, 1289.1338])
Cashflow: tensor([-711.9773,   54.7958])
Derivative: tensor([-1.7500, -0.2500])
Portfolio (denorm):
 tensor([[ 1.5083e+03,  0.0000e+00,  2.5000e-01],
        [ 1.4125e+03,  1.0000e+00, -1.0000e+00],
        [ 1.3959e+03,  1.0000e+00, -7.5000e-01]])
S_T (denorm): tensor([1812.2474, 1289.1338])
Cashflow (denorm): tensor([-711.9773,   54.7958])
Derivative (denorm): tensor([-1.7500, -0.2500])

=== Sample 1 ===
Portfolio features:
 tensor([[ 1.1459e+03,  0.0000e+00,  5.0000e-01],
        [ 1.4422e+03,  0.0000e+00, -1.0000e+00],
        [ 1.3374e+03,  1.0000e+00, -5.0000e-01]])
Mask: tensor([True, True, True])
S_T: tensor([1472.9598, 1368.7098])
Cashflow: tensor([-67.7839, -89.1250])
Derivative: tensor([-0.5000,  0.5000])
Portfolio (denorm)

In [5]:
import torch
import pytest

# Dummy constants for testing
MU = 0.05
T = 1.0
NOISE_STD = 0.0  # deterministic

# -------------------------
# Minimal compute_cashflow
# -------------------------
def compute_cashflow(portfolio_batch, S_T_batch):
    """
    portfolio_batch: [B, N, 3] tensor
    S_T_batch: [B, 1] tensor
    Returns: cashflow [B], derivative [B]
    """
    B, N, _ = portfolio_batch.shape
    cashflow = torch.zeros(B)
    derivative = torch.zeros(B)

    for i in range(B):
        total = 0.0
        delta_total = 0.0
        for j in range(N):
            K, t, w = portfolio_batch[i, j]
            s = S_T_batch[i, 0]
            if t == 1:  # call
                payoff = max(s - K, 0)
                delta = 1.0 if s > K else 0.0
            else:       # put
                payoff = max(K - s, 0)
                delta = -1.0 if s > K else 0.0
            total += w * payoff
            delta_total += w * delta
        cashflow[i] = total
        derivative[i] = delta_total
    return cashflow, derivative

# -------------------------
# Manual reference for test
# -------------------------
def manual_cashflow_delta(S_T, portfolio):
    cashflow = []
    derivative = []
    for s in S_T:
        total = 0.0
        delta_total = 0.0
        for k, t, w in portfolio:
            if t == 1:  # call
                payoff = max(s - k, 0)
                delta = 1.0 if s > k else 0.0
            else:       # put
                payoff = max(k - s, 0)
                delta = -1.0 if s > k else 0.0
            total += w * payoff
            delta_total += w * delta
        cashflow.append(total)
        derivative.append(delta_total)
    return torch.tensor(cashflow, dtype=torch.float32), torch.tensor(derivative, dtype=torch.float32)

# -------------------------
# Test cashflow/derivative for known portfolio
# -------------------------
def test_cashflow_derivative_manual():
    portfolio = torch.tensor([[100.0, 1.0, 1.0], [120.0, 0.0, -0.5]], dtype=torch.float32)
    S_T = torch.tensor([90.0, 110.0, 130.0], dtype=torch.float32)

    portfolio_batch = portfolio.unsqueeze(0).expand(len(S_T), -1, -1)
    cashflow, derivative = compute_cashflow(portfolio_batch, S_T.unsqueeze(-1))
    expected_cashflow, expected_derivative = manual_cashflow_delta(S_T, portfolio)

    assert torch.allclose(cashflow.squeeze(), expected_cashflow, atol=1e-4)
    assert torch.allclose(derivative.squeeze(), expected_derivative, atol=1e-4)
    print("‚úÖ Cashflow and derivative tests passed!")

# -------------------------
# Test dataset integration
# -------------------------
class DummyDataset:
    def __getitem__(self, idx):
        portfolio = torch.tensor([[100.0, 1.0, 1.0], [120.0, 0.0, -0.5]], dtype=torch.float32)
        S_T = torch.tensor([90.0, 110.0, 130.0], dtype=torch.float32)
        portfolio_batch = portfolio.unsqueeze(0).expand(len(S_T), -1, -1)
        cashflow, derivative = compute_cashflow(portfolio_batch, S_T.unsqueeze(-1))
        mask = torch.tensor([True, True], dtype=torch.bool)
        # Return same signature as OperatorDatasetStandardized
        return {
            "portfolio": portfolio,
            "mask": mask,
            "S_T": S_T,
            "cashflow": cashflow.squeeze(),
            "derivative": derivative.squeeze(),
            "portfolio_denorm": portfolio,
            "S_T_denorm": S_T,
            "cashflow_denorm": cashflow.squeeze(),
            "derivative_denorm": derivative.squeeze()
        }

def test_dataset_cashflow_derivative():
    dataset = DummyDataset()
    sample = dataset[0]
    expected_cashflow, expected_derivative = manual_cashflow_delta(sample["S_T"], sample["portfolio"])
    assert torch.allclose(sample["cashflow"], expected_cashflow, atol=1e-4)
    assert torch.allclose(sample["derivative"], expected_derivative, atol=1e-4)
    print("‚úÖ Dataset cashflow and derivative tests passed!")

# Run tests
test_cashflow_derivative_manual()
test_dataset_cashflow_derivative()


‚úÖ Cashflow and derivative tests passed!
‚úÖ Dataset cashflow and derivative tests passed!


In [6]:
import os
from sklearn.preprocessing import StandardScaler
import joblib
import numpy as np
from tqdm import tqdm

DRIVE_PATH = "/content/drive/MyDrive/Ucl/"
K_SCALAR_FILE = os.path.join(DRIVE_PATH, 'K_Scalar_Advanced.pkl')
ST_SCALAR_FILE = os.path.join(DRIVE_PATH, 'S_T_Scalar_Advanced.pkl')
CASHFLOW_SCALAR_FILE = os.path.join(DRIVE_PATH, 'Cashflow_Scalar_Advanced.pkl')


def fit_K_ST_scalers(train_loader, save_path_K=K_SCALAR_FILE, save_path_ST=ST_SCALAR_FILE):
    print("Fitting K and S_T scalers from training set...")
    all_K = []
    all_S_T = []

    for batch in tqdm(train_loader, desc="Collecting K and S_T for scalers"):
        # Access dict keys safely
        portfolio_real = batch["portfolio"]  # [B, N, 3]
        s_t_real = batch["S_T"]              # [B, num_S_T]

        # K is the first column of the portfolio features
        K_real = portfolio_real[:, :, 0].cpu().numpy().reshape(-1, 1)
        S_T_real = s_t_real.cpu().numpy().reshape(-1, 1)

        all_K.append(K_real)
        all_S_T.append(S_T_real)

    K_all_np = np.concatenate(all_K, axis=0)
    S_T_all_np = np.concatenate(all_S_T, axis=0)

    K_scalar = StandardScaler()
    K_scalar.fit(K_all_np)
    #joblib.dump(K_scalar, save_path_K)

    S_T_scalar = StandardScaler()
    S_T_scalar.fit(S_T_all_np)
    #joblib.dump(S_T_scalar, save_path_ST)

    # print(f"‚úÖ Saved K scalar to: {save_path_K}")
    # print(f"‚úÖ Saved S_T scalar to: {save_path_ST}")
    # print(f"K mean: {K_scalar.mean_[0]:.4f}, std: {K_scalar.scale_[0]:.4f}")
    # print(f"S_T mean: {S_T_scalar.mean_[0]:.4f}, std: {S_T_scalar.scale_[0]:.4f}")

    return K_scalar, S_T_scalar


def fit_cashflow_scaler(train_loader, save_path=CASHFLOW_SCALAR_FILE):
    all_cashflows = []

    for batch in tqdm(train_loader, desc="Fitting Cashflow Scaler"):
        cashflow = batch["cashflow"]  # [B, N] or [B*num_S_T]
        cashflow_np = cashflow.detach().cpu().numpy().reshape(-1, 1)
        all_cashflows.append(cashflow_np)

    cashflows_np = np.concatenate(all_cashflows, axis=0)

    scaler = StandardScaler()
    scaler.fit(cashflows_np)
    #joblib.dump(scaler, save_path)

    # print(f"‚úÖ Saved Cashflow Scaler to: {save_path}")
    # print(f"Cashflow Mean: {scaler.mean_[0]:.4f}, Std Dev: {scaler.scale_[0]:.4f}")

    return scaler


In [7]:
import torch
import torch.nn as nn
import numpy as np
from torch.amp import autocast, GradScaler
from torch.utils.data import DataLoader
from tqdm import tqdm

FEED_FWD_DEPTH = 3


DRPO_OUT_PROB = 0.1


class TrunkNet(nn.Module):
    def __init__(self, input_dim=1, latent_dim=64, hidden_dim=128,
                 num_layers=6, dropout_prob=DRPO_OUT_PROB):
        super(TrunkNet, self).__init__()

        self.input_proj = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout_prob)
        )

        self.blocks = nn.ModuleList([
            nn.Sequential(
                nn.Linear(hidden_dim, hidden_dim),
                nn.LayerNorm(hidden_dim),
                nn.GELU(),
                nn.Dropout(dropout_prob)
            ) for _ in range(num_layers)
        ])

        self.output_proj = nn.Linear(hidden_dim, latent_dim)

    def forward(self, S_T):
        if S_T.dim() == 1:
            S_T = S_T.unsqueeze(-1)
        elif S_T.dim() == 2:
            S_T = S_T.unsqueeze(-1)

        x = self.input_proj(S_T)
        for block in self.blocks:
            x = x + block(x)
        return self.output_proj(x)


class ISAB(nn.Module):
    """Induced Set Attention Block"""
    def __init__(self, d_model, num_heads, num_inds, dropout=DRPO_OUT_PROB):
        super().__init__()
        self.num_inds = num_inds
        self.inducing_points = nn.Parameter(torch.randn(num_inds, d_model))

        self.attention1 = nn.MultiheadAttention(d_model, num_heads, dropout=dropout, batch_first=True)
        self.attention2 = nn.MultiheadAttention(d_model, num_heads, dropout=dropout, batch_first=True)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.norm4 = nn.LayerNorm(d_model)

        self.ffn1 = nn.Sequential(
            nn.Linear(d_model, d_model * FEED_FWD_DEPTH),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_model * FEED_FWD_DEPTH, d_model),
            nn.Dropout(dropout)
        )
        self.ffn2 = nn.Sequential(
            nn.Linear(d_model, d_model * FEED_FWD_DEPTH),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_model * FEED_FWD_DEPTH, d_model),
            nn.Dropout(dropout)
        )

    def forward(self, x, mask=None):
        batch_size = x.size(0)
        I = self.inducing_points.unsqueeze(0).expand(batch_size, -1, -1)

        # I attends to X
        attn_out1, _ = self.attention1(
            I, x, x,
            key_padding_mask=(~mask.bool()) if mask is not None else None
        )
        I = self.norm1(I + attn_out1)
        I = self.norm2(I + self.ffn1(I))

        # X attends to I
        attn_out2, _ = self.attention2(x, I, I)
        x = self.norm3(x + attn_out2)
        x = self.norm4(x + self.ffn2(x))

        return x


class PMA(nn.Module):
    """Pooling by Multihead Attention"""
    def __init__(self, d_model, num_heads, num_seeds, dropout=DRPO_OUT_PROB):
        super().__init__()
        self.num_seeds = num_seeds
        self.seed_vectors = nn.Parameter(torch.randn(num_seeds, d_model))

        self.attention = nn.MultiheadAttention(d_model, num_heads, dropout=dropout, batch_first=True)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

        self.ffn = nn.Sequential(
            nn.Linear(d_model, d_model * FEED_FWD_DEPTH),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_model * FEED_FWD_DEPTH, d_model),
            nn.Dropout(dropout)
        )

    def forward(self, x, mask=None):
        batch_size = x.size(0)
        S = self.seed_vectors.unsqueeze(0).expand(batch_size, -1, -1)

        key_padding_mask = (~mask.bool()) if mask is not None else None

        attn_out, _ = self.attention(S, x, x, key_padding_mask=key_padding_mask)
        S = self.norm1(S + attn_out)
        S = self.norm2(S + self.ffn(S))
        return S


class SAB(nn.Module):
    """Set Attention Block"""
    def __init__(self, d_model, num_heads, dropout=DRPO_OUT_PROB):
        super().__init__()
        self.transformer_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=num_heads,
            dim_feedforward=d_model * FEED_FWD_DEPTH,
            dropout=dropout,
            activation='gelu',
            batch_first=True,
            norm_first=True
        )

    def forward(self, x, mask=None):
        if mask is not None:
            src_key_padding_mask = ~mask.bool()
        else:
            src_key_padding_mask = None
        return self.transformer_layer(x, src_key_padding_mask=src_key_padding_mask)


class EnhancedSetTransformerEncoder(nn.Module):
    """Set Transformer with ISAB + PMA"""
    def __init__(self, portfolio_feature_dim=3, latent_dim=128, hidden_dim=64,
                 num_heads=2, dropout_prob=DRPO_OUT_PROB, num_inds=32, num_seeds=1,
                 use_isab=True, num_layers=2):
        super().__init__()

        if hidden_dim % num_heads != 0:
            hidden_dim = ((hidden_dim // num_heads) + 1) * num_heads

        self.input_proj = nn.Sequential(
            nn.Linear(portfolio_feature_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout_prob)
        )

        self.encoder_layers = nn.ModuleList()
        for _ in range(num_layers):
            if use_isab:
                self.encoder_layers.append(ISAB(hidden_dim, num_heads, num_inds, dropout_prob))
            else:
                self.encoder_layers.append(SAB(hidden_dim, num_heads, dropout_prob))

        self.pooling = PMA(hidden_dim, num_heads, num_seeds, dropout_prob)

        self.output_proj = nn.Sequential(
            nn.LayerNorm(hidden_dim),
            nn.Dropout(dropout_prob),
            nn.Linear(hidden_dim, latent_dim)
        )

    def forward(self, portfolio, mask=None):
        x = self.input_proj(portfolio)  # [B, P, H]
        for layer in self.encoder_layers:
            x = layer(x, mask=mask)
        x = self.pooling(x, mask=mask)
        x = x.mean(dim=1) if x.size(1) > 1 else x.squeeze(1)
        return self.output_proj(x)


class OptimizedSetTransformerEncoder(nn.Module):
    """Vanilla Set Transformer with mask support"""
    def __init__(self, portfolio_feature_dim=3, latent_dim=128, hidden_dim=64,
                 num_layers=1, num_heads=2, dropout_prob=DRPO_OUT_PROB):
        super().__init__()

        if hidden_dim % num_heads != 0:
            hidden_dim = ((hidden_dim // num_heads) + 1) * num_heads

        self.input_proj = nn.Sequential(
            nn.Linear(portfolio_feature_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout_prob)
        )

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=num_heads,
            dim_feedforward=hidden_dim * FEED_FWD_DEPTH,
            dropout=dropout_prob,
            activation='gelu',
            batch_first=True,
            norm_first=True
        )

        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_layers,
            enable_nested_tensor=False
        )

        self.output_proj = nn.Sequential(
            nn.LayerNorm(hidden_dim),
            nn.Dropout(dropout_prob),
            nn.Linear(hidden_dim, latent_dim)
        )

    def forward(self, portfolio, mask=None):
        x = self.input_proj(portfolio)
        src_key_padding_mask = ~mask.bool() if mask is not None else None
        x = self.transformer_encoder(x, src_key_padding_mask=src_key_padding_mask)
        pooled = x.mean(dim=1) + x.max(dim=1).values
        return self.output_proj(pooled)


class OptimizedDeepONet(nn.Module):
    """DeepONet with branch (portfolio) + trunk (S_T)"""
    def __init__(self, portfolio_feature_dim=3, hidden_dim=64, latent_dim=128,
                 dropout_prob=DRPO_OUT_PROB, num_heads=2, use_enhanced_transformer=True,
                 num_inds=32, num_seeds=1):
        super().__init__()

        if hidden_dim % num_heads != 0:
            recommended = ((hidden_dim // num_heads) + 1) * num_heads
            raise ValueError(
                f"hidden_dim ({hidden_dim}) must be divisible by num_heads ({num_heads}). "
                f"Try hidden_dim={recommended}"
            )

        if use_enhanced_transformer:
            self.branch_net = EnhancedSetTransformerEncoder(
                portfolio_feature_dim=portfolio_feature_dim,
                latent_dim=latent_dim,
                hidden_dim=hidden_dim,
                num_heads=num_heads,
                dropout_prob=dropout_prob,
                num_inds=num_inds,
                num_seeds=num_seeds,
                use_isab=True,
                num_layers=2
            )
        else:
            self.branch_net = OptimizedSetTransformerEncoder(
                portfolio_feature_dim=portfolio_feature_dim,
                latent_dim=latent_dim,
                hidden_dim=hidden_dim,
                dropout_prob=dropout_prob,
                num_heads=num_heads
            )

        self.trunk_net = TrunkNet(
            input_dim=1,
            latent_dim=latent_dim,
            hidden_dim=hidden_dim,
            dropout_prob=dropout_prob
        )

        self.bias = nn.Parameter(torch.zeros(1))
        self.branch_scale = nn.Parameter(torch.ones(1) * 0.8)
        self.trunk_scale = nn.Parameter(torch.ones(1) * 0.8)

    def forward(self, portfolio, S_T, mask=None):
        B, M = S_T.shape

        branch_out = self.branch_net(portfolio, mask=mask) * self.branch_scale

        trunk_outputs = []
        for i in range(M):
            S_T_single = S_T[:, i:i+1]
            trunk_out = self.trunk_net(S_T_single) * self.trunk_scale
            trunk_outputs.append(trunk_out)

        trunk_out = torch.cat(trunk_outputs, dim=1)  # [B, M, latent_dim]

        branch_expanded = branch_out.unsqueeze(1)  # [B, 1, latent_dim]
        interaction = (branch_expanded * trunk_out).sum(dim=-1)  # [B, M]

        return interaction + self.bias


        # branch_out = self.branch_net(portfolio, mask=mask) * self.branch_scale
        # trunk_out = self.trunk_net(S_T) * self.trunk_scale
        # branch_expanded = branch_out.unsqueeze(1)
        # interaction = (branch_expanded * trunk_out).sum(dim=-1)
        # return interaction + self.bias


In [8]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.amp import GradScaler

# --- Gradient summary helper ---
def compute_gradient_stats(model):
    gradient_stats = {}
    total_norm = 0.0
    for name, param in model.named_parameters():
        if param.grad is not None:
            grad_norm = param.grad.norm().item()
            total_norm += grad_norm ** 2
            gradient_stats[name] = {
                'norm': grad_norm,
                'shape': tuple(param.grad.shape),
                'numel': param.grad.numel(),
                'mean': param.grad.mean().item(),
                'std': param.grad.std().item()
            }
    total_norm = total_norm ** 0.5
    return total_norm, gradient_stats, None

def print_gradient_summary(gradient_stats, total_norm, epoch, batch_idx=None):
    prefix = f"Epoch {epoch}" + (f", Batch {batch_idx}" if batch_idx is not None else "")
    print(f"\nüîç === Gradient Analysis - {prefix} ===")
    print(f"Total Gradient Norm: {total_norm:.6f}")

    if total_norm > 30.0:
        print("üö® CRITICAL: Severe gradient explosion! Consider stopping training.")
    elif total_norm > 20.0:
        print("‚ö†Ô∏è  SEVERE: Major gradient explosion detected!")
    elif total_norm > 10.0:
        print("‚ö†Ô∏è  WARNING: Moderate gradient explosion detected!")
    elif total_norm < 1e-6:
        print("‚ö†Ô∏è  WARNING: Vanishing gradients detected!")
    else:
        print("‚úÖ Gradient norm is healthy")

    sorted_layers = sorted(gradient_stats.items(), key=lambda x: x[1]['norm'], reverse=True)
    print(f"\nTop 5 layers by gradient norm (out of {len(gradient_stats)} total):")
    for i, (layer_name, stats) in enumerate(sorted_layers[:5]):
        status = "üî•" if stats['norm'] > 3.0 else "‚ö†Ô∏è" if stats['norm'] > 1.0 else "‚úÖ"
        print(f"  {status} {i+1}. {layer_name}: {stats['norm']:.4f}")
        print(f"      Shape: {stats['shape']}, Elements: {stats['numel']}")
        print(f"      Mean: {stats['mean']:.6f}, Std: {stats['std']:.6f}")
    print("=" * 60)


In [9]:
import torch
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler

class ExtendedEarlyStopping:
    # ... (no changes needed here) ...
    def __init__(self, patience=30, min_delta=0.0005, restore_best_weights=True):
        self.patience = patience
        self.min_delta = min_delta
        self.restore_best_weights = restore_best_weights
        self.wait = 0
        self.stopped_epoch = 0
        self.best = float('inf')
        self.best_weights = None

    def __call__(self, val_loss, model=None):
        if val_loss < self.best - self.min_delta:
            self.best = val_loss
            self.wait = 0
            if model is not None and self.restore_best_weights:
                self.best_weights = model.state_dict().copy()
        else:
            self.wait += 1

        if self.wait >= self.patience:
            self.stopped_epoch = True
            if model is not None and self.restore_best_weights and self.best_weights is not None:
                model.load_state_dict(self.best_weights)

        return self.stopped_epoch


In [99]:
def get_stable_hyperparameters():
    """Return more stable hyperparameters"""
    return {
        "learning_rate": 3e-4,
        "weight_decay": 1e-4 ,
        "lambda_deriv": 0.05,
        "lambda_reg": 1e-4,
        "gradient_clip_norm": 5,
        "batch_size": 128,
        "scheduler_T0": 5,
        "early_stopping_patience": 35,
    }


In [100]:

class OptimizedTrainer:
    def __init__(self, model, device='cuda', monitor_gradients=True,
                 learning_rate=5e-6, lambda_deriv_weight=0.1, weight_decay=1e-4,
                 scale_warmup_epochs=5, initial_scale=0.05, final_scale=1.0, grad_log_threshold = 5.0):
        self.model = model.to(device)
        self.device = device
        self.monitor_gradients = monitor_gradients
        self.lambda_deriv_weight = lambda_deriv_weight

        # Optimizer
        self.optimizer = optim.AdamW(
            model.parameters(),
            lr=learning_rate,
            weight_decay=weight_decay,
            betas=(0.9, 0.999),
            eps=1e-8
        )

        # Cosine Annealing Warm Restarts scheduler (epoch-based)
        self.scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.optimizer, T_0=scale_warmup_epochs, T_mult=1, eta_min=1e-6
        )

        # Gradient scaler for mixed precision
        self.scaler = GradScaler()
        self.huber_loss = nn.SmoothL1Loss(beta=1.0)

        # Branch/trunk scale
        self.scale_warmup_epochs = scale_warmup_epochs
        self.initial_scale = initial_scale
        self.final_scale = final_scale
        if hasattr(self.model, 'branch_scale') and hasattr(self.model, 'trunk_scale'):
            with torch.no_grad():
                self.model.branch_scale.fill_(initial_scale)
                self.model.trunk_scale.fill_(initial_scale)

    def check_model_health(self, epoch, batch_idx):
        for name, param in self.model.named_parameters():
            if torch.isnan(param).any() or torch.isinf(param).any():
                print(f"Bad parameter: {name} at Epoch {epoch}, Batch {batch_idx}")
                return False
        return True

    def compute_loss(self, pred_cashflow, true_cashflow, pred_deriv=None, true_deriv=None, mask=None):
        """
        FIXED: Computes total loss correctly without misapplying masks.

        Key insight: Portfolio mask is for MODEL INPUTS (which options are valid),
        not for LOSS COMPUTATION (all scenarios are always valid).

        Args:
            pred_cashflow: [B, M] predicted cashflows for M scenarios
            true_cashflow: [B, M] true cashflows for M scenarios
            pred_deriv: [B, M] predicted derivatives for M scenarios
            true_deriv: [B, M] true derivatives for M scenarios
            mask: [B, N] portfolio mask (NOT used in loss computation)
        """

        # --- Cashflow loss (no masking needed) ---
        # All scenarios are always valid, regardless of portfolio structure
        cashflow_loss = self.huber_loss(pred_cashflow, true_cashflow)

        # --- Derivative loss (no masking needed) ---
        if pred_deriv is not None and true_deriv is not None:
            deriv_loss = self.huber_loss(pred_deriv, true_deriv)
            total_loss = cashflow_loss + self.lambda_deriv_weight * deriv_loss
        else:
            deriv_loss = torch.tensor(0.0, device=pred_cashflow.device)
            total_loss = cashflow_loss

        return total_loss, cashflow_loss, deriv_loss

    def train_step(self, portfolio, S_T, cashflow, true_derivative=None, mask=None, epoch=0, batch_idx=0, log_gradients=False):
      self.optimizer.zero_grad()

      S_T = S_T.clone().detach().requires_grad_(True).to(self.device)
      portfolio = portfolio.to(self.device)
      cashflow = cashflow.to(self.device)
      if mask is not None:
          mask = mask.to(self.device)
      if true_derivative is not None:
          true_derivative = true_derivative.to(self.device)

      pred_cashflow = self.model(portfolio, S_T, mask=mask)

      pred_deriv = None
      if true_derivative is not None:
          # FIXED: Compute gradients per scenario, not summed
          pred_deriv = torch.autograd.grad(
              outputs=pred_cashflow,
              inputs=S_T,
              grad_outputs=torch.ones_like(pred_cashflow),  # ADD THIS LINE
              retain_graph=True,
              create_graph=True,
              allow_unused=True
          )[0]

      total_loss, cashflow_loss, deriv_loss = self.compute_loss(
          pred_cashflow, cashflow, pred_deriv, true_derivative, mask=None
      )

      self.scaler.scale(total_loss).backward()
      self.scaler.unscale_(self.optimizer)
      torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=5.0)
      self.scaler.step(self.optimizer)
      self.scaler.update()

      return total_loss.item(), cashflow_loss.item(), deriv_loss.item()

    def val_step(self, portfolio, S_T, cashflow, true_derivative=None, mask=None):
      """
      Validation step that can compute derivative loss as well.
      """
      self.model.eval()

      portfolio = portfolio.to(self.device)
      S_T = S_T.clone().detach().requires_grad_(True).to(self.device)  # need grad for derivative
      cashflow = cashflow.to(self.device)
      if mask is not None:
          mask = mask.to(self.device)
      if true_derivative is not None:
          true_derivative = true_derivative.to(self.device)

      # always compute forward with grad enabled (so we can use for both losses)
      with torch.enable_grad():
          pred_cashflow = self.model(portfolio, S_T, mask=mask)

          pred_deriv = None
          if true_derivative is not None:
              pred_deriv = torch.autograd.grad(
                  outputs=pred_cashflow,
                  inputs=S_T,
                  grad_outputs=torch.ones_like(pred_cashflow),
                  retain_graph=False,
                  create_graph=False,
                  allow_unused=True
              )[0]

      # detach before computing loss to avoid holding graph in memory
      total_loss, cashflow_loss, deriv_loss = self.compute_loss(
          pred_cashflow.detach(), cashflow,
          pred_deriv.detach() if pred_deriv is not None else None,
          true_derivative, mask=None
      )

      return total_loss.item(), cashflow_loss.item(), deriv_loss.item()


    def update_scale(self, current_epoch):
        """Update branch/trunk scale during warmup."""
        if hasattr(self.model, 'branch_scale') and hasattr(self.model, 'trunk_scale'):
            factor = min((current_epoch + 1) / self.scale_warmup_epochs, 1.0)
            new_scale = self.initial_scale + (self.final_scale - self.initial_scale) * factor
            with torch.no_grad():
                self.model.branch_scale.fill_(new_scale)
                self.model.trunk_scale.fill_(new_scale)
            print(f"[Epoch {current_epoch}] Updated branch/trunk scale ‚Üí {new_scale:.4f}")

    def step_scheduler_epoch(self):
        """Step scheduler once per epoch."""
        self.scheduler.step()


In [60]:
experiment.end()

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : interesting_taper_4184
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/satyabratkumarsingh/option-portfolio-encoder-decoder/10838a5cf05d46fab30167e6bb067314
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     train_cashflow_loss_batch [1120]   : (0.10014146566390991, 0.71085125207901)
[1;38;5;39mCOMET INFO:[0m     train_cashflow_loss_epoch [7]      : (0.1341879060026258, 0.2814498905092478)
[1;38;5;39mCOMET INFO:[0m     train_derivative_loss_batch [1120] : (0.3015000820159912, 1.032523

# Model save

In [101]:
def save_model_checkpoint(model, save_path, epoch=None, optimizer=None, scheduler=None,
                         train_loss=None, val_loss=None, train_size=None, val_size=None):

    checkpoint_data = {
        "model_state_dict": model.state_dict(),
        "hparams": {
            "hidden_dim": hidden_dim,
            "latent_dim": latent_dim,
            "portfolio_feature_dim": portfolio_feature_dim,
            "use_enhanced_transformer": True
        },
        "training_config": {
            "PORT_LEN": PORT_LEN,
            "PORT_SAMPLE_SIZE": PORT_SAMPLE_SIZE,
            "FEED_ST_LEN_EACH_PORT": FEED_ST_LEN_EACH_PORT,
            "batch_size": batch_size,
            "train_size": train_size,
            "val_size": val_size
        },
        "scaler_files": {
            "K_scaler": "K_Scalar_Training.pkl",
            "S_T_scaler": "S_T_Scalar_Training.pkl",
            "cashflow_scaler": "Cashflow_Scalar_Training.pkl"
        }
    }

    # Add optional training state information
    if epoch is not None:
        checkpoint_data["epoch"] = epoch
    if optimizer is not None:
        checkpoint_data["optimizer_state_dict"] = optimizer.state_dict()
    if scheduler is not None:
        checkpoint_data["scheduler_state_dict"] = scheduler.state_dict()
    if train_loss is not None:
        checkpoint_data["train_loss"] = train_loss
    if val_loss is not None:
        checkpoint_data["val_loss"] = val_loss

    torch.save(checkpoint_data, save_path)

In [102]:
import torch
from torch.utils.data import DataLoader, random_split
import numpy as np
import joblib  # Add joblib for saving scalers
from comet_ml import start
from tqdm import tqdm
import os  # Add for directory creation

# === HYPERPARAMETERS ===
hidden_dim = 128
latent_dim = 128
batch_size = 128
epochs = 500
portfolio_feature_dim = 3
PORT_LEN = 100
PORT_SAMPLE_SIZE = 25600
FEED_ST_LEN_EACH_PORT = 100

# === COMET SETUP ===
experiment = start(
    api_key="iatWnXT4JyBtDQhn7OfgISQoF",
    project_name="option-portfolio-encoder-decoder",
    workspace="satyabratkumarsingh"
)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def main():
    """
    FIXED: Corrected main training loop with proper validation computation.
    Added scaler saving for consistent evaluation.
    Added model saving every 50 epochs with -epoch{no} suffix.
    """
    hparams = get_stable_hyperparameters()
    experiment.log_parameters(hparams)

    # === Model ===
    model = OptimizedDeepONet(
        portfolio_feature_dim=portfolio_feature_dim,
        hidden_dim=hidden_dim,
        latent_dim=latent_dim,
        use_enhanced_transformer=True
    ).to(DEVICE)

    # --- STEP 1: Raw dataset for scaler fitting ---
    raw_dataset = OperatorDatasetStandardized(
        num_samples=PORT_SAMPLE_SIZE,
        min_portfolio_size=1,
        max_portfolio_size=PORT_LEN,
        num_samples_S_T=FEED_ST_LEN_EACH_PORT,
        is_fitting_mode=True
    )

    # Train/Val split
    train_size = int(0.8 * len(raw_dataset))
    val_size = len(raw_dataset) - train_size
    val_size = (val_size // batch_size) * batch_size
    train_size = len(raw_dataset) - val_size

    torch.manual_seed(42)
    train_dataset, val_dataset = random_split(raw_dataset, [train_size, val_size])
    train_loader_fitting = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

    # Fit scalers
    print("Fitting scalers on training data...")
    K_scalar, S_T_scalar = fit_K_ST_scalers(train_loader_fitting)
    cashflow_scaler = fit_cashflow_scaler(train_loader_fitting)

    # === SAVE SCALERS ===
    DRIVE_PATH = "/content/drive/MyDrive/Ucl/"
    print("Saving scalers for evaluation consistency...")

    # Create checkpoint directory for periodic model saves
    checkpoint_dir = DRIVE_PATH + "checkpoints/"
    os.makedirs(checkpoint_dir, exist_ok=True)

    try:
        # Save scalers with consistent naming
        joblib.dump(K_scalar, DRIVE_PATH + "K_Scalar_Training.pkl")
        joblib.dump(S_T_scalar, DRIVE_PATH + "S_T_Scalar_Training.pkl")
        joblib.dump(cashflow_scaler, DRIVE_PATH + "Cashflow_Scalar_Training.pkl")
        print("‚úÖ Successfully saved all training scalers")

        # Print scaler statistics for debugging
        print(f"üìä Scaler Statistics:")
        print(f"K_scaler - mean: {K_scalar.mean_[0]:.4f}, std: {np.sqrt(K_scalar.var_[0]):.4f}")
        print(f"S_T_scaler - mean: {S_T_scalar.mean_[0]:.4f}, std: {np.sqrt(S_T_scalar.var_[0]):.4f}")
        print(f"Cashflow_scaler - mean: {cashflow_scaler.mean_[0]:.4f}, std: {np.sqrt(cashflow_scaler.var_[0]):.4f}")

    except Exception as e:
        print(f"‚ùå Error saving scalers: {e}")
        print("Continuing with training but evaluation may be inconsistent...")

    # --- STEP 2: Normalized dataset ---
    normalized_dataset = OperatorDatasetStandardized(
        num_samples=PORT_SAMPLE_SIZE,
        min_portfolio_size=1,  # Add missing parameter for consistency
        max_portfolio_size=PORT_LEN,
        num_samples_S_T=FEED_ST_LEN_EACH_PORT,
        K_scaler=K_scalar,
        S_T_scaler=S_T_scalar,
        cashflow_scaler=cashflow_scaler,
        is_fitting_mode=False
    )

    torch.manual_seed(42)
    train_dataset, val_dataset = random_split(normalized_dataset, [train_size, val_size])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # === Trainer ===
    trainer = OptimizedTrainer(
        model,
        device=DEVICE,
        learning_rate=hparams["learning_rate"],
        lambda_deriv_weight=hparams["lambda_deriv"],
        weight_decay=hparams["weight_decay"],
        monitor_gradients=True,
        grad_log_threshold=5.0,
        scale_warmup_epochs=hparams["scheduler_T0"],
        initial_scale=0.05,
        final_scale=1.0
    )

    early_stopper = ExtendedEarlyStopping(
        patience=hparams["early_stopping_patience"],
        min_delta=0.001,
        restore_best_weights=True
    )

    # === Training loop ===
    for epoch in range(epochs):
        trainer.update_scale(epoch)
        model.train()

        # --- Training Loop ---
        train_total_losses, train_cf_losses, train_deriv_losses = [], [], []

        for batch_idx, batch in enumerate(tqdm(train_loader, desc=f"Training Epoch {epoch}")):
            portfolio = batch["portfolio"].to(DEVICE)
            mask = batch["mask"].to(DEVICE)
            S_T = batch["S_T"].to(DEVICE)
            cashflow = batch["cashflow"].to(DEVICE)
            derivative = batch["derivative"].to(DEVICE)

            # CORRECT: Training step computes and returns training losses
            total, cf, deriv = trainer.train_step(
                portfolio,
                S_T.clone().detach().requires_grad_(True),
                cashflow,
                derivative,
                mask
            )

            train_total_losses.append(total)
            train_cf_losses.append(cf)
            train_deriv_losses.append(deriv)

            # --- Batch-level logging ---
            global_step = epoch * len(train_loader) + batch_idx
            experiment.log_metric("train_total_loss_batch", total, step=global_step)
            experiment.log_metric("train_cashflow_loss_batch", cf, step=global_step)
            experiment.log_metric("train_derivative_loss_batch", deriv, step=global_step)

        # --- Epoch-level metrics ---
        avg_train_total = np.mean(train_total_losses)
        avg_train_cf = np.mean(train_cf_losses)
        avg_train_deriv = np.mean(train_deriv_losses)

        experiment.log_metric("train_total_loss_epoch", avg_train_total, step=epoch)
        experiment.log_metric("train_cashflow_loss_epoch", avg_train_cf, step=epoch)
        experiment.log_metric("train_derivative_loss_epoch", avg_train_deriv, step=epoch)

        # --- FIXED: Validation Loop ---
        model.eval()
        val_total_losses, val_cf_losses, val_deriv_losses = [], [], []

        with torch.no_grad():
            for batch_idx, batch in enumerate(val_loader):
                portfolio = batch["portfolio"].to(DEVICE)
                mask = batch["mask"].to(DEVICE)
                S_T = batch["S_T"].to(DEVICE)
                cashflow = batch["cashflow"].to(DEVICE)
                derivative = batch["derivative"].to(DEVICE)

                # FIXED: Actually compute validation losses using val_step
                total, cf, deriv = trainer.val_step(
                    portfolio, S_T, cashflow, derivative, mask
                )

                val_total_losses.append(total)
                val_cf_losses.append(cf)
                val_deriv_losses.append(deriv)

                # --- Batch-level validation logging ---
                global_step = epoch * len(val_loader) + batch_idx
                experiment.log_metric("val_total_loss_batch", total, step=global_step)
                experiment.log_metric("val_cashflow_loss_batch", cf, step=global_step)
                experiment.log_metric("val_derivative_loss_batch", deriv, step=global_step)

        # Step the scheduler once per epoch
        trainer.step_scheduler_epoch()

        current_lr = trainer.optimizer.param_groups[0]['lr']
        print(f"Epoch {epoch} ‚Üí Current LR: {current_lr:.6e}")

        # --- Epoch-level validation metrics ---
        avg_val_total = np.mean(val_total_losses)
        avg_val_cf = np.mean(val_cf_losses)
        avg_val_deriv = np.mean(val_deriv_losses)

        experiment.log_metric("val_total_loss_epoch", avg_val_total, step=epoch)
        experiment.log_metric("val_cashflow_loss_epoch", avg_val_cf, step=epoch)
        experiment.log_metric("val_derivative_loss_epoch", avg_val_deriv, step=epoch)

        print(
            f"Epoch [{epoch}/{epochs}] "
            f"Train ‚Üí total: {avg_train_total:.6f}, cf: {avg_train_cf:.6f}, deriv: {avg_train_deriv:.6f} | "
            f"Val ‚Üí total: {avg_val_total:.6f}, cf: {avg_val_cf:.6f}, deriv: {avg_val_deriv:.6f}"
        )

        # === SAVE MODEL EVERY 50 EPOCHS ===
        if (epoch + 1) % 50 == 0:  # epoch starts from 0, so epoch+1 for proper numbering
            checkpoint_path = checkpoint_dir + f"deeponet_model-epoch{epoch + 1}.pt"

            # Save model checkpoint using the reusable function
            save_model_checkpoint(
                model=model,
                save_path=checkpoint_path,
                epoch=epoch + 1,
                optimizer=trainer.optimizer,
                scheduler=trainer.scheduler if hasattr(trainer, 'scheduler') else None,
                train_loss=avg_train_total,
                val_loss=avg_val_total
            )

            print(f"üîÑ Model checkpoint saved at epoch {epoch + 1}: {checkpoint_path}")

        # --- Early Stopping Check ---
        stop = early_stopper(avg_val_total, model)
        if stop:
            print(f"Early stopping triggered at epoch {epoch}. Best val loss: {early_stopper.best:.6f}")
            break

    # === SAVE FINAL MODEL WITH METADATA ===
    save_path = DRIVE_PATH + "final_deeponet_model.pt"

    # Save final model using the reusable function
    save_model_checkpoint(model=model, save_path=save_path)

    print("‚úÖ Training finished, model saved as final_deeponet_model.pt")
    print("‚úÖ Scalers saved for consistent evaluation")
    print(f"üìÅ Final model saved at: {save_path}")
    print(f"üìÅ Checkpoints saved at: {checkpoint_dir}")
    print(f"üìÅ Scalers saved at: {DRIVE_PATH}*_Scalar_Training.pkl")

    experiment.end()

if __name__ == "__main__":
    main()

[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/satyabratkumarsingh/option-portfolio-encoder-decoder/96ac953a751041d48e6b2089d00b1db8

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in '/content' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.


Fitting scalers on training data...
Fitting K and S_T scalers from training set...


Collecting K and S_T for scalers: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [00:30<00:00,  5.18it/s]
Fitting Cashflow Scaler: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [00:30<00:00,  5.20it/s]


Saving scalers for evaluation consistency...
‚úÖ Successfully saved all training scalers
üìä Scaler Statistics:
K_scaler - mean: 155.0788, std: 175.6431
S_T_scaler - mean: 313.4675, std: 138.1498
Cashflow_scaler - mean: 0.4324, std: 167.9381
[Epoch 0] Updated branch/trunk scale ‚Üí 0.2400


Training Epoch 0: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 0 ‚Üí Current LR: 2.714480e-04
Epoch [0/500] Train ‚Üí total: 0.333914, cf: 0.298260, deriv: 0.713087 | Val ‚Üí total: 0.304020, cf: 0.269714, deriv: 0.686130
[Epoch 1] Updated branch/trunk scale ‚Üí 0.4300


Training Epoch 1: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 1 ‚Üí Current LR: 1.966980e-04
Epoch [1/500] Train ‚Üí total: 0.304801, cf: 0.271661, deriv: 0.662811 | Val ‚Üí total: 0.259948, cf: 0.228533, deriv: 0.628292
[Epoch 2] Updated branch/trunk scale ‚Üí 0.6200


Training Epoch 2: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 2 ‚Üí Current LR: 1.043020e-04
Epoch [2/500] Train ‚Üí total: 0.247491, cf: 0.218845, deriv: 0.572919 | Val ‚Üí total: 0.163104, cf: 0.138047, deriv: 0.501137
[Epoch 3] Updated branch/trunk scale ‚Üí 0.8100


Training Epoch 3: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 3 ‚Üí Current LR: 2.955196e-05
Epoch [3/500] Train ‚Üí total: 0.224265, cf: 0.198146, deriv: 0.522382 | Val ‚Üí total: 0.152714, cf: 0.128717, deriv: 0.479952
[Epoch 4] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 4: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 4 ‚Üí Current LR: 3.000000e-04
Epoch [4/500] Train ‚Üí total: 0.229259, cf: 0.203710, deriv: 0.510979 | Val ‚Üí total: 0.142427, cf: 0.119407, deriv: 0.460408
[Epoch 5] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 5 ‚Üí Current LR: 2.714480e-04
Epoch [5/500] Train ‚Üí total: 0.244192, cf: 0.218878, deriv: 0.506285 | Val ‚Üí total: 0.182052, cf: 0.158838, deriv: 0.464284
[Epoch 6] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 6: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 6 ‚Üí Current LR: 1.966980e-04
Epoch [6/500] Train ‚Üí total: 0.185356, cf: 0.160718, deriv: 0.492771 | Val ‚Üí total: 0.142017, cf: 0.118479, deriv: 0.470764
[Epoch 7] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 7: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 7 ‚Üí Current LR: 1.043020e-04
Epoch [7/500] Train ‚Üí total: 0.160205, cf: 0.137006, deriv: 0.463969 | Val ‚Üí total: 0.131419, cf: 0.107203, deriv: 0.484313
[Epoch 8] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 8: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 8 ‚Üí Current LR: 2.955196e-05
Epoch [8/500] Train ‚Üí total: 0.141416, cf: 0.119016, deriv: 0.447999 | Val ‚Üí total: 0.133051, cf: 0.108610, deriv: 0.488823
[Epoch 9] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 9: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 9 ‚Üí Current LR: 3.000000e-04
Epoch [9/500] Train ‚Üí total: 0.132082, cf: 0.110178, deriv: 0.438074 | Val ‚Üí total: 0.124859, cf: 0.099331, deriv: 0.510561
[Epoch 10] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 10 ‚Üí Current LR: 2.714480e-04
Epoch [10/500] Train ‚Üí total: 0.152893, cf: 0.130390, deriv: 0.450074 | Val ‚Üí total: 0.147251, cf: 0.123258, deriv: 0.479853
[Epoch 11] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 11: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 11 ‚Üí Current LR: 1.966980e-04
Epoch [11/500] Train ‚Üí total: 0.134895, cf: 0.112799, deriv: 0.441911 | Val ‚Üí total: 0.129448, cf: 0.103712, deriv: 0.514721
[Epoch 12] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 12: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 12 ‚Üí Current LR: 1.043020e-04
Epoch [12/500] Train ‚Üí total: 0.127223, cf: 0.105173, deriv: 0.440999 | Val ‚Üí total: 0.121794, cf: 0.096783, deriv: 0.500232
[Epoch 13] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 13: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 13 ‚Üí Current LR: 2.955196e-05
Epoch [13/500] Train ‚Üí total: 0.110838, cf: 0.090207, deriv: 0.412619 | Val ‚Üí total: 0.129814, cf: 0.102430, deriv: 0.547694
[Epoch 14] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 14: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 14 ‚Üí Current LR: 3.000000e-04
Epoch [14/500] Train ‚Üí total: 0.106001, cf: 0.085314, deriv: 0.413731 | Val ‚Üí total: 0.157279, cf: 0.126618, deriv: 0.613216
[Epoch 15] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 15 ‚Üí Current LR: 2.714480e-04
Epoch [15/500] Train ‚Üí total: 0.123812, cf: 0.102406, deriv: 0.428118 | Val ‚Üí total: 0.156090, cf: 0.126907, deriv: 0.583671
[Epoch 16] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 16: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 16 ‚Üí Current LR: 1.966980e-04
Epoch [16/500] Train ‚Üí total: 0.116134, cf: 0.095354, deriv: 0.415590 | Val ‚Üí total: 0.130627, cf: 0.103582, deriv: 0.540905
[Epoch 17] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 17: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 17 ‚Üí Current LR: 1.043020e-04
Epoch [17/500] Train ‚Üí total: 0.106796, cf: 0.086150, deriv: 0.412929 | Val ‚Üí total: 0.149345, cf: 0.120405, deriv: 0.578791
[Epoch 18] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 18: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 18 ‚Üí Current LR: 2.955196e-05
Epoch [18/500] Train ‚Üí total: 0.097641, cf: 0.077375, deriv: 0.405313 | Val ‚Üí total: 0.136264, cf: 0.105762, deriv: 0.610039
[Epoch 19] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 19: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 19 ‚Üí Current LR: 3.000000e-04
Epoch [19/500] Train ‚Üí total: 0.092961, cf: 0.072979, deriv: 0.399647 | Val ‚Üí total: 0.150350, cf: 0.118285, deriv: 0.641298
[Epoch 20] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 20 ‚Üí Current LR: 2.714480e-04
Epoch [20/500] Train ‚Üí total: 0.109618, cf: 0.089509, deriv: 0.402193 | Val ‚Üí total: 0.151122, cf: 0.120222, deriv: 0.618010
[Epoch 21] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 21: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 21 ‚Üí Current LR: 1.966980e-04
Epoch [21/500] Train ‚Üí total: 0.103538, cf: 0.083355, deriv: 0.403654 | Val ‚Üí total: 0.155965, cf: 0.123448, deriv: 0.650337
[Epoch 22] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 22: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 22 ‚Üí Current LR: 1.043020e-04
Epoch [22/500] Train ‚Üí total: 0.096936, cf: 0.076218, deriv: 0.414353 | Val ‚Üí total: 0.143295, cf: 0.112409, deriv: 0.617738
[Epoch 23] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 23: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 23 ‚Üí Current LR: 2.955196e-05
Epoch [23/500] Train ‚Üí total: 0.091583, cf: 0.071235, deriv: 0.406956 | Val ‚Üí total: 0.130710, cf: 0.100922, deriv: 0.595756
[Epoch 24] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 24: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 24 ‚Üí Current LR: 3.000000e-04
Epoch [24/500] Train ‚Üí total: 0.085458, cf: 0.065414, deriv: 0.400884 | Val ‚Üí total: 0.141071, cf: 0.109770, deriv: 0.626011
[Epoch 25] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 25: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 25 ‚Üí Current LR: 2.714480e-04
Epoch [25/500] Train ‚Üí total: 0.101566, cf: 0.081219, deriv: 0.406946 | Val ‚Üí total: 0.167414, cf: 0.138842, deriv: 0.571440
[Epoch 26] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 26: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 26 ‚Üí Current LR: 1.966980e-04
Epoch [26/500] Train ‚Üí total: 0.097294, cf: 0.077180, deriv: 0.402286 | Val ‚Üí total: 0.119417, cf: 0.093599, deriv: 0.516364
[Epoch 27] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 27: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 27 ‚Üí Current LR: 1.043020e-04
Epoch [27/500] Train ‚Üí total: 0.090002, cf: 0.069787, deriv: 0.404318 | Val ‚Üí total: 0.169629, cf: 0.139018, deriv: 0.612224
[Epoch 28] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 28: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 28 ‚Üí Current LR: 2.955196e-05
Epoch [28/500] Train ‚Üí total: 0.083657, cf: 0.063616, deriv: 0.400821 | Val ‚Üí total: 0.150215, cf: 0.117551, deriv: 0.653290
[Epoch 29] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 29: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 29 ‚Üí Current LR: 3.000000e-04
Epoch [29/500] Train ‚Üí total: 0.080182, cf: 0.059966, deriv: 0.404327 | Val ‚Üí total: 0.137949, cf: 0.106605, deriv: 0.626882
[Epoch 30] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 30 ‚Üí Current LR: 2.714480e-04
Epoch [30/500] Train ‚Üí total: 0.098240, cf: 0.078002, deriv: 0.404765 | Val ‚Üí total: 0.101819, cf: 0.075969, deriv: 0.517011
[Epoch 31] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 31: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 31 ‚Üí Current LR: 1.966980e-04
Epoch [31/500] Train ‚Üí total: 0.092240, cf: 0.071853, deriv: 0.407738 | Val ‚Üí total: 0.166313, cf: 0.133661, deriv: 0.653038
[Epoch 32] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 32: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 32 ‚Üí Current LR: 1.043020e-04
Epoch [32/500] Train ‚Üí total: 0.086798, cf: 0.066588, deriv: 0.404196 | Val ‚Üí total: 0.117981, cf: 0.092773, deriv: 0.504156
[Epoch 33] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 33: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 33 ‚Üí Current LR: 2.955196e-05
Epoch [33/500] Train ‚Üí total: 0.080033, cf: 0.060104, deriv: 0.398591 | Val ‚Üí total: 0.133120, cf: 0.104209, deriv: 0.578230
[Epoch 34] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 34: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 34 ‚Üí Current LR: 3.000000e-04
Epoch [34/500] Train ‚Üí total: 0.076662, cf: 0.056053, deriv: 0.412173 | Val ‚Üí total: 0.136974, cf: 0.104904, deriv: 0.641405
[Epoch 35] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 35: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.41s/it]


Epoch 35 ‚Üí Current LR: 2.714480e-04
Epoch [35/500] Train ‚Üí total: 0.090215, cf: 0.069871, deriv: 0.406881 | Val ‚Üí total: 0.109462, cf: 0.082592, deriv: 0.537388
[Epoch 36] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 36: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 36 ‚Üí Current LR: 1.966980e-04
Epoch [36/500] Train ‚Üí total: 0.089669, cf: 0.069718, deriv: 0.399026 | Val ‚Üí total: 0.130433, cf: 0.099468, deriv: 0.619298
[Epoch 37] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 37: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.41s/it]


Epoch 37 ‚Üí Current LR: 1.043020e-04
Epoch [37/500] Train ‚Üí total: 0.082947, cf: 0.062767, deriv: 0.403603 | Val ‚Üí total: 0.147785, cf: 0.115422, deriv: 0.647272
[Epoch 38] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 38: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 38 ‚Üí Current LR: 2.955196e-05
Epoch [38/500] Train ‚Üí total: 0.077603, cf: 0.057446, deriv: 0.403141 | Val ‚Üí total: 0.145884, cf: 0.113381, deriv: 0.650064
[Epoch 39] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 39: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 39 ‚Üí Current LR: 3.000000e-04
Epoch [39/500] Train ‚Üí total: 0.074148, cf: 0.053610, deriv: 0.410774 | Val ‚Üí total: 0.125796, cf: 0.095406, deriv: 0.607811
[Epoch 40] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 40: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 40 ‚Üí Current LR: 2.714480e-04
Epoch [40/500] Train ‚Üí total: 0.087559, cf: 0.067528, deriv: 0.400613 | Val ‚Üí total: 0.140384, cf: 0.110676, deriv: 0.594151
[Epoch 41] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 41: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 41 ‚Üí Current LR: 1.966980e-04
Epoch [41/500] Train ‚Üí total: 0.086470, cf: 0.066215, deriv: 0.405112 | Val ‚Üí total: 0.115011, cf: 0.085668, deriv: 0.586852
[Epoch 42] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 42: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 42 ‚Üí Current LR: 1.043020e-04
Epoch [42/500] Train ‚Üí total: 0.078925, cf: 0.059351, deriv: 0.391490 | Val ‚Üí total: 0.115548, cf: 0.086215, deriv: 0.586659
[Epoch 43] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 43: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 43 ‚Üí Current LR: 2.955196e-05
Epoch [43/500] Train ‚Üí total: 0.074408, cf: 0.054353, deriv: 0.401101 | Val ‚Üí total: 0.122357, cf: 0.093554, deriv: 0.576055
[Epoch 44] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 44: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 44 ‚Üí Current LR: 3.000000e-04
Epoch [44/500] Train ‚Üí total: 0.070591, cf: 0.050662, deriv: 0.398587 | Val ‚Üí total: 0.123813, cf: 0.093051, deriv: 0.615249
[Epoch 45] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 45: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 45 ‚Üí Current LR: 2.714480e-04
Epoch [45/500] Train ‚Üí total: 0.089563, cf: 0.069566, deriv: 0.399937 | Val ‚Üí total: 0.103638, cf: 0.077040, deriv: 0.531967
[Epoch 46] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 46: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 46 ‚Üí Current LR: 1.966980e-04
Epoch [46/500] Train ‚Üí total: 0.079385, cf: 0.059475, deriv: 0.398192 | Val ‚Üí total: 0.165080, cf: 0.133861, deriv: 0.624371
[Epoch 47] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 47: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 47 ‚Üí Current LR: 1.043020e-04
Epoch [47/500] Train ‚Üí total: 0.076327, cf: 0.056309, deriv: 0.400350 | Val ‚Üí total: 0.124295, cf: 0.095839, deriv: 0.569116
[Epoch 48] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 48: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 48 ‚Üí Current LR: 2.955196e-05
Epoch [48/500] Train ‚Üí total: 0.071576, cf: 0.051679, deriv: 0.397934 | Val ‚Üí total: 0.114877, cf: 0.084663, deriv: 0.604275
[Epoch 49] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 49: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 49 ‚Üí Current LR: 3.000000e-04
Epoch [49/500] Train ‚Üí total: 0.068438, cf: 0.048994, deriv: 0.388894 | Val ‚Üí total: 0.125820, cf: 0.095161, deriv: 0.613188
üîÑ Model checkpoint saved at epoch 50: /content/drive/MyDrive/Ucl/checkpoints/deeponet_model-epoch50.pt
[Epoch 50] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 50: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 50 ‚Üí Current LR: 2.714480e-04
Epoch [50/500] Train ‚Üí total: 0.080226, cf: 0.060640, deriv: 0.391725 | Val ‚Üí total: 0.132490, cf: 0.101270, deriv: 0.624399
[Epoch 51] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 51: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 51 ‚Üí Current LR: 1.966980e-04
Epoch [51/500] Train ‚Üí total: 0.077986, cf: 0.058233, deriv: 0.395064 | Val ‚Üí total: 0.085089, cf: 0.060849, deriv: 0.484791
[Epoch 52] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 52: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 52 ‚Üí Current LR: 1.043020e-04
Epoch [52/500] Train ‚Üí total: 0.073379, cf: 0.053887, deriv: 0.389842 | Val ‚Üí total: 0.119289, cf: 0.089342, deriv: 0.598937
[Epoch 53] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 53: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 53 ‚Üí Current LR: 2.955196e-05
Epoch [53/500] Train ‚Üí total: 0.068744, cf: 0.049681, deriv: 0.381258 | Val ‚Üí total: 0.125761, cf: 0.094270, deriv: 0.629833
[Epoch 54] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 54: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 54 ‚Üí Current LR: 3.000000e-04
Epoch [54/500] Train ‚Üí total: 0.066751, cf: 0.047679, deriv: 0.381458 | Val ‚Üí total: 0.122791, cf: 0.091252, deriv: 0.630778
[Epoch 55] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 55: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 55 ‚Üí Current LR: 2.714480e-04
Epoch [55/500] Train ‚Üí total: 0.077289, cf: 0.058184, deriv: 0.382114 | Val ‚Üí total: 0.101895, cf: 0.075103, deriv: 0.535833
[Epoch 56] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 56: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 56 ‚Üí Current LR: 1.966980e-04
Epoch [56/500] Train ‚Üí total: 0.078675, cf: 0.058721, deriv: 0.399069 | Val ‚Üí total: 0.119875, cf: 0.091226, deriv: 0.572975
[Epoch 57] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 57: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 57 ‚Üí Current LR: 1.043020e-04
Epoch [57/500] Train ‚Üí total: 0.074769, cf: 0.054664, deriv: 0.402099 | Val ‚Üí total: 0.107304, cf: 0.078511, deriv: 0.575869
[Epoch 58] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 58: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 58 ‚Üí Current LR: 2.955196e-05
Epoch [58/500] Train ‚Üí total: 0.068421, cf: 0.048656, deriv: 0.395291 | Val ‚Üí total: 0.102282, cf: 0.073611, deriv: 0.573426
[Epoch 59] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 59: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 59 ‚Üí Current LR: 3.000000e-04
Epoch [59/500] Train ‚Üí total: 0.064874, cf: 0.045482, deriv: 0.387841 | Val ‚Üí total: 0.116184, cf: 0.085244, deriv: 0.618816
[Epoch 60] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 60: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 60 ‚Üí Current LR: 2.714480e-04
Epoch [60/500] Train ‚Üí total: 0.075845, cf: 0.056605, deriv: 0.384800 | Val ‚Üí total: 0.134363, cf: 0.105514, deriv: 0.576972
[Epoch 61] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 61: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 61 ‚Üí Current LR: 1.966980e-04
Epoch [61/500] Train ‚Üí total: 0.076925, cf: 0.056759, deriv: 0.403305 | Val ‚Üí total: 0.086234, cf: 0.061188, deriv: 0.500925
[Epoch 62] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 62: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 62 ‚Üí Current LR: 1.043020e-04
Epoch [62/500] Train ‚Üí total: 0.071856, cf: 0.051811, deriv: 0.400886 | Val ‚Üí total: 0.085111, cf: 0.059365, deriv: 0.514930
[Epoch 63] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 63: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 63 ‚Üí Current LR: 2.955196e-05
Epoch [63/500] Train ‚Üí total: 0.066175, cf: 0.047160, deriv: 0.380294 | Val ‚Üí total: 0.113727, cf: 0.083549, deriv: 0.603572
[Epoch 64] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 64: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 64 ‚Üí Current LR: 3.000000e-04
Epoch [64/500] Train ‚Üí total: 0.062919, cf: 0.043777, deriv: 0.382829 | Val ‚Üí total: 0.110733, cf: 0.080706, deriv: 0.600535
[Epoch 65] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 65: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 65 ‚Üí Current LR: 2.714480e-04
Epoch [65/500] Train ‚Üí total: 0.075182, cf: 0.055445, deriv: 0.394726 | Val ‚Üí total: 0.113663, cf: 0.086599, deriv: 0.541277
[Epoch 66] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 66: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 66 ‚Üí Current LR: 1.966980e-04
Epoch [66/500] Train ‚Üí total: 0.076338, cf: 0.056318, deriv: 0.400399 | Val ‚Üí total: 0.099003, cf: 0.071836, deriv: 0.543340
[Epoch 67] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 67: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 67 ‚Üí Current LR: 1.043020e-04
Epoch [67/500] Train ‚Üí total: 0.069971, cf: 0.050477, deriv: 0.389880 | Val ‚Üí total: 0.069963, cf: 0.047497, deriv: 0.449308
[Epoch 68] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 68: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 68 ‚Üí Current LR: 2.955196e-05
Epoch [68/500] Train ‚Üí total: 0.065912, cf: 0.046341, deriv: 0.391420 | Val ‚Üí total: 0.079456, cf: 0.054907, deriv: 0.490981
[Epoch 69] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 69: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 69 ‚Üí Current LR: 3.000000e-04
Epoch [69/500] Train ‚Üí total: 0.061421, cf: 0.042296, deriv: 0.382495 | Val ‚Üí total: 0.095183, cf: 0.066526, deriv: 0.573131
[Epoch 70] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 70: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 70 ‚Üí Current LR: 2.714480e-04
Epoch [70/500] Train ‚Üí total: 0.075800, cf: 0.056552, deriv: 0.384963 | Val ‚Üí total: 0.099637, cf: 0.071826, deriv: 0.556223
[Epoch 71] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 71: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 71 ‚Üí Current LR: 1.966980e-04
Epoch [71/500] Train ‚Üí total: 0.071093, cf: 0.051970, deriv: 0.382466 | Val ‚Üí total: 0.109676, cf: 0.082370, deriv: 0.546101
[Epoch 72] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 72: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 72 ‚Üí Current LR: 1.043020e-04
Epoch [72/500] Train ‚Üí total: 0.068930, cf: 0.049662, deriv: 0.385358 | Val ‚Üí total: 0.095173, cf: 0.069144, deriv: 0.520580
[Epoch 73] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 73: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 73 ‚Üí Current LR: 2.955196e-05
Epoch [73/500] Train ‚Üí total: 0.062954, cf: 0.043942, deriv: 0.380233 | Val ‚Üí total: 0.095550, cf: 0.068756, deriv: 0.535880
[Epoch 74] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 74: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.41s/it]


Epoch 74 ‚Üí Current LR: 3.000000e-04
Epoch [74/500] Train ‚Üí total: 0.059853, cf: 0.040564, deriv: 0.385779 | Val ‚Üí total: 0.088388, cf: 0.060453, deriv: 0.558694
[Epoch 75] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 75: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 75 ‚Üí Current LR: 2.714480e-04
Epoch [75/500] Train ‚Üí total: 0.070635, cf: 0.051666, deriv: 0.379376 | Val ‚Üí total: 0.132058, cf: 0.102159, deriv: 0.597979
[Epoch 76] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 76: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 76 ‚Üí Current LR: 1.966980e-04
Epoch [76/500] Train ‚Üí total: 0.069965, cf: 0.050394, deriv: 0.391429 | Val ‚Üí total: 0.093030, cf: 0.067321, deriv: 0.514178
[Epoch 77] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 77: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 77 ‚Üí Current LR: 1.043020e-04
Epoch [77/500] Train ‚Üí total: 0.065620, cf: 0.046808, deriv: 0.376240 | Val ‚Üí total: 0.064268, cf: 0.041558, deriv: 0.454208
[Epoch 78] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 78: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 78 ‚Üí Current LR: 2.955196e-05
Epoch [78/500] Train ‚Üí total: 0.061852, cf: 0.042520, deriv: 0.386645 | Val ‚Üí total: 0.096442, cf: 0.069854, deriv: 0.531761
[Epoch 79] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 79: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 79 ‚Üí Current LR: 3.000000e-04
Epoch [79/500] Train ‚Üí total: 0.059179, cf: 0.039956, deriv: 0.384453 | Val ‚Üí total: 0.063088, cf: 0.039588, deriv: 0.469997
[Epoch 80] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 80: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 80 ‚Üí Current LR: 2.714480e-04
Epoch [80/500] Train ‚Üí total: 0.070464, cf: 0.050952, deriv: 0.390242 | Val ‚Üí total: 0.057807, cf: 0.035344, deriv: 0.449252
[Epoch 81] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 81: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 81 ‚Üí Current LR: 1.966980e-04
Epoch [81/500] Train ‚Üí total: 0.067688, cf: 0.048666, deriv: 0.380433 | Val ‚Üí total: 0.082522, cf: 0.057699, deriv: 0.496454
[Epoch 82] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 82: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 82 ‚Üí Current LR: 1.043020e-04
Epoch [82/500] Train ‚Üí total: 0.062705, cf: 0.044118, deriv: 0.371744 | Val ‚Üí total: 0.066359, cf: 0.044921, deriv: 0.428749
[Epoch 83] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 83: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 83 ‚Üí Current LR: 2.955196e-05
Epoch [83/500] Train ‚Üí total: 0.060768, cf: 0.041720, deriv: 0.380974 | Val ‚Üí total: 0.079300, cf: 0.053104, deriv: 0.523921
[Epoch 84] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 84: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 84 ‚Üí Current LR: 3.000000e-04
Epoch [84/500] Train ‚Üí total: 0.057951, cf: 0.038583, deriv: 0.387358 | Val ‚Üí total: 0.069850, cf: 0.045753, deriv: 0.481926
[Epoch 85] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 85: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 85 ‚Üí Current LR: 2.714480e-04
Epoch [85/500] Train ‚Üí total: 0.069112, cf: 0.050247, deriv: 0.377290 | Val ‚Üí total: 0.074079, cf: 0.049134, deriv: 0.498904
[Epoch 86] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 86: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 86 ‚Üí Current LR: 1.966980e-04
Epoch [86/500] Train ‚Üí total: 0.066366, cf: 0.047232, deriv: 0.382674 | Val ‚Üí total: 0.059512, cf: 0.037984, deriv: 0.430564
[Epoch 87] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 87: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 87 ‚Üí Current LR: 1.043020e-04
Epoch [87/500] Train ‚Üí total: 0.060597, cf: 0.041753, deriv: 0.376881 | Val ‚Üí total: 0.059953, cf: 0.037370, deriv: 0.451649
[Epoch 88] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 88: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 88 ‚Üí Current LR: 2.955196e-05
Epoch [88/500] Train ‚Üí total: 0.057823, cf: 0.038460, deriv: 0.387260 | Val ‚Üí total: 0.056486, cf: 0.034703, deriv: 0.435658
[Epoch 89] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 89: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 89 ‚Üí Current LR: 3.000000e-04
Epoch [89/500] Train ‚Üí total: 0.054388, cf: 0.035362, deriv: 0.380526 | Val ‚Üí total: 0.046754, cf: 0.025072, deriv: 0.433647
[Epoch 90] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 90: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 90 ‚Üí Current LR: 2.714480e-04
Epoch [90/500] Train ‚Üí total: 0.065602, cf: 0.046615, deriv: 0.379752 | Val ‚Üí total: 0.074651, cf: 0.049709, deriv: 0.498840
[Epoch 91] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 91: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 91 ‚Üí Current LR: 1.966980e-04
Epoch [91/500] Train ‚Üí total: 0.063303, cf: 0.044186, deriv: 0.382334 | Val ‚Üí total: 0.054295, cf: 0.032932, deriv: 0.427251
[Epoch 92] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 92: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 92 ‚Üí Current LR: 1.043020e-04
Epoch [92/500] Train ‚Üí total: 0.058757, cf: 0.039604, deriv: 0.383067 | Val ‚Üí total: 0.043928, cf: 0.023716, deriv: 0.404243
[Epoch 93] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 93: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 93 ‚Üí Current LR: 2.955196e-05
Epoch [93/500] Train ‚Üí total: 0.053697, cf: 0.035020, deriv: 0.373537 | Val ‚Üí total: 0.039213, cf: 0.019489, deriv: 0.394468
[Epoch 94] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 94: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 94 ‚Üí Current LR: 3.000000e-04
Epoch [94/500] Train ‚Üí total: 0.051934, cf: 0.032961, deriv: 0.379458 | Val ‚Üí total: 0.036565, cf: 0.016127, deriv: 0.408757
[Epoch 95] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 95: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 95 ‚Üí Current LR: 2.714480e-04
Epoch [95/500] Train ‚Üí total: 0.064317, cf: 0.044789, deriv: 0.390563 | Val ‚Üí total: 0.058095, cf: 0.039192, deriv: 0.378067
[Epoch 96] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 96: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 96 ‚Üí Current LR: 1.966980e-04
Epoch [96/500] Train ‚Üí total: 0.060079, cf: 0.040733, deriv: 0.386929 | Val ‚Üí total: 0.035658, cf: 0.017904, deriv: 0.355090
[Epoch 97] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 97: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 97 ‚Üí Current LR: 1.043020e-04
Epoch [97/500] Train ‚Üí total: 0.056242, cf: 0.038128, deriv: 0.362270 | Val ‚Üí total: 0.039435, cf: 0.020580, deriv: 0.377116
[Epoch 98] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 98: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 98 ‚Üí Current LR: 2.955196e-05
Epoch [98/500] Train ‚Üí total: 0.051389, cf: 0.032854, deriv: 0.370699 | Val ‚Üí total: 0.033474, cf: 0.014940, deriv: 0.370689
[Epoch 99] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 99: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 99 ‚Üí Current LR: 3.000000e-04
Epoch [99/500] Train ‚Üí total: 0.048925, cf: 0.030538, deriv: 0.367742 | Val ‚Üí total: 0.029793, cf: 0.011829, deriv: 0.359275
üîÑ Model checkpoint saved at epoch 100: /content/drive/MyDrive/Ucl/checkpoints/deeponet_model-epoch100.pt
[Epoch 100] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 100 ‚Üí Current LR: 2.714480e-04
Epoch [100/500] Train ‚Üí total: 0.058203, cf: 0.039148, deriv: 0.381091 | Val ‚Üí total: 0.038748, cf: 0.020442, deriv: 0.366112
[Epoch 101] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 101: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 101 ‚Üí Current LR: 1.966980e-04
Epoch [101/500] Train ‚Üí total: 0.055230, cf: 0.036802, deriv: 0.368559 | Val ‚Üí total: 0.046796, cf: 0.026481, deriv: 0.406300
[Epoch 102] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 102: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 102 ‚Üí Current LR: 1.043020e-04
Epoch [102/500] Train ‚Üí total: 0.054003, cf: 0.034873, deriv: 0.382592 | Val ‚Üí total: 0.038185, cf: 0.020972, deriv: 0.344264
[Epoch 103] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 103: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 103 ‚Üí Current LR: 2.955196e-05
Epoch [103/500] Train ‚Üí total: 0.048672, cf: 0.030007, deriv: 0.373300 | Val ‚Üí total: 0.029935, cf: 0.013072, deriv: 0.337271
[Epoch 104] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 104: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 104 ‚Üí Current LR: 3.000000e-04
Epoch [104/500] Train ‚Üí total: 0.047392, cf: 0.028618, deriv: 0.375465 | Val ‚Üí total: 0.029734, cf: 0.011716, deriv: 0.360355
[Epoch 105] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 105: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 105 ‚Üí Current LR: 2.714480e-04
Epoch [105/500] Train ‚Üí total: 0.058809, cf: 0.040132, deriv: 0.373543 | Val ‚Üí total: 0.040449, cf: 0.020801, deriv: 0.392972
[Epoch 106] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 106: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 106 ‚Üí Current LR: 1.966980e-04
Epoch [106/500] Train ‚Üí total: 0.056543, cf: 0.037414, deriv: 0.382589 | Val ‚Üí total: 0.039708, cf: 0.020638, deriv: 0.381413
[Epoch 107] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 107: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 107 ‚Üí Current LR: 1.043020e-04
Epoch [107/500] Train ‚Üí total: 0.050593, cf: 0.031959, deriv: 0.372668 | Val ‚Üí total: 0.030809, cf: 0.012954, deriv: 0.357094
[Epoch 108] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 108: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 108 ‚Üí Current LR: 2.955196e-05
Epoch [108/500] Train ‚Üí total: 0.047754, cf: 0.029651, deriv: 0.362067 | Val ‚Üí total: 0.031876, cf: 0.013628, deriv: 0.364973
[Epoch 109] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 109: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 109 ‚Üí Current LR: 3.000000e-04
Epoch [109/500] Train ‚Üí total: 0.045360, cf: 0.027126, deriv: 0.364681 | Val ‚Üí total: 0.029569, cf: 0.012033, deriv: 0.350717
[Epoch 110] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 110: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 110 ‚Üí Current LR: 2.714480e-04
Epoch [110/500] Train ‚Üí total: 0.056035, cf: 0.037595, deriv: 0.368800 | Val ‚Üí total: 0.051069, cf: 0.032034, deriv: 0.380707
[Epoch 111] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 111: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 111 ‚Üí Current LR: 1.966980e-04
Epoch [111/500] Train ‚Üí total: 0.054744, cf: 0.036111, deriv: 0.372662 | Val ‚Üí total: 0.041929, cf: 0.025714, deriv: 0.324298
[Epoch 112] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 112: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 112 ‚Üí Current LR: 1.043020e-04
Epoch [112/500] Train ‚Üí total: 0.049533, cf: 0.030787, deriv: 0.374929 | Val ‚Üí total: 0.034783, cf: 0.016481, deriv: 0.366036
[Epoch 113] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 113: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 113 ‚Üí Current LR: 2.955196e-05
Epoch [113/500] Train ‚Üí total: 0.046431, cf: 0.027781, deriv: 0.372984 | Val ‚Üí total: 0.030737, cf: 0.013567, deriv: 0.343406
[Epoch 114] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 114: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 114 ‚Üí Current LR: 3.000000e-04
Epoch [114/500] Train ‚Üí total: 0.044715, cf: 0.026344, deriv: 0.367410 | Val ‚Üí total: 0.026774, cf: 0.010198, deriv: 0.331527
[Epoch 115] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 115: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 115 ‚Üí Current LR: 2.714480e-04
Epoch [115/500] Train ‚Üí total: 0.051969, cf: 0.033815, deriv: 0.363080 | Val ‚Üí total: 0.034169, cf: 0.016639, deriv: 0.350616
[Epoch 116] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 116: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 116 ‚Üí Current LR: 1.966980e-04
Epoch [116/500] Train ‚Üí total: 0.052352, cf: 0.033793, deriv: 0.371184 | Val ‚Üí total: 0.035695, cf: 0.017702, deriv: 0.359851
[Epoch 117] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 117: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 117 ‚Üí Current LR: 1.043020e-04
Epoch [117/500] Train ‚Üí total: 0.049141, cf: 0.030716, deriv: 0.368498 | Val ‚Üí total: 0.034902, cf: 0.017613, deriv: 0.345767
[Epoch 118] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 118: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 118 ‚Üí Current LR: 2.955196e-05
Epoch [118/500] Train ‚Üí total: 0.046031, cf: 0.027186, deriv: 0.376892 | Val ‚Üí total: 0.031795, cf: 0.014409, deriv: 0.347714
[Epoch 119] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 119: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 119 ‚Üí Current LR: 3.000000e-04
Epoch [119/500] Train ‚Üí total: 0.043698, cf: 0.025356, deriv: 0.366842 | Val ‚Üí total: 0.027158, cf: 0.010412, deriv: 0.334906
[Epoch 120] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 120: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 120 ‚Üí Current LR: 2.714480e-04
Epoch [120/500] Train ‚Üí total: 0.051488, cf: 0.032780, deriv: 0.374170 | Val ‚Üí total: 0.038529, cf: 0.021037, deriv: 0.349829
[Epoch 121] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 121: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 121 ‚Üí Current LR: 1.966980e-04
Epoch [121/500] Train ‚Üí total: 0.050162, cf: 0.031912, deriv: 0.364992 | Val ‚Üí total: 0.036443, cf: 0.020073, deriv: 0.327404
[Epoch 122] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 122: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 122 ‚Üí Current LR: 1.043020e-04
Epoch [122/500] Train ‚Üí total: 0.047937, cf: 0.028893, deriv: 0.380878 | Val ‚Üí total: 0.031413, cf: 0.013505, deriv: 0.358155
[Epoch 123] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 123: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 123 ‚Üí Current LR: 2.955196e-05
Epoch [123/500] Train ‚Üí total: 0.044638, cf: 0.025795, deriv: 0.376843 | Val ‚Üí total: 0.028839, cf: 0.011015, deriv: 0.356480
[Epoch 124] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 124: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 124 ‚Üí Current LR: 3.000000e-04
Epoch [124/500] Train ‚Üí total: 0.042652, cf: 0.024288, deriv: 0.367290 | Val ‚Üí total: 0.027902, cf: 0.010849, deriv: 0.341051
[Epoch 125] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 125: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 125 ‚Üí Current LR: 2.714480e-04
Epoch [125/500] Train ‚Üí total: 0.052467, cf: 0.033672, deriv: 0.375885 | Val ‚Üí total: 0.031751, cf: 0.015865, deriv: 0.317711
[Epoch 126] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 126: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 126 ‚Üí Current LR: 1.966980e-04
Epoch [126/500] Train ‚Üí total: 0.047992, cf: 0.029155, deriv: 0.376735 | Val ‚Üí total: 0.036459, cf: 0.019573, deriv: 0.337729
[Epoch 127] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 127: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 127 ‚Üí Current LR: 1.043020e-04
Epoch [127/500] Train ‚Üí total: 0.046289, cf: 0.028038, deriv: 0.365022 | Val ‚Üí total: 0.033852, cf: 0.015282, deriv: 0.371402
[Epoch 128] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 128: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 128 ‚Üí Current LR: 2.955196e-05
Epoch [128/500] Train ‚Üí total: 0.043461, cf: 0.025118, deriv: 0.366860 | Val ‚Üí total: 0.028596, cf: 0.012208, deriv: 0.327771
[Epoch 129] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 129: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 129 ‚Üí Current LR: 3.000000e-04
Epoch [129/500] Train ‚Üí total: 0.042382, cf: 0.023762, deriv: 0.372384 | Val ‚Üí total: 0.028524, cf: 0.010664, deriv: 0.357212
[Epoch 130] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 130: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 130 ‚Üí Current LR: 2.714480e-04
Epoch [130/500] Train ‚Üí total: 0.050533, cf: 0.031809, deriv: 0.374483 | Val ‚Üí total: 0.034696, cf: 0.016856, deriv: 0.356795
[Epoch 131] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 131: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 131 ‚Üí Current LR: 1.966980e-04
Epoch [131/500] Train ‚Üí total: 0.048556, cf: 0.029642, deriv: 0.378270 | Val ‚Üí total: 0.035707, cf: 0.017189, deriv: 0.370363
[Epoch 132] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 132: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 132 ‚Üí Current LR: 1.043020e-04
Epoch [132/500] Train ‚Üí total: 0.046001, cf: 0.027713, deriv: 0.365751 | Val ‚Üí total: 0.032620, cf: 0.014828, deriv: 0.355840
[Epoch 133] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 133: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 133 ‚Üí Current LR: 2.955196e-05
Epoch [133/500] Train ‚Üí total: 0.042847, cf: 0.024466, deriv: 0.367609 | Val ‚Üí total: 0.029017, cf: 0.011758, deriv: 0.345188
[Epoch 134] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 134: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 134 ‚Üí Current LR: 3.000000e-04
Epoch [134/500] Train ‚Üí total: 0.041859, cf: 0.023176, deriv: 0.373656 | Val ‚Üí total: 0.028511, cf: 0.010520, deriv: 0.359819
[Epoch 135] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 135: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 135 ‚Üí Current LR: 2.714480e-04
Epoch [135/500] Train ‚Üí total: 0.049668, cf: 0.031076, deriv: 0.371841 | Val ‚Üí total: 0.038029, cf: 0.019933, deriv: 0.361918
[Epoch 136] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 136: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:49<00:00,  1.43s/it]


Epoch 136 ‚Üí Current LR: 1.966980e-04
Epoch [136/500] Train ‚Üí total: 0.048498, cf: 0.029807, deriv: 0.373813 | Val ‚Üí total: 0.033532, cf: 0.017956, deriv: 0.311522
[Epoch 137] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 137: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 137 ‚Üí Current LR: 1.043020e-04
Epoch [137/500] Train ‚Üí total: 0.045760, cf: 0.027714, deriv: 0.360920 | Val ‚Üí total: 0.030333, cf: 0.012641, deriv: 0.353844
[Epoch 138] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 138: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 138 ‚Üí Current LR: 2.955196e-05
Epoch [138/500] Train ‚Üí total: 0.041364, cf: 0.023812, deriv: 0.351042 | Val ‚Üí total: 0.029545, cf: 0.012206, deriv: 0.346792
[Epoch 139] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 139: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 139 ‚Üí Current LR: 3.000000e-04
Epoch [139/500] Train ‚Üí total: 0.039926, cf: 0.021954, deriv: 0.359435 | Val ‚Üí total: 0.029164, cf: 0.011471, deriv: 0.353868
[Epoch 140] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 140: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:49<00:00,  1.43s/it]


Epoch 140 ‚Üí Current LR: 2.714480e-04
Epoch [140/500] Train ‚Üí total: 0.050034, cf: 0.031386, deriv: 0.372949 | Val ‚Üí total: 0.043110, cf: 0.025447, deriv: 0.353261
[Epoch 141] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 141: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 141 ‚Üí Current LR: 1.966980e-04
Epoch [141/500] Train ‚Üí total: 0.047119, cf: 0.028847, deriv: 0.365433 | Val ‚Üí total: 0.033996, cf: 0.016810, deriv: 0.343702
[Epoch 142] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 142: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:48<00:00,  1.43s/it]


Epoch 142 ‚Üí Current LR: 1.043020e-04
Epoch [142/500] Train ‚Üí total: 0.044211, cf: 0.025793, deriv: 0.368362 | Val ‚Üí total: 0.028994, cf: 0.011055, deriv: 0.358781
[Epoch 143] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 143: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:50<00:00,  1.44s/it]


Epoch 143 ‚Üí Current LR: 2.955196e-05
Epoch [143/500] Train ‚Üí total: 0.041369, cf: 0.023210, deriv: 0.363182 | Val ‚Üí total: 0.028619, cf: 0.011819, deriv: 0.335996
[Epoch 144] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 144: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 144 ‚Üí Current LR: 3.000000e-04
Epoch [144/500] Train ‚Üí total: 0.040538, cf: 0.022149, deriv: 0.367770 | Val ‚Üí total: 0.027008, cf: 0.009904, deriv: 0.342072
[Epoch 145] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 145: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:47<00:00,  1.42s/it]


Epoch 145 ‚Üí Current LR: 2.714480e-04
Epoch [145/500] Train ‚Üí total: 0.047658, cf: 0.029378, deriv: 0.365590 | Val ‚Üí total: 0.035859, cf: 0.017639, deriv: 0.364388
[Epoch 146] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 146: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:46<00:00,  1.42s/it]


Epoch 146 ‚Üí Current LR: 1.966980e-04
Epoch [146/500] Train ‚Üí total: 0.047994, cf: 0.029368, deriv: 0.372528 | Val ‚Üí total: 0.036129, cf: 0.018237, deriv: 0.357841
[Epoch 147] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 147: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:44<00:00,  1.41s/it]


Epoch 147 ‚Üí Current LR: 1.043020e-04
Epoch [147/500] Train ‚Üí total: 0.043247, cf: 0.025057, deriv: 0.363796 | Val ‚Üí total: 0.033794, cf: 0.016553, deriv: 0.344839
[Epoch 148] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 148: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:44<00:00,  1.40s/it]


Epoch 148 ‚Üí Current LR: 2.955196e-05
Epoch [148/500] Train ‚Üí total: 0.041746, cf: 0.022958, deriv: 0.375759 | Val ‚Üí total: 0.029258, cf: 0.011049, deriv: 0.364177
[Epoch 149] Updated branch/trunk scale ‚Üí 1.0000


Training Epoch 149: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 160/160 [03:44<00:00,  1.40s/it]


Epoch 149 ‚Üí Current LR: 3.000000e-04
Epoch [149/500] Train ‚Üí total: 0.039744, cf: 0.021526, deriv: 0.364375 | Val ‚Üí total: 0.026495, cf: 0.009692, deriv: 0.336055
üîÑ Model checkpoint saved at epoch 150: /content/drive/MyDrive/Ucl/checkpoints/deeponet_model-epoch150.pt
Early stopping triggered at epoch 149. Best val loss: 0.026774
‚úÖ Training finished, model saved as final_deeponet_model.pt
‚úÖ Scalers saved for consistent evaluation
üìÅ Final model saved at: /content/drive/MyDrive/Ucl/final_deeponet_model.pt
üìÅ Checkpoints saved at: /content/drive/MyDrive/Ucl/checkpoints/
üìÅ Scalers saved at: /content/drive/MyDrive/Ucl/*_Scalar_Training.pkl


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : static_bug_5138
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/satyabratkumarsingh/option-portfolio-encoder-decoder/96ac953a751041d48e6b2089d00b1db8
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     train_cashflow_loss_batch [24000]   : (0.015873372554779053, 0.7178435325622559)
[1;38;5;39mCOMET INFO:[0m     train_cashflow_loss_epoch [150]     : (0.021525719726923853, 0.2982598843052983)
[1;38;5;39mCOMET INFO:[0m     train_derivative_loss_batch [24000] : (0.17953824996948242, 1.1214