# Libraries

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
# # change working directory

# import os

# os.getcwd()
# os.chdir('/content/drive/MyDrive/AML_kaggle_challenge/Kaggle Competition')
# os.getcwd()

In [None]:
# !pip install pykan

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset
from pathlib import Path
from tqdm import tqdm
import torch.nn.functional as F

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import pickle

import gc

import matplotlib.pyplot as plt

# from kan import KAN # kolmogorov arnold network

# Functions

In [None]:
def load_data(path):
    data = dict(np.load(path, allow_pickle=True))
    return data

In [None]:
# def contrastive_loss_with_multiple_positives(logits, label_indices):
#     """Remove model.temperature - use only learned scaling"""
#     # Create positive mask
#     positive_mask = (label_indices.unsqueeze(1) == label_indices.unsqueeze(0)).float()

#     # --- Text-to-Image Loss ---
#     # NO temperature scaling here - already applied before
#     logits_max_t2i, _ = torch.max(logits, dim=1, keepdim=True)
#     logits_stable_t2i = logits - logits_max_t2i.detach()

#     exp_sim_t2i = torch.exp(logits_stable_t2i)
#     positive_sim_t2i = torch.sum(exp_sim_t2i * positive_mask, dim=1)
#     all_sim_t2i = torch.sum(exp_sim_t2i, dim=1)

#     eps = 1e-9
#     loss_t2i = -torch.log(positive_sim_t2i / (all_sim_t2i + eps) + eps).mean()

#     # --- Image-to-Text Loss ---
#     logits_i2t = logits.T  # Just transpose, no scaling
#     logits_max_i2t, _ = torch.max(logits_i2t, dim=1, keepdim=True)
#     logits_stable_i2t = logits_i2t - logits_max_i2t.detach()

#     exp_sim_i2t = torch.exp(logits_stable_i2t)
#     positive_sim_i2t = torch.sum(exp_sim_i2t * positive_mask.T, dim=1)
#     all_sim_i2t = torch.sum(exp_sim_i2t, dim=1)

#     loss_i2t = -torch.log(positive_sim_i2t / (all_sim_i2t + eps) + eps).mean()

#     return (loss_t2i + loss_i2t) / 2.0

def contrastive_loss_with_multiple_positives(logits, label_indices, scale_penalty_weight=0.001):
    """With scale regularization to prevent explosion"""
    positive_mask = (label_indices.unsqueeze(1) == label_indices.unsqueeze(0)).float()

    # Remove self-comparisons
    positive_mask = positive_mask * (1 - torch.eye(positive_mask.size(0), device=positive_mask.device))

    # --- Text-to-Image Loss ---
    logits_max_t2i, _ = torch.max(logits, dim=1, keepdim=True)
    logits_stable_t2i = logits - logits_max_t2i.detach()

    exp_sim_t2i = torch.exp(logits_stable_t2i)
    positive_sim_t2i = torch.sum(exp_sim_t2i * positive_mask, dim=1)
    all_sim_t2i = torch.sum(exp_sim_t2i, dim=1)

    eps = 1e-9
    loss_t2i = -torch.log(positive_sim_t2i / (all_sim_t2i + eps) + eps).mean()

    # --- Image-to-Text Loss ---
    logits_i2t = logits.T
    logits_max_i2t, _ = torch.max(logits_i2t, dim=1, keepdim=True)
    logits_stable_i2t = logits_i2t - logits_max_i2t.detach()

    exp_sim_i2t = torch.exp(logits_stable_i2t)
    positive_sim_i2t = torch.sum(exp_sim_i2t * positive_mask.T, dim=1)
    all_sim_i2t = torch.sum(exp_sim_i2t, dim=1)

    loss_i2t = -torch.log(positive_sim_i2t / (all_sim_i2t + eps) + eps).mean()

    # Scale regularization (if scale is passed)
    scale_penalty = 0.0
    if hasattr(logits, 'scale_factor'):
        # Penalize large scale values
        scale_penalty = scale_penalty_weight * (logits.scale_factor ** 2)

    contrastive_loss = (loss_t2i + loss_i2t) / 2.0
    return contrastive_loss + scale_penalty




def calculate_mrr_text_to_image(model, data_loader, device):
    """Calculate Mean Reciprocal Rank (MRR) for text→image retrieval"""
    model.eval()
    reciprocal_ranks = []

    with torch.no_grad():
        for text_emb, image_emb, _ in tqdm(data_loader, desc="Calculating MRR "):
            text_emb, image_emb = text_emb.to(device), image_emb.to(device)

            pred_image_emb = model(text_emb)

            pred_image_emb = F.normalize(pred_image_emb, dim=1)

            sim_matrix = pred_image_emb @ image_emb.T  # (B, B)

            batch_size = len(text_emb)
            target_indices = torch.arange(batch_size).to(device)
            correct_scores = sim_matrix.diagonal()


            ranks = (sim_matrix > correct_scores.unsqueeze(1)).sum(dim=1) + 1

            reciprocal_ranks.extend((1.0 / ranks).cpu().tolist())
    mrr = sum(reciprocal_ranks) / len(reciprocal_ranks)
    return mrr


def train_model_combined(model, train_loader, val_loader, device, epochs, lr, model_path,
                        labels_train_indices, labels_val_indices):
    """Combined training with learnable temperature and MRR evaluation"""

    # Learnable temperature parameter (like in train_model_contrastive)
    initial_log_value = np.log(1 / 0.07)
    logit_scale = nn.Parameter(torch.empty(1).fill_(initial_log_value).to(device))

    # Optimizer for both model and temperature
    optimizer = optim.Adam([
        {'params': model.parameters()},
        {'params': [logit_scale], 'lr': 1e-2}  # Higher LR for temperature
    ], lr=lr, weight_decay=1e-5)

    # Scheduler
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5)
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer,
        T_0=10,           # First restart after 10 epochs
        T_mult=1,         # Double the cycle length each time (1, 2, 4, 8...)
        eta_min=1e-6      # Minimum learning rate
    )

    current_lr = optimizer.param_groups[0]['lr']
    current_scale = logit_scale.exp().item()

    print(f"Initial learning rate: {current_lr:.2e}")
    print(f"Initial temperature: {current_scale:.4f}")

    best_val_mrr = -1.0
    best_val_loss = float('inf')
    Path(model_path).parent.mkdir(parents=True, exist_ok=True)

    for epoch in range(epochs):
        # Training phase
        model.train()
        train_loss = 0
        num_batches = 0

        for X_batch, y_batch, batch_label_indices in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            batch_label_indices = batch_label_indices.to(device)

            optimizer.zero_grad()

            # Forward pass
            pred_emb = model(X_batch)

            # Normalize embeddings
            pred_norm = F.normalize(pred_emb, p=2, dim=1)
            target_norm = F.normalize(y_batch, p=2, dim=1)

            # Compute similarity matrix
            logits = pred_norm @ target_norm.T

            # Apply learned temperature scaling
            with torch.no_grad():
                logit_scale.clamp_(0, 5)
                # logit_scale.clamp_(0, 4.605)  # max scale = 100
                # logit_scale.clamp_(0, 2.302)  # max scale = exp(2.302) ≈ 10
            scale_factor = logit_scale.exp()
            scaled_logits = logits * scale_factor

            # Multi-positive contrastive loss
            # loss = contrastive_loss_with_multiple_positives(
            #     scaled_logits, batch_label_indices, model
            # )

            loss = contrastive_loss_with_multiple_positives(scaled_logits, batch_label_indices)

            # Skip NaN losses
            if torch.isnan(loss):
                print("NaN loss detected! Skipping batch.")
                continue

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            train_loss += loss.item()
            num_batches += 1

        avg_train_loss = train_loss / num_batches if num_batches > 0 else float('inf')

        # Validation phase
        model.eval()
        val_loss = 0
        num_val_batches = 0

        with torch.no_grad():
            # Calculate validation loss
            for X_batch, y_batch, batch_label_indices in val_loader:
                X_batch = X_batch.to(device)
                y_batch = y_batch.to(device)
                batch_label_indices = batch_label_indices.to(device)

                pred_emb = model(X_batch)
                pred_norm = F.normalize(pred_emb, p=2, dim=1)
                target_norm = F.normalize(y_batch, p=2, dim=1)
                logits = pred_norm @ target_norm.T

                # Apply same temperature scaling for validation
                scale_factor_val = logit_scale.exp()
                scaled_logits_val = logits * scale_factor_val

                # loss = contrastive_loss_with_multiple_positives(
                #     scaled_logits_val, batch_label_indices, model
                # )

                loss = contrastive_loss_with_multiple_positives(scaled_logits_val, batch_label_indices)

                if not torch.isnan(loss):
                    val_loss += loss.item()
                    num_val_batches += 1

        avg_val_loss = val_loss / num_val_batches if num_val_batches > 0 else float('inf')

        # Calculate MRR
        val_mrr = calculate_mrr_text_to_image(model, val_loader, device)

        # Get current learning rate and scale
        current_lr = optimizer.param_groups[0]['lr']
        current_scale = logit_scale.exp().item()

        print(f"Epoch {epoch+1}:")
        print(f"  Train Loss = {avg_train_loss:.6f}, Val Loss = {avg_val_loss:.6f}")
        print(f"  Val MRR = {val_mrr:.4f}, Scale = {current_scale:.4f}, LR = {current_lr:.2e}")
        # print(f"  Temperature = {model.temperature.item():.6f}")

        # Update scheduler based on MRR (we want to maximize MRR)
        scheduler.step(val_mrr)

        # Save best model based on MRR (primary) and loss (secondary)
        if val_mrr > best_val_mrr or (abs(val_mrr - best_val_mrr) < 1e-6 and avg_val_loss < best_val_loss):
            best_val_mrr = val_mrr
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), model_path)
            print(f"  ✓ Saved best model (MRR={val_mrr:.4f}, Loss={avg_val_loss:.6f})")

        print("-" * 60)

    return model

# Read Data

In [None]:
# read data back
data = torch.load('/content/drive/MyDrive/AML kaggle competition/sandros_data_15_11_25.pt')

X_train = data['X_train']
X_train_scaled = data['X_train_scaled']
X_val = data['X_val']
X_val_scaled = data['X_val_scaled']

y_train = data['y_train']
y_train_scaled = data['y_train_scaled']
y_val = data['y_val']
y_val_scaled = data['y_val_scaled']

labels_train = data['labels_train']
labels_val = data['labels_val']

In [None]:
import gc

del data

gc.collect()

27

# MLP

In [None]:
class MLP(nn.Module):
    def __init__(self, input_dim=1024, output_dim=1536, hidden_dim_1=1536, hidden_dim_2=2048, dropout_prob=0.5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim_1),
            nn.BatchNorm1d(hidden_dim_1),
            # nn.LayerNorm(hidden_dim_1),
            nn.GELU(),
            nn.Dropout(dropout_prob),

            nn.Linear(hidden_dim_1, hidden_dim_2),
            nn.BatchNorm1d(hidden_dim_2),
            # nn.LayerNorm(hidden_dim_2),
            nn.GELU(),
            nn.Dropout(dropout_prob),

            nn.Linear(hidden_dim_2, output_dim),
        )

        # # Better temperature initialization
        # self.log_temperature = nn.Parameter(torch.log(torch.tensor(0.07)))

    def forward(self, x):
        return self.net(x)

In [None]:
# class MLP(nn.Module):
#     def __init__(self, input_dim=1024, output_dim=1536, hidden_dim_1=1536, hidden_dim_2=2048, dropout_prob=0.5):
#         super().__init__()

#         self.net = nn.Sequential(
#             nn.Linear(input_dim, hidden_dim_1),
#             nn.LayerNorm(hidden_dim_1),
#             nn.GELU(),
#             nn.Dropout(dropout_prob),

#             nn.Linear(hidden_dim_1, hidden_dim_2),
#             nn.LayerNorm(hidden_dim_2),
#             nn.GELU(),
#             nn.Dropout(dropout_prob),

#             nn.Linear(hidden_dim_2, output_dim),
#         )

#         # Apply custom initialization
#         self._init_weights()

#     def _init_weights(self):
#         for module in self.modules():
#             if isinstance(module, nn.Linear):
#                 # Transformer-style initialization
#                 nn.init.xavier_uniform_(module.weight, gain=nn.init.calculate_gain('relu'))
#                 if module.bias is not None:
#                     nn.init.constant_(module.bias, 0.0)

#             elif isinstance(module, nn.LayerNorm):
#                 nn.init.constant_(module.weight, 1.0)
#                 nn.init.constant_(module.bias, 0.0)

#     def forward(self, x):
#         return self.net(x)

In [None]:
# Initialize model
MODEL_PATH = "models/mlp.pth"
EPOCHS = 150
BATCH_SIZE = 2048
LR = 0.0005
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")



model = MLP().to(DEVICE)
print(f"Parameters: {sum(p.numel() for p in model.parameters()):,}")

# Create data loaders with proper shuffling
train_loader = DataLoader(
    TensorDataset(X_train_scaled, y_train, labels_train),
    batch_size=BATCH_SIZE,
    shuffle=True  # Important for contrastive learning
)
val_loader = DataLoader(
    TensorDataset(X_val_scaled, y_val, labels_val),
    batch_size=100,
    shuffle=False
)

# Train with combined approach
print("\n3. Training with combined approach...")
model = train_model_combined(
    model, train_loader, val_loader, DEVICE, EPOCHS, LR, MODEL_PATH,
    labels_train, labels_val
)

Parameters: 7,876,608

3. Training with combined approach...
Initial learning rate: 5.00e-04
Initial temperature: 14.2857


Epoch 1/150: 100%|██████████| 55/55 [00:03<00:00, 14.18it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 466.45it/s]


Epoch 1:
  Train Loss = 19.764290, Val Loss = 20.648575
  Val MRR = 0.6056, Scale = 24.5270, LR = 5.00e-04
  ✓ Saved best model (MRR=0.6056, Loss=20.648575)
------------------------------------------------------------


Epoch 2/150: 100%|██████████| 55/55 [00:03<00:00, 14.72it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 475.13it/s]


Epoch 2:
  Train Loss = 19.677386, Val Loss = 20.647210
  Val MRR = 0.7073, Scale = 38.9411, LR = 4.95e-04
  ✓ Saved best model (MRR=0.7073, Loss=20.647210)
------------------------------------------------------------


Epoch 3/150: 100%|██████████| 55/55 [00:03<00:00, 14.47it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 356.35it/s]


Epoch 3:
  Train Loss = 19.694445, Val Loss = 20.646302
  Val MRR = 0.7450, Scale = 47.7982, LR = 4.94e-04
  ✓ Saved best model (MRR=0.7450, Loss=20.646302)
------------------------------------------------------------


Epoch 4/150: 100%|██████████| 55/55 [00:03<00:00, 15.65it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 451.32it/s]


Epoch 4:
  Train Loss = 19.638547, Val Loss = 20.646335
  Val MRR = 0.7688, Scale = 53.1884, LR = 4.93e-04
  ✓ Saved best model (MRR=0.7688, Loss=20.646335)
------------------------------------------------------------


Epoch 5/150: 100%|██████████| 55/55 [00:03<00:00, 15.55it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 469.65it/s]


Epoch 5:
  Train Loss = 19.608679, Val Loss = 20.646114
  Val MRR = 0.7845, Scale = 57.8184, LR = 4.93e-04
  ✓ Saved best model (MRR=0.7845, Loss=20.646114)
------------------------------------------------------------


Epoch 6/150: 100%|██████████| 55/55 [00:03<00:00, 14.50it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 356.15it/s]


Epoch 6:
  Train Loss = 19.611678, Val Loss = 20.646186
  Val MRR = 0.7948, Scale = 60.9930, LR = 4.92e-04
  ✓ Saved best model (MRR=0.7948, Loss=20.646186)
------------------------------------------------------------


Epoch 7/150: 100%|██████████| 55/55 [00:04<00:00, 13.24it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 472.27it/s]


Epoch 7:
  Train Loss = 19.631745, Val Loss = 20.645643
  Val MRR = 0.8011, Scale = 62.7231, LR = 4.92e-04
  ✓ Saved best model (MRR=0.8011, Loss=20.645643)
------------------------------------------------------------


Epoch 8/150: 100%|██████████| 55/55 [00:03<00:00, 17.05it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 457.26it/s]


Epoch 8:
  Train Loss = 19.616372, Val Loss = 20.645896
  Val MRR = 0.8125, Scale = 66.3620, LR = 4.92e-04
  ✓ Saved best model (MRR=0.8125, Loss=20.645896)
------------------------------------------------------------


Epoch 9/150: 100%|██████████| 55/55 [00:03<00:00, 15.31it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 353.07it/s]


Epoch 9:
  Train Loss = 19.626657, Val Loss = 20.645857
  Val MRR = 0.8113, Scale = 66.7344, LR = 4.92e-04
------------------------------------------------------------


Epoch 10/150: 100%|██████████| 55/55 [00:04<00:00, 13.34it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 466.34it/s]


Epoch 10:
  Train Loss = 19.621849, Val Loss = 20.645948
  Val MRR = 0.8213, Scale = 70.8242, LR = 4.92e-04
  ✓ Saved best model (MRR=0.8213, Loss=20.645948)
------------------------------------------------------------


Epoch 11/150: 100%|██████████| 55/55 [00:03<00:00, 14.55it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 449.00it/s]


Epoch 11:
  Train Loss = 19.641007, Val Loss = 20.645473
  Val MRR = 0.8223, Scale = 70.1007, LR = 4.92e-04
  ✓ Saved best model (MRR=0.8223, Loss=20.645473)
------------------------------------------------------------


Epoch 12/150: 100%|██████████| 55/55 [00:03<00:00, 17.37it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 349.45it/s]


Epoch 12:
  Train Loss = 19.604153, Val Loss = 20.645813
  Val MRR = 0.8304, Scale = 73.0556, LR = 4.92e-04
  ✓ Saved best model (MRR=0.8304, Loss=20.645813)
------------------------------------------------------------


Epoch 13/150: 100%|██████████| 55/55 [00:03<00:00, 13.82it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 478.54it/s]


Epoch 13:
  Train Loss = 19.598461, Val Loss = 20.645654
  Val MRR = 0.8296, Scale = 75.4993, LR = 4.92e-04
------------------------------------------------------------


Epoch 14/150: 100%|██████████| 55/55 [00:03<00:00, 14.69it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 452.15it/s]


Epoch 14:
  Train Loss = 19.560985, Val Loss = 20.645641
  Val MRR = 0.8334, Scale = 74.2703, LR = 4.92e-04
  ✓ Saved best model (MRR=0.8334, Loss=20.645641)
------------------------------------------------------------


Epoch 15/150: 100%|██████████| 55/55 [00:03<00:00, 14.37it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 330.14it/s]


Epoch 15:
  Train Loss = 19.579635, Val Loss = 20.645687
  Val MRR = 0.8389, Scale = 78.3430, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8389, Loss=20.645687)
------------------------------------------------------------


Epoch 16/150: 100%|██████████| 55/55 [00:03<00:00, 15.85it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 451.49it/s]


Epoch 16:
  Train Loss = 19.602356, Val Loss = 20.645529
  Val MRR = 0.8382, Scale = 77.8129, LR = 4.91e-04
------------------------------------------------------------


Epoch 17/150: 100%|██████████| 55/55 [00:03<00:00, 15.36it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 447.83it/s]


Epoch 17:
  Train Loss = 19.574814, Val Loss = 20.645249
  Val MRR = 0.8406, Scale = 78.9125, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8406, Loss=20.645249)
------------------------------------------------------------


Epoch 18/150: 100%|██████████| 55/55 [00:03<00:00, 14.58it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 356.61it/s]


Epoch 18:
  Train Loss = 19.597767, Val Loss = 20.645376
  Val MRR = 0.8429, Scale = 80.9116, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8429, Loss=20.645376)
------------------------------------------------------------


Epoch 19/150: 100%|██████████| 55/55 [00:04<00:00, 13.61it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 469.17it/s]


Epoch 19:
  Train Loss = 19.577546, Val Loss = 20.645394
  Val MRR = 0.8452, Scale = 82.0744, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8452, Loss=20.645394)
------------------------------------------------------------


Epoch 20/150: 100%|██████████| 55/55 [00:03<00:00, 17.06it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 473.81it/s]


Epoch 20:
  Train Loss = 19.620317, Val Loss = 20.645321
  Val MRR = 0.8461, Scale = 81.6516, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8461, Loss=20.645321)
------------------------------------------------------------


Epoch 21/150: 100%|██████████| 55/55 [00:03<00:00, 15.65it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 350.34it/s]


Epoch 21:
  Train Loss = 19.564348, Val Loss = 20.645328
  Val MRR = 0.8491, Scale = 84.8979, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8491, Loss=20.645328)
------------------------------------------------------------


Epoch 22/150: 100%|██████████| 55/55 [00:04<00:00, 13.27it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 460.78it/s]


Epoch 22:
  Train Loss = 19.588857, Val Loss = 20.645331
  Val MRR = 0.8518, Scale = 84.2866, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8518, Loss=20.645331)
------------------------------------------------------------


Epoch 23/150: 100%|██████████| 55/55 [00:03<00:00, 14.60it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 470.23it/s]


Epoch 23:
  Train Loss = 19.576369, Val Loss = 20.645231
  Val MRR = 0.8517, Scale = 84.2056, LR = 4.91e-04
------------------------------------------------------------


Epoch 24/150: 100%|██████████| 55/55 [00:03<00:00, 17.15it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 367.79it/s]


Epoch 24:
  Train Loss = 19.590398, Val Loss = 20.645240
  Val MRR = 0.8554, Scale = 84.0037, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8554, Loss=20.645240)
------------------------------------------------------------


Epoch 25/150: 100%|██████████| 55/55 [00:03<00:00, 14.03it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 475.39it/s]


Epoch 25:
  Train Loss = 19.583317, Val Loss = 20.645099
  Val MRR = 0.8533, Scale = 88.1882, LR = 4.91e-04
------------------------------------------------------------


Epoch 26/150: 100%|██████████| 55/55 [00:03<00:00, 14.74it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 449.38it/s]


Epoch 26:
  Train Loss = 19.580942, Val Loss = 20.645270
  Val MRR = 0.8574, Scale = 87.4602, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8574, Loss=20.645270)
------------------------------------------------------------


Epoch 27/150: 100%|██████████| 55/55 [00:03<00:00, 14.14it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 337.87it/s]


Epoch 27:
  Train Loss = 19.573216, Val Loss = 20.645308
  Val MRR = 0.8580, Scale = 87.9343, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8580, Loss=20.645308)
------------------------------------------------------------


Epoch 28/150: 100%|██████████| 55/55 [00:03<00:00, 15.76it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 473.18it/s]


Epoch 28:
  Train Loss = 19.581723, Val Loss = 20.645403
  Val MRR = 0.8618, Scale = 86.6292, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8618, Loss=20.645403)
------------------------------------------------------------


Epoch 29/150: 100%|██████████| 55/55 [00:03<00:00, 15.52it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 471.61it/s]


Epoch 29:
  Train Loss = 19.567676, Val Loss = 20.645392
  Val MRR = 0.8611, Scale = 87.9500, LR = 4.91e-04
------------------------------------------------------------


Epoch 30/150: 100%|██████████| 55/55 [00:03<00:00, 14.42it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 342.61it/s]


Epoch 30:
  Train Loss = 19.561056, Val Loss = 20.645386
  Val MRR = 0.8629, Scale = 90.3217, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8629, Loss=20.645386)
------------------------------------------------------------


Epoch 31/150: 100%|██████████| 55/55 [00:04<00:00, 13.46it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 462.37it/s]


Epoch 31:
  Train Loss = 19.550561, Val Loss = 20.645243
  Val MRR = 0.8623, Scale = 90.3083, LR = 4.91e-04
------------------------------------------------------------


Epoch 32/150: 100%|██████████| 55/55 [00:03<00:00, 17.16it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 450.68it/s]


Epoch 32:
  Train Loss = 19.570963, Val Loss = 20.645070
  Val MRR = 0.8639, Scale = 92.0033, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8639, Loss=20.645070)
------------------------------------------------------------


Epoch 33/150: 100%|██████████| 55/55 [00:03<00:00, 15.34it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 360.09it/s]


Epoch 33:
  Train Loss = 19.566165, Val Loss = 20.645063
  Val MRR = 0.8636, Scale = 90.9677, LR = 4.91e-04
------------------------------------------------------------


Epoch 34/150: 100%|██████████| 55/55 [00:04<00:00, 13.31it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 469.42it/s]


Epoch 34:
  Train Loss = 19.557067, Val Loss = 20.645188
  Val MRR = 0.8664, Scale = 91.2264, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8664, Loss=20.645188)
------------------------------------------------------------


Epoch 35/150: 100%|██████████| 55/55 [00:03<00:00, 14.84it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 449.81it/s]


Epoch 35:
  Train Loss = 19.559975, Val Loss = 20.645438
  Val MRR = 0.8669, Scale = 93.6001, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8669, Loss=20.645438)
------------------------------------------------------------


Epoch 36/150: 100%|██████████| 55/55 [00:03<00:00, 16.89it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 344.39it/s]


Epoch 36:
  Train Loss = 19.611142, Val Loss = 20.645709
  Val MRR = 0.8646, Scale = 94.2089, LR = 4.91e-04
------------------------------------------------------------


Epoch 37/150: 100%|██████████| 55/55 [00:03<00:00, 14.04it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 462.56it/s]


Epoch 37:
  Train Loss = 19.557498, Val Loss = 20.645437
  Val MRR = 0.8681, Scale = 96.1294, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8681, Loss=20.645437)
------------------------------------------------------------


Epoch 38/150: 100%|██████████| 55/55 [00:03<00:00, 14.65it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 472.53it/s]


Epoch 38:
  Train Loss = 19.583605, Val Loss = 20.645259
  Val MRR = 0.8685, Scale = 95.3868, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8685, Loss=20.645259)
------------------------------------------------------------


Epoch 39/150: 100%|██████████| 55/55 [00:03<00:00, 14.28it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 327.90it/s]


Epoch 39:
  Train Loss = 19.598534, Val Loss = 20.645250
  Val MRR = 0.8699, Scale = 95.9428, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8699, Loss=20.645250)
------------------------------------------------------------


Epoch 40/150: 100%|██████████| 55/55 [00:03<00:00, 15.85it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 454.54it/s]


Epoch 40:
  Train Loss = 19.564683, Val Loss = 20.645454
  Val MRR = 0.8726, Scale = 97.5944, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8726, Loss=20.645454)
------------------------------------------------------------


Epoch 41/150: 100%|██████████| 55/55 [00:03<00:00, 15.48it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 458.90it/s]


Epoch 41:
  Train Loss = 19.553625, Val Loss = 20.645525
  Val MRR = 0.8723, Scale = 96.9541, LR = 4.91e-04
------------------------------------------------------------


Epoch 42/150: 100%|██████████| 55/55 [00:03<00:00, 14.34it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 354.69it/s]


Epoch 42:
  Train Loss = 19.571075, Val Loss = 20.645354
  Val MRR = 0.8724, Scale = 96.2308, LR = 4.91e-04
------------------------------------------------------------


Epoch 43/150: 100%|██████████| 55/55 [00:04<00:00, 13.59it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 444.22it/s]


Epoch 43:
  Train Loss = 19.583704, Val Loss = 20.645243
  Val MRR = 0.8729, Scale = 98.7484, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8729, Loss=20.645243)
------------------------------------------------------------


Epoch 44/150: 100%|██████████| 55/55 [00:03<00:00, 17.17it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 466.67it/s]


Epoch 44:
  Train Loss = 19.573491, Val Loss = 20.645141
  Val MRR = 0.8739, Scale = 99.1029, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8739, Loss=20.645141)
------------------------------------------------------------


Epoch 45/150: 100%|██████████| 55/55 [00:03<00:00, 15.26it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 340.75it/s]


Epoch 45:
  Train Loss = 19.567324, Val Loss = 20.645074
  Val MRR = 0.8741, Scale = 97.3066, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8741, Loss=20.645074)
------------------------------------------------------------


Epoch 46/150: 100%|██████████| 55/55 [00:04<00:00, 13.40it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 464.88it/s]


Epoch 46:
  Train Loss = 19.593651, Val Loss = 20.645346
  Val MRR = 0.8762, Scale = 97.5513, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8762, Loss=20.645346)
------------------------------------------------------------


Epoch 47/150: 100%|██████████| 55/55 [00:03<00:00, 14.65it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 473.06it/s]


Epoch 47:
  Train Loss = 19.574493, Val Loss = 20.645047
  Val MRR = 0.8748, Scale = 97.2410, LR = 4.91e-04
------------------------------------------------------------


Epoch 48/150: 100%|██████████| 55/55 [00:03<00:00, 17.16it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 362.39it/s]


Epoch 48:
  Train Loss = 19.549679, Val Loss = 20.645144
  Val MRR = 0.8749, Scale = 100.4484, LR = 4.91e-04
------------------------------------------------------------


Epoch 49/150: 100%|██████████| 55/55 [00:03<00:00, 14.05it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 469.09it/s]


Epoch 49:
  Train Loss = 19.581329, Val Loss = 20.645074
  Val MRR = 0.8748, Scale = 101.3206, LR = 4.91e-04
------------------------------------------------------------


Epoch 50/150: 100%|██████████| 55/55 [00:03<00:00, 14.50it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 449.78it/s]


Epoch 50:
  Train Loss = 19.564229, Val Loss = 20.644871
  Val MRR = 0.8760, Scale = 101.5086, LR = 4.91e-04
------------------------------------------------------------


Epoch 51/150: 100%|██████████| 55/55 [00:03<00:00, 14.24it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 342.71it/s]


Epoch 51:
  Train Loss = 19.572543, Val Loss = 20.645212
  Val MRR = 0.8753, Scale = 99.7325, LR = 4.91e-04
------------------------------------------------------------


Epoch 52/150: 100%|██████████| 55/55 [00:03<00:00, 15.75it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 468.93it/s]


Epoch 52:
  Train Loss = 19.511268, Val Loss = 20.645223
  Val MRR = 0.8786, Scale = 102.2966, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8786, Loss=20.645223)
------------------------------------------------------------


Epoch 53/150: 100%|██████████| 55/55 [00:03<00:00, 15.49it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 460.39it/s]


Epoch 53:
  Train Loss = 19.567180, Val Loss = 20.645240
  Val MRR = 0.8792, Scale = 101.1286, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8792, Loss=20.645240)
------------------------------------------------------------


Epoch 54/150: 100%|██████████| 55/55 [00:03<00:00, 14.28it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 331.77it/s]


Epoch 54:
  Train Loss = 19.542713, Val Loss = 20.644924
  Val MRR = 0.8805, Scale = 101.5453, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8805, Loss=20.644924)
------------------------------------------------------------


Epoch 55/150: 100%|██████████| 55/55 [00:04<00:00, 13.44it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 475.52it/s]


Epoch 55:
  Train Loss = 19.577651, Val Loss = 20.645073
  Val MRR = 0.8786, Scale = 102.8317, LR = 4.91e-04
------------------------------------------------------------


Epoch 56/150: 100%|██████████| 55/55 [00:03<00:00, 17.10it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 470.90it/s]


Epoch 56:
  Train Loss = 19.555224, Val Loss = 20.645210
  Val MRR = 0.8813, Scale = 102.6736, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8813, Loss=20.645210)
------------------------------------------------------------


Epoch 57/150: 100%|██████████| 55/55 [00:03<00:00, 15.35it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 369.43it/s]


Epoch 57:
  Train Loss = 19.573922, Val Loss = 20.645218
  Val MRR = 0.8821, Scale = 102.8207, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8821, Loss=20.645218)
------------------------------------------------------------


Epoch 58/150: 100%|██████████| 55/55 [00:04<00:00, 13.38it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 456.93it/s]


Epoch 58:
  Train Loss = 19.541609, Val Loss = 20.645093
  Val MRR = 0.8809, Scale = 103.4975, LR = 4.90e-04
------------------------------------------------------------


Epoch 59/150: 100%|██████████| 55/55 [00:03<00:00, 14.70it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 472.74it/s]


Epoch 59:
  Train Loss = 19.547853, Val Loss = 20.645223
  Val MRR = 0.8827, Scale = 106.3860, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8827, Loss=20.645223)
------------------------------------------------------------


Epoch 60/150: 100%|██████████| 55/55 [00:03<00:00, 16.97it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 344.14it/s]


Epoch 60:
  Train Loss = 19.548528, Val Loss = 20.644683
  Val MRR = 0.8826, Scale = 105.2379, LR = 4.90e-04
------------------------------------------------------------


Epoch 61/150: 100%|██████████| 55/55 [00:03<00:00, 14.01it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 478.08it/s]


Epoch 61:
  Train Loss = 19.563988, Val Loss = 20.645003
  Val MRR = 0.8837, Scale = 103.7329, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8837, Loss=20.645003)
------------------------------------------------------------


Epoch 62/150: 100%|██████████| 55/55 [00:03<00:00, 14.58it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 465.75it/s]


Epoch 62:
  Train Loss = 19.508112, Val Loss = 20.645042
  Val MRR = 0.8810, Scale = 105.1920, LR = 4.90e-04
------------------------------------------------------------


Epoch 63/150: 100%|██████████| 55/55 [00:03<00:00, 14.31it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 329.11it/s]


Epoch 63:
  Train Loss = 19.561767, Val Loss = 20.644955
  Val MRR = 0.8845, Scale = 104.7422, LR = 4.91e-04
  ✓ Saved best model (MRR=0.8845, Loss=20.644955)
------------------------------------------------------------


Epoch 64/150: 100%|██████████| 55/55 [00:03<00:00, 15.70it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 461.95it/s]


Epoch 64:
  Train Loss = 19.566730, Val Loss = 20.645267
  Val MRR = 0.8839, Scale = 104.9198, LR = 4.90e-04
------------------------------------------------------------


Epoch 65/150: 100%|██████████| 55/55 [00:03<00:00, 15.53it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 450.11it/s]


Epoch 65:
  Train Loss = 19.559227, Val Loss = 20.645111
  Val MRR = 0.8849, Scale = 107.4750, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8849, Loss=20.645111)
------------------------------------------------------------


Epoch 66/150: 100%|██████████| 55/55 [00:03<00:00, 14.35it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 346.66it/s]


Epoch 66:
  Train Loss = 19.536931, Val Loss = 20.645212
  Val MRR = 0.8815, Scale = 105.9705, LR = 4.90e-04
------------------------------------------------------------


Epoch 67/150: 100%|██████████| 55/55 [00:04<00:00, 13.48it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 443.56it/s]


Epoch 67:
  Train Loss = 19.545367, Val Loss = 20.645090
  Val MRR = 0.8865, Scale = 105.6790, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8865, Loss=20.645090)
------------------------------------------------------------


Epoch 68/150: 100%|██████████| 55/55 [00:03<00:00, 17.11it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 465.81it/s]


Epoch 68:
  Train Loss = 19.575771, Val Loss = 20.645143
  Val MRR = 0.8853, Scale = 107.1692, LR = 4.90e-04
------------------------------------------------------------


Epoch 69/150: 100%|██████████| 55/55 [00:03<00:00, 15.27it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 332.30it/s]


Epoch 69:
  Train Loss = 19.561419, Val Loss = 20.645344
  Val MRR = 0.8831, Scale = 106.6899, LR = 4.90e-04
------------------------------------------------------------


Epoch 70/150: 100%|██████████| 55/55 [00:04<00:00, 13.36it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 470.59it/s]


Epoch 70:
  Train Loss = 19.568606, Val Loss = 20.645309
  Val MRR = 0.8858, Scale = 106.8491, LR = 4.90e-04
------------------------------------------------------------


Epoch 71/150: 100%|██████████| 55/55 [00:03<00:00, 14.75it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 464.90it/s]


Epoch 71:
  Train Loss = 19.546572, Val Loss = 20.644920
  Val MRR = 0.8867, Scale = 109.7649, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8867, Loss=20.644920)
------------------------------------------------------------


Epoch 72/150: 100%|██████████| 55/55 [00:03<00:00, 17.03it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 360.16it/s]


Epoch 72:
  Train Loss = 19.547131, Val Loss = 20.645272
  Val MRR = 0.8873, Scale = 106.4578, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8873, Loss=20.645272)
------------------------------------------------------------


Epoch 73/150: 100%|██████████| 55/55 [00:03<00:00, 13.95it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 440.41it/s]


Epoch 73:
  Train Loss = 19.551789, Val Loss = 20.645177
  Val MRR = 0.8870, Scale = 108.8309, LR = 4.90e-04
------------------------------------------------------------


Epoch 74/150: 100%|██████████| 55/55 [00:03<00:00, 14.74it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 470.52it/s]


Epoch 74:
  Train Loss = 19.556051, Val Loss = 20.645127
  Val MRR = 0.8891, Scale = 107.4469, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8891, Loss=20.645127)
------------------------------------------------------------


Epoch 75/150: 100%|██████████| 55/55 [00:03<00:00, 14.18it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 344.13it/s]


Epoch 75:
  Train Loss = 19.565690, Val Loss = 20.645157
  Val MRR = 0.8883, Scale = 108.4757, LR = 4.90e-04
------------------------------------------------------------


Epoch 76/150: 100%|██████████| 55/55 [00:03<00:00, 15.71it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 466.89it/s]


Epoch 76:
  Train Loss = 19.520324, Val Loss = 20.645307
  Val MRR = 0.8900, Scale = 108.6038, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8900, Loss=20.645307)
------------------------------------------------------------


Epoch 77/150: 100%|██████████| 55/55 [00:03<00:00, 15.43it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 467.75it/s]


Epoch 77:
  Train Loss = 19.555651, Val Loss = 20.645102
  Val MRR = 0.8858, Scale = 107.9061, LR = 4.90e-04
------------------------------------------------------------


Epoch 78/150: 100%|██████████| 55/55 [00:03<00:00, 14.40it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 339.76it/s]


Epoch 78:
  Train Loss = 19.521644, Val Loss = 20.645166
  Val MRR = 0.8927, Scale = 109.9613, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8927, Loss=20.645166)
------------------------------------------------------------


Epoch 79/150: 100%|██████████| 55/55 [00:04<00:00, 13.48it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 468.16it/s]


Epoch 79:
  Train Loss = 19.558245, Val Loss = 20.644997
  Val MRR = 0.8895, Scale = 107.6753, LR = 4.90e-04
------------------------------------------------------------


Epoch 80/150: 100%|██████████| 55/55 [00:03<00:00, 17.19it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 480.07it/s]


Epoch 80:
  Train Loss = 19.543525, Val Loss = 20.645100
  Val MRR = 0.8896, Scale = 107.5060, LR = 4.90e-04
------------------------------------------------------------


Epoch 81/150: 100%|██████████| 55/55 [00:03<00:00, 15.25it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 350.23it/s]


Epoch 81:
  Train Loss = 19.553434, Val Loss = 20.644972
  Val MRR = 0.8911, Scale = 108.6638, LR = 4.90e-04
------------------------------------------------------------


Epoch 82/150: 100%|██████████| 55/55 [00:04<00:00, 13.27it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 447.05it/s]


Epoch 82:
  Train Loss = 19.538554, Val Loss = 20.644882
  Val MRR = 0.8926, Scale = 111.0604, LR = 4.90e-04
------------------------------------------------------------


Epoch 83/150: 100%|██████████| 55/55 [00:03<00:00, 14.60it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 459.07it/s]


Epoch 83:
  Train Loss = 19.533998, Val Loss = 20.644942
  Val MRR = 0.8910, Scale = 110.7347, LR = 4.90e-04
------------------------------------------------------------


Epoch 84/150: 100%|██████████| 55/55 [00:03<00:00, 17.13it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 347.37it/s]


Epoch 84:
  Train Loss = 19.566799, Val Loss = 20.644880
  Val MRR = 0.8920, Scale = 109.9504, LR = 4.90e-04
------------------------------------------------------------


Epoch 85/150: 100%|██████████| 55/55 [00:03<00:00, 13.80it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 472.22it/s]


Epoch 85:
  Train Loss = 19.535541, Val Loss = 20.644871
  Val MRR = 0.8935, Scale = 111.5061, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8935, Loss=20.644871)
------------------------------------------------------------


Epoch 86/150: 100%|██████████| 55/55 [00:03<00:00, 14.47it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 473.26it/s]


Epoch 86:
  Train Loss = 19.528584, Val Loss = 20.645043
  Val MRR = 0.8936, Scale = 110.4398, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8936, Loss=20.645043)
------------------------------------------------------------


Epoch 87/150: 100%|██████████| 55/55 [00:03<00:00, 14.32it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 349.79it/s]


Epoch 87:
  Train Loss = 19.535919, Val Loss = 20.644804
  Val MRR = 0.8922, Scale = 109.4168, LR = 4.90e-04
------------------------------------------------------------


Epoch 88/150: 100%|██████████| 55/55 [00:03<00:00, 15.90it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 473.35it/s]


Epoch 88:
  Train Loss = 19.524137, Val Loss = 20.644894
  Val MRR = 0.8926, Scale = 109.8971, LR = 4.90e-04
------------------------------------------------------------


Epoch 89/150: 100%|██████████| 55/55 [00:03<00:00, 15.54it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 473.06it/s]


Epoch 89:
  Train Loss = 19.542685, Val Loss = 20.644818
  Val MRR = 0.8945, Scale = 114.3856, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8945, Loss=20.644818)
------------------------------------------------------------


Epoch 90/150: 100%|██████████| 55/55 [00:04<00:00, 13.33it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 358.45it/s]


Epoch 90:
  Train Loss = 19.543194, Val Loss = 20.644949
  Val MRR = 0.8940, Scale = 113.7011, LR = 4.90e-04
------------------------------------------------------------


Epoch 91/150: 100%|██████████| 55/55 [00:04<00:00, 13.70it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 474.17it/s]


Epoch 91:
  Train Loss = 19.570955, Val Loss = 20.644918
  Val MRR = 0.8930, Scale = 109.0196, LR = 4.90e-04
------------------------------------------------------------


Epoch 92/150: 100%|██████████| 55/55 [00:03<00:00, 17.21it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 460.91it/s]


Epoch 92:
  Train Loss = 19.536827, Val Loss = 20.644736
  Val MRR = 0.8954, Scale = 111.0042, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8954, Loss=20.644736)
------------------------------------------------------------


Epoch 93/150: 100%|██████████| 55/55 [00:03<00:00, 14.95it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 341.17it/s]


Epoch 93:
  Train Loss = 19.552696, Val Loss = 20.644970
  Val MRR = 0.8937, Scale = 113.4501, LR = 4.90e-04
------------------------------------------------------------


Epoch 94/150: 100%|██████████| 55/55 [00:04<00:00, 13.62it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 472.15it/s]


Epoch 94:
  Train Loss = 19.553949, Val Loss = 20.644769
  Val MRR = 0.8944, Scale = 112.3992, LR = 4.90e-04
------------------------------------------------------------


Epoch 95/150: 100%|██████████| 55/55 [00:03<00:00, 14.41it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 480.43it/s]


Epoch 95:
  Train Loss = 19.563722, Val Loss = 20.645055
  Val MRR = 0.8951, Scale = 110.7942, LR = 4.90e-04
------------------------------------------------------------


Epoch 96/150: 100%|██████████| 55/55 [00:03<00:00, 16.63it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 341.77it/s]


Epoch 96:
  Train Loss = 19.544286, Val Loss = 20.644932
  Val MRR = 0.8953, Scale = 112.0224, LR = 4.90e-04
------------------------------------------------------------


Epoch 97/150: 100%|██████████| 55/55 [00:03<00:00, 14.31it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 459.79it/s]


Epoch 97:
  Train Loss = 19.504887, Val Loss = 20.645082
  Val MRR = 0.8938, Scale = 113.2909, LR = 4.90e-04
------------------------------------------------------------


Epoch 98/150: 100%|██████████| 55/55 [00:03<00:00, 14.71it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 470.78it/s]


Epoch 98:
  Train Loss = 19.512381, Val Loss = 20.644853
  Val MRR = 0.8941, Scale = 114.6648, LR = 4.90e-04
------------------------------------------------------------


Epoch 99/150: 100%|██████████| 55/55 [00:03<00:00, 14.02it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 355.41it/s]


Epoch 99:
  Train Loss = 19.544889, Val Loss = 20.644883
  Val MRR = 0.8966, Scale = 113.4316, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8966, Loss=20.644883)
------------------------------------------------------------


Epoch 100/150: 100%|██████████| 55/55 [00:03<00:00, 16.04it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 466.89it/s]


Epoch 100:
  Train Loss = 19.523556, Val Loss = 20.645168
  Val MRR = 0.8951, Scale = 116.4532, LR = 4.90e-04
------------------------------------------------------------


Epoch 101/150: 100%|██████████| 55/55 [00:03<00:00, 15.52it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 471.21it/s]


Epoch 101:
  Train Loss = 19.542345, Val Loss = 20.645036
  Val MRR = 0.8964, Scale = 111.6682, LR = 4.90e-04
------------------------------------------------------------


Epoch 102/150: 100%|██████████| 55/55 [00:03<00:00, 14.12it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 340.20it/s]


Epoch 102:
  Train Loss = 19.549012, Val Loss = 20.645174
  Val MRR = 0.8969, Scale = 113.9973, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8969, Loss=20.645174)
------------------------------------------------------------


Epoch 103/150: 100%|██████████| 55/55 [00:04<00:00, 13.65it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 468.32it/s]


Epoch 103:
  Train Loss = 19.537187, Val Loss = 20.645111
  Val MRR = 0.8968, Scale = 113.6860, LR = 4.90e-04
------------------------------------------------------------


Epoch 104/150: 100%|██████████| 55/55 [00:03<00:00, 16.98it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 473.33it/s]


Epoch 104:
  Train Loss = 19.513072, Val Loss = 20.644891
  Val MRR = 0.8952, Scale = 114.4122, LR = 4.90e-04
------------------------------------------------------------


Epoch 105/150: 100%|██████████| 55/55 [00:03<00:00, 14.83it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 316.65it/s]


Epoch 105:
  Train Loss = 19.552104, Val Loss = 20.644907
  Val MRR = 0.8983, Scale = 115.3149, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8983, Loss=20.644907)
------------------------------------------------------------


Epoch 106/150: 100%|██████████| 55/55 [00:04<00:00, 13.66it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 464.68it/s]


Epoch 106:
  Train Loss = 19.554507, Val Loss = 20.644861
  Val MRR = 0.8987, Scale = 116.5434, LR = 4.90e-04
  ✓ Saved best model (MRR=0.8987, Loss=20.644861)
------------------------------------------------------------


Epoch 107/150: 100%|██████████| 55/55 [00:03<00:00, 14.66it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 457.51it/s]


Epoch 107:
  Train Loss = 19.534569, Val Loss = 20.645001
  Val MRR = 0.8984, Scale = 116.1121, LR = 4.90e-04
------------------------------------------------------------


Epoch 108/150: 100%|██████████| 55/55 [00:03<00:00, 16.52it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 340.20it/s]


Epoch 108:
  Train Loss = 19.543729, Val Loss = 20.644886
  Val MRR = 0.8985, Scale = 113.4316, LR = 4.90e-04
------------------------------------------------------------


Epoch 109/150: 100%|██████████| 55/55 [00:03<00:00, 14.12it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 437.07it/s]


Epoch 109:
  Train Loss = 19.549286, Val Loss = 20.644877
  Val MRR = 0.8966, Scale = 114.6695, LR = 4.90e-04
------------------------------------------------------------


Epoch 110/150: 100%|██████████| 55/55 [00:03<00:00, 14.57it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 466.25it/s]


Epoch 110:
  Train Loss = 19.567496, Val Loss = 20.644939
  Val MRR = 0.8985, Scale = 118.5566, LR = 4.90e-04
------------------------------------------------------------


Epoch 111/150: 100%|██████████| 55/55 [00:03<00:00, 14.11it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 354.01it/s]


Epoch 111:
  Train Loss = 19.549524, Val Loss = 20.645009
  Val MRR = 0.8972, Scale = 114.1795, LR = 4.90e-04
------------------------------------------------------------


Epoch 112/150: 100%|██████████| 55/55 [00:03<00:00, 15.88it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 465.20it/s]


Epoch 112:
  Train Loss = 19.579023, Val Loss = 20.645024
  Val MRR = 0.8974, Scale = 114.0964, LR = 4.90e-04
------------------------------------------------------------


Epoch 113/150: 100%|██████████| 55/55 [00:03<00:00, 15.29it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 457.95it/s]


Epoch 113:
  Train Loss = 19.541583, Val Loss = 20.645048
  Val MRR = 0.8982, Scale = 115.9397, LR = 4.90e-04
------------------------------------------------------------


Epoch 114/150: 100%|██████████| 55/55 [00:03<00:00, 14.18it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 333.63it/s]


Epoch 114:
  Train Loss = 19.519551, Val Loss = 20.644953
  Val MRR = 0.8979, Scale = 118.0775, LR = 4.90e-04
------------------------------------------------------------


Epoch 115/150: 100%|██████████| 55/55 [00:04<00:00, 13.68it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 473.15it/s]


Epoch 115:
  Train Loss = 19.518220, Val Loss = 20.645141
  Val MRR = 0.8974, Scale = 115.4883, LR = 4.90e-04
------------------------------------------------------------


Epoch 116/150: 100%|██████████| 55/55 [00:03<00:00, 16.93it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 466.17it/s]


Epoch 116:
  Train Loss = 19.525586, Val Loss = 20.645018
  Val MRR = 0.8962, Scale = 112.9166, LR = 4.90e-04
------------------------------------------------------------


Epoch 117/150: 100%|██████████| 55/55 [00:03<00:00, 15.12it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 348.10it/s]


Epoch 117:
  Train Loss = 19.569236, Val Loss = 20.644937
  Val MRR = 0.8977, Scale = 116.5042, LR = 4.90e-04
------------------------------------------------------------


Epoch 118/150: 100%|██████████| 55/55 [00:04<00:00, 13.48it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 438.47it/s]


Epoch 118:
  Train Loss = 19.543572, Val Loss = 20.644739
  Val MRR = 0.8984, Scale = 118.7212, LR = 4.90e-04
------------------------------------------------------------


Epoch 119/150: 100%|██████████| 55/55 [00:03<00:00, 14.72it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 473.95it/s]


Epoch 119:
  Train Loss = 19.511464, Val Loss = 20.644743
  Val MRR = 0.8986, Scale = 117.5035, LR = 4.90e-04
------------------------------------------------------------


Epoch 120/150: 100%|██████████| 55/55 [00:03<00:00, 16.96it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 343.74it/s]


Epoch 120:
  Train Loss = 19.517407, Val Loss = 20.644826
  Val MRR = 0.9004, Scale = 118.6810, LR = 4.90e-04
  ✓ Saved best model (MRR=0.9004, Loss=20.644826)
------------------------------------------------------------


Epoch 121/150: 100%|██████████| 55/55 [00:03<00:00, 14.00it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 471.75it/s]


Epoch 121:
  Train Loss = 19.559512, Val Loss = 20.644775
  Val MRR = 0.8975, Scale = 119.0862, LR = 4.90e-04
------------------------------------------------------------


Epoch 122/150: 100%|██████████| 55/55 [00:03<00:00, 14.54it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 467.92it/s]


Epoch 122:
  Train Loss = 19.532726, Val Loss = 20.644785
  Val MRR = 0.8995, Scale = 118.4462, LR = 4.90e-04
------------------------------------------------------------


Epoch 123/150: 100%|██████████| 55/55 [00:03<00:00, 13.88it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 349.44it/s]


Epoch 123:
  Train Loss = 19.542856, Val Loss = 20.644616
  Val MRR = 0.9002, Scale = 114.5458, LR = 4.90e-04
------------------------------------------------------------


Epoch 124/150: 100%|██████████| 55/55 [00:03<00:00, 15.82it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 454.74it/s]


Epoch 124:
  Train Loss = 19.545573, Val Loss = 20.645094
  Val MRR = 0.9001, Scale = 115.6573, LR = 4.90e-04
------------------------------------------------------------


Epoch 125/150: 100%|██████████| 55/55 [00:03<00:00, 15.48it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 444.95it/s]


Epoch 125:
  Train Loss = 19.547655, Val Loss = 20.644811
  Val MRR = 0.8991, Scale = 115.7966, LR = 4.90e-04
------------------------------------------------------------


Epoch 126/150: 100%|██████████| 55/55 [00:03<00:00, 14.27it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 345.01it/s]


Epoch 126:
  Train Loss = 19.557771, Val Loss = 20.644849
  Val MRR = 0.8982, Scale = 116.3949, LR = 4.90e-04
------------------------------------------------------------


Epoch 127/150: 100%|██████████| 55/55 [00:04<00:00, 13.42it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 445.06it/s]


Epoch 127:
  Train Loss = 19.554395, Val Loss = 20.645024
  Val MRR = 0.8994, Scale = 118.9543, LR = 4.90e-04
------------------------------------------------------------


Epoch 128/150: 100%|██████████| 55/55 [00:03<00:00, 17.18it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 468.11it/s]


Epoch 128:
  Train Loss = 19.539351, Val Loss = 20.645260
  Val MRR = 0.9010, Scale = 118.6971, LR = 4.90e-04
  ✓ Saved best model (MRR=0.9010, Loss=20.645260)
------------------------------------------------------------


Epoch 129/150: 100%|██████████| 55/55 [00:03<00:00, 15.19it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 336.98it/s]


Epoch 129:
  Train Loss = 19.553815, Val Loss = 20.645025
  Val MRR = 0.9011, Scale = 119.6917, LR = 4.90e-04
  ✓ Saved best model (MRR=0.9011, Loss=20.645025)
------------------------------------------------------------


Epoch 130/150: 100%|██████████| 55/55 [00:04<00:00, 13.29it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 464.29it/s]


Epoch 130:
  Train Loss = 19.571485, Val Loss = 20.645081
  Val MRR = 0.9025, Scale = 119.3231, LR = 4.90e-04
  ✓ Saved best model (MRR=0.9025, Loss=20.645081)
------------------------------------------------------------


Epoch 131/150: 100%|██████████| 55/55 [00:03<00:00, 14.49it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 473.22it/s]


Epoch 131:
  Train Loss = 19.535424, Val Loss = 20.644976
  Val MRR = 0.9004, Scale = 117.9857, LR = 4.90e-04
------------------------------------------------------------


Epoch 132/150: 100%|██████████| 55/55 [00:03<00:00, 16.93it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 352.14it/s]


Epoch 132:
  Train Loss = 19.522936, Val Loss = 20.644876
  Val MRR = 0.9004, Scale = 117.2066, LR = 4.90e-04
------------------------------------------------------------


Epoch 133/150: 100%|██████████| 55/55 [00:03<00:00, 13.89it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 432.12it/s]


Epoch 133:
  Train Loss = 19.559052, Val Loss = 20.644987
  Val MRR = 0.9035, Scale = 119.5227, LR = 4.90e-04
  ✓ Saved best model (MRR=0.9035, Loss=20.644987)
------------------------------------------------------------


Epoch 134/150: 100%|██████████| 55/55 [00:03<00:00, 14.55it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 475.69it/s]


Epoch 134:
  Train Loss = 19.550258, Val Loss = 20.645020
  Val MRR = 0.9010, Scale = 117.4869, LR = 4.90e-04
------------------------------------------------------------


Epoch 135/150: 100%|██████████| 55/55 [00:03<00:00, 14.04it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 353.80it/s]


Epoch 135:
  Train Loss = 19.562288, Val Loss = 20.645162
  Val MRR = 0.8995, Scale = 120.1422, LR = 4.90e-04
------------------------------------------------------------


Epoch 136/150: 100%|██████████| 55/55 [00:03<00:00, 15.86it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 465.92it/s]


Epoch 136:
  Train Loss = 19.506833, Val Loss = 20.644929
  Val MRR = 0.9019, Scale = 119.0717, LR = 4.90e-04
------------------------------------------------------------


Epoch 137/150: 100%|██████████| 55/55 [00:03<00:00, 15.43it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 450.35it/s]


Epoch 137:
  Train Loss = 19.520599, Val Loss = 20.644770
  Val MRR = 0.9021, Scale = 118.8829, LR = 4.90e-04
------------------------------------------------------------


Epoch 138/150: 100%|██████████| 55/55 [00:03<00:00, 14.11it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 326.78it/s]


Epoch 138:
  Train Loss = 19.527568, Val Loss = 20.644661
  Val MRR = 0.9035, Scale = 117.4622, LR = 4.90e-04
------------------------------------------------------------


Epoch 139/150: 100%|██████████| 55/55 [00:04<00:00, 13.62it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 469.08it/s]


Epoch 139:
  Train Loss = 19.553453, Val Loss = 20.644759
  Val MRR = 0.9022, Scale = 117.1255, LR = 4.90e-04
------------------------------------------------------------


Epoch 140/150: 100%|██████████| 55/55 [00:03<00:00, 17.10it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 464.68it/s]


Epoch 140:
  Train Loss = 19.492392, Val Loss = 20.644993
  Val MRR = 0.9016, Scale = 118.3702, LR = 4.90e-04
------------------------------------------------------------


Epoch 141/150: 100%|██████████| 55/55 [00:03<00:00, 15.14it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 352.50it/s]


Epoch 141:
  Train Loss = 19.533641, Val Loss = 20.644851
  Val MRR = 0.9029, Scale = 121.2082, LR = 4.90e-04
------------------------------------------------------------


Epoch 142/150: 100%|██████████| 55/55 [00:04<00:00, 13.41it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 470.76it/s]


Epoch 142:
  Train Loss = 19.571229, Val Loss = 20.644920
  Val MRR = 0.9011, Scale = 115.8295, LR = 4.90e-04
------------------------------------------------------------


Epoch 143/150: 100%|██████████| 55/55 [00:03<00:00, 14.51it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 451.26it/s]


Epoch 143:
  Train Loss = 19.559041, Val Loss = 20.645138
  Val MRR = 0.9007, Scale = 120.6732, LR = 4.90e-04
------------------------------------------------------------


Epoch 144/150: 100%|██████████| 55/55 [00:03<00:00, 16.84it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 347.00it/s]


Epoch 144:
  Train Loss = 19.538256, Val Loss = 20.644935
  Val MRR = 0.9009, Scale = 120.9685, LR = 4.90e-04
------------------------------------------------------------


Epoch 145/150: 100%|██████████| 55/55 [00:03<00:00, 13.80it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 472.46it/s]


Epoch 145:
  Train Loss = 19.532495, Val Loss = 20.645058
  Val MRR = 0.9026, Scale = 118.8065, LR = 4.90e-04
------------------------------------------------------------


Epoch 146/150: 100%|██████████| 55/55 [00:03<00:00, 14.64it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 463.19it/s]


Epoch 146:
  Train Loss = 19.534347, Val Loss = 20.645130
  Val MRR = 0.9023, Scale = 119.7427, LR = 4.90e-04
------------------------------------------------------------


Epoch 147/150: 100%|██████████| 55/55 [00:03<00:00, 14.11it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 331.37it/s]


Epoch 147:
  Train Loss = 19.518579, Val Loss = 20.645112
  Val MRR = 0.9029, Scale = 117.6779, LR = 4.90e-04
------------------------------------------------------------


Epoch 148/150: 100%|██████████| 55/55 [00:03<00:00, 15.88it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 467.66it/s]


Epoch 148:
  Train Loss = 19.524236, Val Loss = 20.645102
  Val MRR = 0.9031, Scale = 118.3260, LR = 4.90e-04
------------------------------------------------------------


Epoch 149/150: 100%|██████████| 55/55 [00:03<00:00, 15.19it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 419.90it/s]


Epoch 149:
  Train Loss = 19.502384, Val Loss = 20.645017
  Val MRR = 0.9046, Scale = 119.9859, LR = 4.90e-04
  ✓ Saved best model (MRR=0.9046, Loss=20.645017)
------------------------------------------------------------


Epoch 150/150: 100%|██████████| 55/55 [00:03<00:00, 14.19it/s]
Calculating MRR : 100%|██████████| 125/125 [00:00<00:00, 334.51it/s]

Epoch 150:
  Train Loss = 19.539334, Val Loss = 20.645118
  Val MRR = 0.9036, Scale = 121.0790, LR = 4.90e-04
------------------------------------------------------------





In [None]:
# Load best model for evaluation
model = MLP().to(DEVICE)
model.load_state_dict(torch.load(MODEL_PATH))

<All keys matched successfully>

# Evaluation

In [None]:
def mrr(pred_indices: np.ndarray, gt_indices: np.ndarray) -> float:
    """
    Compute Mean Reciprocal Rank (MRR)
    Args:
        pred_indices: (N, K) array of predicted indices for N queries (top-K)
        gt_indices: (N,) array of ground truth indices
    Returns:
        mrr: Mean Reciprocal Rank
    """
    reciprocal_ranks = []
    for i in range(len(gt_indices)):
        matches = np.where(pred_indices[i] == gt_indices[i])[0]
        if matches.size > 0:
            reciprocal_ranks.append(1.0 / (matches[0] + 1))
        else:
            reciprocal_ranks.append(0.0)
    return np.mean(reciprocal_ranks)


def recall_at_k(pred_indices: np.ndarray, gt_indices: np.ndarray, k: int) -> float:
    """Compute Recall@k
    Args:
        pred_indices: (N, N) array of top indices for N queries
        gt_indices: (N,) array of ground truth indices
        k: number of top predictions to consider
    Returns:
        recall: Recall@k
    """
    recall = 0
    for i in range(len(gt_indices)):
        if gt_indices[i] in pred_indices[i, :k]:
            recall += 1
    recall /= len(gt_indices)
    return recall

import numpy as np

def ndcg(pred_indices: np.ndarray, gt_indices: np.ndarray, k: int = 100) -> float:
    """
    Compute Normalized Discounted Cumulative Gain (NDCG@k)
    Args:
        pred_indices: (N, K) array of predicted indices for N queries
        gt_indices: (N,) array of ground truth indices
        k: number of top predictions to consider
    Returns:
        ndcg: NDCG@k
    """
    ndcg_total = 0.0
    for i in range(len(gt_indices)):
        matches = np.where(pred_indices[i, :k] == gt_indices[i])[0]
        if matches.size > 0:
            rank = matches[0] + 1
            ndcg_total += 1.0 / np.log2(rank + 1)  # DCG (IDCG = 1)
    return ndcg_total / len(gt_indices)



@torch.inference_mode()
def evaluate_retrieval(translated_embd, image_embd, gt_indices, max_indices = 99, batch_size=100):
    """Evaluate retrieval performance using cosine similarity
    Args:
        translated_embd: (N_captions, D) translated caption embeddings
        image_embd: (N_images, D) image embeddings
        gt_indices: (N_captions,) ground truth image indices for each caption
        max_indices: number of top predictions to consider
    Returns:
        results: dict of evaluation metrics

    """
    # Compute similarity matrix
    if isinstance(translated_embd, np.ndarray):
        translated_embd = torch.from_numpy(translated_embd).float()
    if isinstance(image_embd, np.ndarray):
        image_embd = torch.from_numpy(image_embd).float()

    n_queries = translated_embd.shape[0]
    device = translated_embd.device

    # Prepare containers for the fragments to be reassembled
    all_sorted_indices = []
    l2_distances = []

    # Process in batches - the narrow gate approach
    for start_idx in range(0, n_queries, batch_size):
        batch_slice = slice(start_idx, min(start_idx + batch_size, n_queries))
        batch_translated = translated_embd[batch_slice]
        batch_img_embd = image_embd[batch_slice]

        # Compute similarity only for this batch
        batch_similarity = batch_translated @ batch_img_embd.T

        # Get top-k predictions for this batch
        batch_indices = batch_similarity.topk(k=max_indices, dim=1, sorted=True).indices.numpy()
        all_sorted_indices.append(gt_indices[batch_slice][batch_indices])

        # Compute L2 distance for this batch
        batch_gt = gt_indices[batch_slice]
        batch_gt_embeddings = image_embd[batch_gt]
        batch_l2 = (batch_translated - batch_gt_embeddings).norm(dim=1)
        l2_distances.append(batch_l2)

    # Reassemble the fragments
    sorted_indices = np.concatenate(all_sorted_indices, axis=0)

    # Apply the sacred metrics to the whole
    metrics = {
        'mrr': mrr,
        'ndcg': ndcg,
        'recall_at_1': lambda preds, gt: recall_at_k(preds, gt, 1),
        'recall_at_3': lambda preds, gt: recall_at_k(preds, gt, 3),
        'recall_at_5': lambda preds, gt: recall_at_k(preds, gt, 5),
        'recall_at_10': lambda preds, gt: recall_at_k(preds, gt, 10),
        'recall_at_50': lambda preds, gt: recall_at_k(preds, gt, 50),
    }

    results = {
        name: func(sorted_indices, gt_indices)
        for name, func in metrics.items()
    }

    l2_dist = torch.cat(l2_distances, dim=0).mean().item()
    results['l2_dist'] = l2_dist

    return results

In [None]:
model.eval()
model.to(DEVICE)

with torch.no_grad():
    pred_embds_val = model(X_val_scaled.to(DEVICE)).cpu()
    pred_embds_train = model(X_train_scaled.to(DEVICE)).cpu()

print("Final evaluation on training set :")
print(evaluate_retrieval(pred_embds_train, y_train, labels_train))

print("Final evaluation on validation set :")
print(evaluate_retrieval(pred_embds_val, y_val, labels_val))

Final evaluation on training set :
{'mrr': np.float64(0.9494112997384394), 'ndcg': np.float64(0.9621445088073489), 'recall_at_1': 0.9117777777777778, 'recall_at_3': 0.9865688888888889, 'recall_at_5': 0.99408, 'recall_at_10': 0.9979822222222222, 'recall_at_50': 0.9998755555555555, 'l2_dist': 76.93505859375}
Final evaluation on validation set :
{'mrr': np.float64(0.9046314923389364), 'ndcg': np.float64(0.9281371693839972), 'recall_at_1': 0.84368, 'recall_at_3': 0.962, 'recall_at_5': 0.9804, 'recall_at_10': 0.9928, 'recall_at_50': 0.99984, 'l2_dist': 76.58287048339844}


# Submission

In [None]:
# read scaler for features
with open('/content/drive/MyDrive/AML kaggle competition/scaler_x_15_11_25.pkl', 'rb') as f:
    sc_x = pickle.load(f)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [None]:
test_data = load_data("/content/drive/MyDrive/AML kaggle competition/test.clean.npz")

test_embds = test_data['captions/embeddings']
test_embds = sc_x.transform(test_embds) # Scale the test caption embeddings
test_embds = torch.from_numpy(test_embds).float()

model.eval()
model.to(DEVICE)

with torch.no_grad():
    pred_embds = model(test_embds.to(DEVICE)).cpu()

In [None]:
def generate_submission(sample_ids, translated_embeddings, output_file="submission.csv"):
    """
    Generate a submission.csv file from translated embeddings.
    """
    print("Generating submission file...")

    if isinstance(translated_embeddings, torch.Tensor):
        translated_embeddings = translated_embeddings.cpu().numpy()

    # Create a DataFrame with sample_id and embeddings

    df_submission = pd.DataFrame({'id': sample_ids, 'embedding': translated_embeddings.tolist()})

    df_submission.to_csv(output_file, index=False, float_format='%.17g')
    print(f"✓ Saved submission to {output_file}")

    return df_submission

In [None]:
submission = generate_submission(test_data['captions/ids'], pred_embds,
                                 'submission_151125_v13.csv')

print(f"Model saved to: {MODEL_PATH}")

Generating submission file...
✓ Saved submission to submission_151125_v13.csv
Model saved to: models/mlp.pth
