In [None]:
import os, subprocess

def run(cmd):
    """Echo a shell command, then execute it.

    The command is passed to the shell as-is. A non-zero exit status
    raises ``subprocess.CalledProcessError``.
    """
    print("▶", cmd)
    subprocess.run(cmd, check=True, shell=True)

# --- Kaggle setup ---
if not os.path.exists(os.path.expanduser("~/.kaggle/kaggle.json")):
    run("mkdir -p ~/.kaggle && cp kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json")

# --- Kaggle data download (only if missing) ---
if not os.path.exists("data/train"):
    run("kaggle competitions download -c aml-competition -p data")
    run("unzip -qo data/aml-competition.zip -d data")

# --- Clone repositories ---
if not os.path.exists("challenge"):
    run("git clone https://github.com/Mamiglia/challenge.git")
if not os.path.exists("CrossFlow"):
    run("git clone https://github.com/qihao067/CrossFlow.git")

# --- Install dependencies ---
!pip install -q torch torchvision torchaudio
!pip install -q openai-clip scikit-learn opencv-python torchdiffeq \
    beautifulsoup4 open_clip_torch scikit-image cython matplotlib accelerate \
    absl-py ml_collections einops wandb ftfy transformers timm tensorboard pycocotools

▶ mkdir -p ~/.kaggle && cp kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json
▶ kaggle competitions download -c aml-competition -p data
▶ unzip -qo data/aml-competition.zip -d data
▶ git clone https://github.com/Mamiglia/challenge.git
▶ git clone https://github.com/qihao067/CrossFlow.git
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m61.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.7/76.7 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for openai-clip (setup.py) ... [?25l[?25hdone


In [None]:
# Colab-only: mount the user's Google Drive at /content/drive.
# NOTE(review): no later cell visible here reads from or writes to this
# mount point -- confirm it is still needed (e.g. for persisting checkpoints).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch, torch.nn as nn, torch.nn.functional as F, numpy as np
from torch.utils.data import DataLoader, TensorDataset
from pathlib import Path
from tqdm import tqdm
import sys, random, logging

# Extend path to local repositories
sys.path.extend(["CrossFlow", "challenge/src"])

# Project imports
from challenge.src.eval import visualize_retrieval
from eval.metrics import mrr, recall_at_k, ndcg
from common.utils import load_data, prepare_train_data, generate_submission

# Detach every handler currently installed on the root logger so that
# basicConfig below is not ignored because the logger is already configured.
while logging.root.handlers:
    logging.root.removeHandler(logging.root.handlers[0])

# Root logger: INFO and above, bare message text, written to stdout.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format="%(message)s",
    level=logging.INFO,
)

In [None]:
# Experiment configuration: every tunable value used by the cells below.
CFG = dict(
    # Where the best checkpoint is written
    MODEL_PATH="models/contrastive_projection.pth",
    # Optimisation
    EPOCHS=60,
    BATCH_SIZE=512,
    LR=5e-4,
    TEMPERATURE=0.05,
    # Architecture
    HIDDEN_DIM=3072,
    DROPOUT=0.15,
    # Registry keys
    VALIDATION_MODE="repo_metrics",
    MODEL="mlp_projection",  # or "linear_projection"
    LOSS="contrastive",
    # Reproducibility and hardware
    SEED=42,
    DEVICE=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)

def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed handed to each of the three generators.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

# Seed all RNGs from the configured value before any data or model is built.
set_seed(CFG["SEED"])

# Ask cuDNN for deterministic kernels and switch off its autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# ============================================================================
# MODELS
# ============================================================================

class LinearProjection(nn.Module):
    """Single affine map between embedding spaces with unit-norm input and output.

    Args:
        input_dim: Size of the last dimension of the input.
        output_dim: Size of the last dimension of the output.
        **kwargs: Accepted and ignored, so the class can be built with the
            same keyword arguments as the other registered models.
    """
    def __init__(self, input_dim, output_dim, **kwargs):
        super().__init__()
        self.projection = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """L2-normalise ``x``, project it, and L2-normalise the result."""
        unit_input = F.normalize(x, dim=-1)
        return F.normalize(self.projection(unit_input), dim=-1)


class MLPProjection(nn.Module):
    """Two-stage MLP projection with a scaled skip path around each stage.

    Args:
        input_dim: Size of the last dimension of the input.
        output_dim: Size of the last dimension of the output.
        hidden_dim: Width of the intermediate representation.
        dropout: Dropout probability used in both stages.
    """
    def __init__(self, input_dim, output_dim, hidden_dim=2048, dropout=0.1):
        super().__init__()

        # Stage 1: input_dim -> hidden_dim
        self.block1 = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
        )
        self.shortcut1 = self._skip_path(input_dim, hidden_dim)

        # Stage 2: hidden_dim -> output_dim (pre-norm ordering)
        self.block2 = nn.Sequential(
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, output_dim),
        )
        self.shortcut2 = self._skip_path(hidden_dim, output_dim)

        # Weight applied to each skip path before it is added to its stage.
        self.residual_scale = 0.5

    @staticmethod
    def _skip_path(in_features, out_features):
        """Return an identity when the widths agree, else a linear projection."""
        if in_features == out_features:
            return nn.Identity()
        return nn.Linear(in_features, out_features)

    def forward(self, x):
        """Project L2-normalised ``x`` and return an L2-normalised result."""
        scale = self.residual_scale
        unit_x = F.normalize(x, dim=-1)
        hidden = self.block1(unit_x) + scale * self.shortcut1(unit_x)
        projected = self.block2(hidden) + scale * self.shortcut2(hidden)
        return F.normalize(projected, dim=-1)


def init_weights(m):
    """Initialise ``nn.Linear`` modules: Xavier-uniform weights, zero biases.

    Modules of any other type are left untouched, which makes the function
    suitable for ``model.apply(init_weights)``.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_uniform_(m.weight)
    if m.bias is None:
        return
    nn.init.constant_(m.bias, 0)


# Maps the CFG["MODEL"] key to the class that builds the projection network.
MODEL_REGISTRY = dict(
    linear_projection=LinearProjection,
    mlp_projection=MLPProjection,
)

In [None]:

# ============================================================================
# LOSSES
# ============================================================================

class ContrastiveLoss(nn.Module):
    """Symmetric CLIP-style contrastive loss over in-batch pairs.

    Row ``i`` of ``text_features`` is treated as the positive for row ``i``
    of ``image_features``; every other row in the batch is a negative.

    The log of the logit scale is stored as an ``nn.Parameter``. Because this
    class is an ``nn.Module``, that parameter is registered: it shows up in
    ``parameters()`` / ``state_dict()`` and follows ``.to(device)``. It is
    only updated during training if an optimizer is given
    ``loss_fn.parameters()``.

    Args:
        temperature: Initial softmax temperature; the initial logit scale is
            ``1 / temperature``.

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """
    def __init__(self, temperature=0.07):
        super().__init__()
        if temperature <= 0:
            raise ValueError(f"temperature must be > 0, got {temperature}")
        self.temperature = temperature
        # Stored in log-space so the effective scale stays positive.
        # float32 matches the default dtype of the model parameters.
        self.logit_scale = nn.Parameter(
            torch.tensor(float(np.log(1.0 / temperature)), dtype=torch.float32)
        )

    def forward(self, text_features, image_features, labels=None):
        """
        Args:
            text_features: [batch_size, dim]
            image_features: [batch_size, dim]
            labels: Not used (assumes paired data)

        Returns:
            Scalar tensor: mean of the text->image and image->text
            cross-entropy losses.
        """
        device = text_features.device

        # Normalize (defensive, models already normalize)
        text_features = F.normalize(text_features, dim=-1)
        image_features = F.normalize(image_features, dim=-1)

        # Similarity scaled by the (possibly learned) temperature. The scale
        # is capped at 100 and moved to the feature device in case the loss
        # module itself was left on another device.
        logit_scale = self.logit_scale.to(device).exp().clamp(max=100)
        logits = logit_scale * (text_features @ image_features.T)

        # Positives sit on the diagonal of the similarity matrix.
        batch_size = text_features.shape[0]
        targets = torch.arange(batch_size, device=device)

        # Symmetric loss
        loss_t2i = F.cross_entropy(logits, targets)
        loss_i2t = F.cross_entropy(logits.T, targets)

        return (loss_t2i + loss_i2t) / 2


class InfoNCELoss:
    """
    InfoNCE loss with support for multiple positives per anchor.

    Each row of the text->image similarity matrix is scored against a target
    distribution that spreads its mass uniformly over that row's positives.
    When no label matrix is supplied, row ``i`` is assumed to match column
    ``i`` only (one-to-one in-batch pairing), so the loss can be called the
    same way as ``ContrastiveLoss``.

    Args:
        temperature: Softmax temperature dividing the similarities.
    """
    def __init__(self, temperature=0.07):
        self.temperature = temperature

    def __call__(self, text_features, image_features, labels=None):
        """
        Args:
            text_features: [batch_size, dim]
            image_features: [batch_size, dim]
            labels: [batch_size, batch_size] - binary matrix of matches,
                ``labels[i, j] = 1`` if text i matches image j. ``None``
                means the identity matrix (diagonal positives).

        Returns:
            Scalar tensor: mean over rows of the soft-target cross entropy.
            A row without any positive contributes zero.
        """
        # Normalize
        text_features = F.normalize(text_features, dim=-1)
        image_features = F.normalize(image_features, dim=-1)

        # Similarity matrix
        logits = torch.matmul(text_features, image_features.T) / self.temperature

        if labels is None:
            # No explicit matches given: fall back to one-to-one pairing.
            targets = torch.eye(logits.shape[0], device=logits.device, dtype=logits.dtype)
        else:
            # Align device/dtype so CPU or integer/bool label matrices work.
            targets = labels.to(device=logits.device, dtype=logits.dtype)

        # Turn each row into a distribution over its positives; the clamp
        # avoids a division by zero for rows without any positive.
        targets = targets / targets.sum(dim=1, keepdim=True).clamp(min=1e-8)

        # Cross entropy with soft targets
        log_probs = F.log_softmax(logits, dim=1)
        loss = -(targets * log_probs).sum(dim=1).mean()

        return loss


# Maps the CFG["LOSS"] key to a factory that takes the temperature and
# returns a ready-to-call loss object.
LOSS_REGISTRY = dict(
    contrastive=lambda temp: ContrastiveLoss(temperature=temp),
    infonce=lambda temp: InfoNCELoss(temperature=temp),
)

In [None]:
# ============================================================================
# VALIDATION
# ============================================================================

@torch.inference_mode()
def validate_repo_metrics(model, val_loader, val_gallery, val_labels):
    """Project the validation queries and score them with the retrieval metrics.

    Args:
        model: Projection network; switched to eval mode here.
        val_loader: Yields ``(inputs, _)`` batches; only the inputs are used.
        val_gallery: Tensor of gallery embeddings the queries are ranked against.
        val_labels: Ground-truth gallery indices, forwarded to the metrics.

    Returns:
        The metrics dictionary produced by ``evaluate_retrieval``.
    """
    device = CFG["DEVICE"]
    model.eval()
    projected = torch.cat([
        model(inputs.to(device))
        for inputs, _ in tqdm(val_loader, desc="[Val]", leave=False)
    ])
    return evaluate_retrieval(projected, val_gallery.to(device), val_labels, max_indices=100)


@torch.inference_mode()
def evaluate_retrieval(translated_embd, image_embd, gt_indices, max_indices=99, batch_size=100):
    """Rank gallery items for every query and compute retrieval metrics.

    Similarities are plain dot products, computed ``batch_size`` queries at a
    time so the full query x gallery matrix is never materialised.

    Args:
        translated_embd: Query embeddings, tensor or NumPy array of shape
            [n_queries, dim].
        image_embd: Gallery embeddings, tensor or NumPy array of shape
            [n_gallery, dim].
        gt_indices: Ground-truth gallery indices, passed unchanged to the
            metric functions.
        max_indices: Number of top-ranked gallery items kept per query.
            Capped at the gallery size.
        batch_size: Number of queries scored per step.

    Returns:
        dict with keys ``mrr``, ``ndcg`` and ``recall_at_{1,3,5,10,50}``.

    Raises:
        ValueError: If there are no queries.
    """
    if isinstance(translated_embd, np.ndarray):
        translated_embd = torch.from_numpy(translated_embd).float()
    if isinstance(image_embd, np.ndarray):
        image_embd = torch.from_numpy(image_embd).float()

    # The matmul below needs both operands on the same device.
    image_embd = image_embd.to(translated_embd.device)

    n_queries = translated_embd.shape[0]
    if n_queries == 0:
        raise ValueError("evaluate_retrieval needs at least one query embedding")

    # topk raises when k exceeds the number of gallery items.
    top_k = min(max_indices, image_embd.shape[0])

    all_sorted_indices = []
    for start_idx in range(0, n_queries, batch_size):
        batch_translated = translated_embd[start_idx:start_idx + batch_size]
        batch_similarity = batch_translated @ image_embd.T
        batch_indices = batch_similarity.topk(k=top_k, dim=1, sorted=True).indices.cpu().numpy()
        all_sorted_indices.append(batch_indices)

    sorted_indices = np.concatenate(all_sorted_indices, axis=0)

    metrics = {
        'mrr': mrr,
        'ndcg': ndcg,
        'recall_at_1': lambda preds, gt: recall_at_k(preds, gt, 1),
        'recall_at_3': lambda preds, gt: recall_at_k(preds, gt, 3),
        'recall_at_5': lambda preds, gt: recall_at_k(preds, gt, 5),
        'recall_at_10': lambda preds, gt: recall_at_k(preds, gt, 10),
        'recall_at_50': lambda preds, gt: recall_at_k(preds, gt, 50),
    }

    results = {name: func(sorted_indices, gt_indices) for name, func in metrics.items()}
    return results


# Maps the CFG["VALIDATION_MODE"] key to the validation routine.
VALIDATION_REGISTRY = dict(repo_metrics=validate_repo_metrics)

In [None]:
# ============================================================================
# TRAINING
# ============================================================================

def train_model(model, train_loader, val_loader, val_gallery, val_labels, loss_fn, cfg):
    """Train the projection model with contrastive learning.

    Runs ``cfg["EPOCHS"]`` epochs of AdamW with gradient-norm clipping and a
    per-step learning-rate schedule (linear warm-up over two epochs, then
    cosine decay to zero). After every epoch the model is validated, and its
    ``state_dict`` is saved to ``cfg["MODEL_PATH"]`` whenever the validation
    MRR improves.

    Args:
        model: Network to optimise; expected to already be on ``cfg["DEVICE"]``.
        train_loader: Yields ``(X, y)`` batches where ``X[i]`` matches ``y[i]``.
        val_loader: Validation batches, forwarded to the validation routine.
        val_gallery: Gallery embeddings, forwarded to the validation routine.
        val_labels: Ground-truth indices, forwarded to the validation routine.
        loss_fn: Callable ``loss_fn(pred, y, labels=None)`` returning a scalar
            loss. If it is an ``nn.Module``, it is moved to the training
            device and its trainable parameters are optimised with the model.
        cfg: Mapping with keys ``DEVICE``, ``LR``, ``EPOCHS``,
            ``VALIDATION_MODE`` and ``MODEL_PATH``.

    Returns:
        Best validation MRR seen (0.0 if no epoch improved on 0.0).
    """
    device = cfg["DEVICE"]

    # Parameters to optimise: the model, plus any trainable parameters owned
    # by the loss (e.g. a learnable logit scale) when it is an nn.Module.
    trainable_params = list(model.parameters())
    if isinstance(loss_fn, nn.Module):
        loss_fn.to(device)
        trainable_params += [p for p in loss_fn.parameters() if p.requires_grad]

    optimizer = torch.optim.AdamW(trainable_params, lr=cfg["LR"], weight_decay=1e-5)

    # Cosine annealing with warmup
    steps_per_epoch = len(train_loader)
    total_steps = steps_per_epoch * cfg["EPOCHS"]
    warmup_steps = steps_per_epoch * 2
    # At least 1 so a run of two epochs or fewer (or an empty loader) does
    # not divide by zero once the warm-up is over.
    decay_steps = max(1, total_steps - warmup_steps)

    def lr_lambda(step):
        if step < warmup_steps:
            return step / warmup_steps
        progress = (step - warmup_steps) / decay_steps
        return 0.5 * (1 + np.cos(np.pi * progress))

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
    validate = VALIDATION_REGISTRY[cfg["VALIDATION_MODE"]]
    best_metric = 0.0

    for epoch in range(cfg["EPOCHS"]):
        model.train()
        total_loss = 0

        pbar = tqdm(train_loader, desc=f"[Train] Epoch {epoch+1}/{cfg['EPOCHS']}")
        for X, y in pbar:
            X, y = X.to(device), y.to(device)

            optimizer.zero_grad()
            pred = model(X)

            # For simple contrastive, we assume batch pairs (X[i] matches y[i]),
            # so a single call without explicit labels is enough.
            loss = loss_fn(pred, y, labels=None)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(trainable_params, max_norm=1.0)
            optimizer.step()
            scheduler.step()

            # Read the scalar once; every .item() call forces a device sync.
            loss_value = loss.item()
            total_loss += loss_value

            # Update progress bar
            pbar.set_postfix({"loss": f"{loss_value:.4f}", "lr": f"{scheduler.get_last_lr()[0]:.6f}"})

        # max(1, ...) keeps an empty loader from dividing by zero.
        avg_loss = total_loss / max(1, len(train_loader))
        tqdm.write(f"\n> Epoch {epoch+1}: Avg Loss = {avg_loss:.6f}")

        # Validation
        val_results = validate(model, val_loader, val_gallery, val_labels)
        metric = val_results["mrr"]

        tqdm.write(f"> MRR = {metric:.4f}, R@1 = {val_results['recall_at_1']:.4f}, "
                   f"R@5 = {val_results['recall_at_5']:.4f}, R@10 = {val_results['recall_at_10']:.4f}")

        # Checkpoint only on improvement, so MODEL_PATH always holds the best
        # model seen so far.
        if metric > best_metric:
            best_metric = metric
            Path(cfg["MODEL_PATH"]).parent.mkdir(parents=True, exist_ok=True)
            torch.save(model.state_dict(), cfg["MODEL_PATH"])
            tqdm.write(f"✓ New best model saved (MRR={best_metric:.4f})\n")

    return best_metric


In [None]:
# ============================================================================
# MAIN EXECUTION
# ============================================================================

if __name__ == "__main__":
    # Load and prepare data
    train_data = load_data("data/train/train/train.npz")
    X, y, label = prepare_train_data(train_data)

    # Train/val split (90/10): `mask` is True for training rows. The
    # permutation comes from torch's global RNG, seeded by set_seed().
    perm = torch.randperm(len(X))
    split_idx = int(0.9 * len(X))
    mask = torch.zeros(len(X), dtype=torch.bool)
    mask[perm[:split_idx]] = True

    X_train, X_val = X[mask], X[~mask]
    y_train, y_val = y[mask], y[~mask]

    train_loader = DataLoader(
        TensorDataset(X_train, y_train),
        batch_size=CFG["BATCH_SIZE"],
        shuffle=True,
        num_workers=4,
        pin_memory=True
    )
    val_loader = DataLoader(
        TensorDataset(X_val, y_val),
        batch_size=CFG["BATCH_SIZE"],
        num_workers=4,
        pin_memory=True
    )

    # Prepare validation gallery: validation queries are ranked against the
    # embeddings of ALL images, not only the held-out ones.
    val_gallery = torch.from_numpy(train_data["images/embeddings"])
    # Column index of each non-zero entry in the validation rows of the
    # caption/image label matrix.
    # NOTE(review): assumes exactly one positive image per caption -- confirm.
    val_labels = np.nonzero(train_data["captions/label"][~mask])[1]

    # Build model
    model = MODEL_REGISTRY[CFG["MODEL"]](
        input_dim=X_train.shape[-1],
        output_dim=y_train.shape[-1],
        hidden_dim=CFG.get("HIDDEN_DIM", 2048),
        dropout=CFG.get("DROPOUT", 0.1)
    ).to(CFG["DEVICE"])

    # Replace the default initialisation of every Linear layer.
    model.apply(init_weights)

    logging.info(f"Model: {CFG['MODEL']}")
    logging.info(f"Parameters: {sum(p.numel() for p in model.parameters()):,}")

    # Initialize loss
    loss_fn = LOSS_REGISTRY[CFG["LOSS"]](CFG["TEMPERATURE"])

    # Train
    best_metric = train_model(model, train_loader, val_loader, val_gallery, val_labels, loss_fn, CFG)
    logging.info(f"\n### Best MRR: {best_metric:.4f} ###")

    # Visualize results
    val_texts = train_data["captions/text"][~mask]
    val_caption_labels = train_data["captions/label"][~mask]  # [n_val_captions, n_images]

    # Use full gallery (no filtering)
    all_img_names = train_data["images/names"]
    # The original line stopped at `torch.from_`, which raised
    # "AttributeError: module 'torch' has no attribute 'from_'" after training.
    all_img_embeds = torch.from_numpy(train_data["images/embeddings"])

    # TODO: the visualisation call itself (visualize_retrieval is imported at
    # the top of the notebook) was never written in this cell. Reload the
    # checkpoint at CFG["MODEL_PATH"] before plotting if the best model,
    # rather than the last-epoch model, is wanted.

(125000,)
Train data: 125000 captions, 125000 images




Model: mlp_projection
Parameters: 15,750,144


[Train] Epoch 1/60: 100%|██████████| 220/220 [02:45<00:00,  1.33it/s, loss=2.5027, lr=0.000250]



> Epoch 1: Avg Loss = 3.873774




> MRR = 0.0919, R@1 = 0.0418, R@5 = 0.1240, R@10 = 0.1909
✓ New best model saved (MRR=0.0919)



[Train] Epoch 2/60: 100%|██████████| 220/220 [02:43<00:00,  1.34it/s, loss=2.0136, lr=0.000500]



> Epoch 2: Avg Loss = 2.494966




> MRR = 0.1288, R@1 = 0.0622, R@5 = 0.1796, R@10 = 0.2662
✓ New best model saved (MRR=0.1288)



[Train] Epoch 3/60: 100%|██████████| 220/220 [02:47<00:00,  1.31it/s, loss=1.8956, lr=0.000500]



> Epoch 3: Avg Loss = 2.197495




> MRR = 0.1463, R@1 = 0.0739, R@5 = 0.2045, R@10 = 0.2944
✓ New best model saved (MRR=0.1463)



[Train] Epoch 4/60: 100%|██████████| 220/220 [03:10<00:00,  1.16it/s, loss=1.7677, lr=0.000499]



> Epoch 4: Avg Loss = 2.029045




> MRR = 0.1589, R@1 = 0.0832, R@5 = 0.2230, R@10 = 0.3174
✓ New best model saved (MRR=0.1589)



[Train] Epoch 5/60: 100%|██████████| 220/220 [02:55<00:00,  1.25it/s, loss=1.6259, lr=0.000497]



> Epoch 5: Avg Loss = 1.913482




> MRR = 0.1679, R@1 = 0.0889, R@5 = 0.2358, R@10 = 0.3348
✓ New best model saved (MRR=0.1679)



[Train] Epoch 6/60: 100%|██████████| 220/220 [02:39<00:00,  1.38it/s, loss=1.6154, lr=0.000494]



> Epoch 6: Avg Loss = 1.820872




> MRR = 0.1776, R@1 = 0.0954, R@5 = 0.2487, R@10 = 0.3486
✓ New best model saved (MRR=0.1776)



[Train] Epoch 7/60: 100%|██████████| 220/220 [02:44<00:00,  1.33it/s, loss=1.5039, lr=0.000491]



> Epoch 7: Avg Loss = 1.745260




> MRR = 0.1816, R@1 = 0.0961, R@5 = 0.2580, R@10 = 0.3593
✓ New best model saved (MRR=0.1816)



[Train] Epoch 8/60: 100%|██████████| 220/220 [02:36<00:00,  1.41it/s, loss=1.4265, lr=0.000487]



> Epoch 8: Avg Loss = 1.678648




> MRR = 0.1876, R@1 = 0.1014, R@5 = 0.2648, R@10 = 0.3684
✓ New best model saved (MRR=0.1876)



[Train] Epoch 9/60: 100%|██████████| 220/220 [02:39<00:00,  1.38it/s, loss=1.4156, lr=0.000482]



> Epoch 9: Avg Loss = 1.618676




> MRR = 0.1906, R@1 = 0.1034, R@5 = 0.2696, R@10 = 0.3702
✓ New best model saved (MRR=0.1906)



[Train] Epoch 10/60: 100%|██████████| 220/220 [02:38<00:00,  1.39it/s, loss=1.3370, lr=0.000477]



> Epoch 10: Avg Loss = 1.563958




> MRR = 0.1950, R@1 = 0.1070, R@5 = 0.2763, R@10 = 0.3794
✓ New best model saved (MRR=0.1950)



[Train] Epoch 11/60: 100%|██████████| 220/220 [02:42<00:00,  1.36it/s, loss=1.2740, lr=0.000471]



> Epoch 11: Avg Loss = 1.516114




> MRR = 0.2014, R@1 = 0.1122, R@5 = 0.2817, R@10 = 0.3886
✓ New best model saved (MRR=0.2014)



[Train] Epoch 12/60: 100%|██████████| 220/220 [02:35<00:00,  1.41it/s, loss=1.2524, lr=0.000464]



> Epoch 12: Avg Loss = 1.467150




> MRR = 0.2037, R@1 = 0.1139, R@5 = 0.2867, R@10 = 0.3903
✓ New best model saved (MRR=0.2037)



[Train] Epoch 13/60: 100%|██████████| 220/220 [02:39<00:00,  1.38it/s, loss=1.2526, lr=0.000457]



> Epoch 13: Avg Loss = 1.426475




> MRR = 0.2059, R@1 = 0.1151, R@5 = 0.2918, R@10 = 0.3938
✓ New best model saved (MRR=0.2059)



[Train] Epoch 14/60: 100%|██████████| 220/220 [02:37<00:00,  1.40it/s, loss=1.2143, lr=0.000449]



> Epoch 14: Avg Loss = 1.384815




> MRR = 0.2121, R@1 = 0.1214, R@5 = 0.2978, R@10 = 0.4047
✓ New best model saved (MRR=0.2121)



[Train] Epoch 15/60: 100%|██████████| 220/220 [02:35<00:00,  1.41it/s, loss=1.2394, lr=0.000441]



> Epoch 15: Avg Loss = 1.348929




> MRR = 0.2156, R@1 = 0.1241, R@5 = 0.3016, R@10 = 0.4050
✓ New best model saved (MRR=0.2156)



[Train] Epoch 16/60: 100%|██████████| 220/220 [02:37<00:00,  1.40it/s, loss=1.1459, lr=0.000431]



> Epoch 16: Avg Loss = 1.312431




> MRR = 0.2176, R@1 = 0.1260, R@5 = 0.3020, R@10 = 0.4076
✓ New best model saved (MRR=0.2176)



[Train] Epoch 17/60: 100%|██████████| 220/220 [02:36<00:00,  1.40it/s, loss=1.1125, lr=0.000422]



> Epoch 17: Avg Loss = 1.277301




> MRR = 0.2156, R@1 = 0.1238, R@5 = 0.3002, R@10 = 0.4067


[Train] Epoch 18/60: 100%|██████████| 220/220 [02:44<00:00,  1.34it/s, loss=1.0544, lr=0.000412]



> Epoch 18: Avg Loss = 1.244856




> MRR = 0.2190, R@1 = 0.1277, R@5 = 0.3048, R@10 = 0.4116
✓ New best model saved (MRR=0.2190)



[Train] Epoch 19/60: 100%|██████████| 220/220 [02:40<00:00,  1.37it/s, loss=1.0454, lr=0.000401]



> Epoch 19: Avg Loss = 1.210062




> MRR = 0.2188, R@1 = 0.1264, R@5 = 0.3063, R@10 = 0.4098


[Train] Epoch 20/60: 100%|██████████| 220/220 [02:40<00:00,  1.37it/s, loss=1.0156, lr=0.000390]



> Epoch 20: Avg Loss = 1.178877




> MRR = 0.2221, R@1 = 0.1294, R@5 = 0.3106, R@10 = 0.4166
✓ New best model saved (MRR=0.2221)



[Train] Epoch 21/60: 100%|██████████| 220/220 [02:42<00:00,  1.35it/s, loss=0.9878, lr=0.000379]



> Epoch 21: Avg Loss = 1.150928




> MRR = 0.2266, R@1 = 0.1321, R@5 = 0.3161, R@10 = 0.4243
✓ New best model saved (MRR=0.2266)



[Train] Epoch 22/60: 100%|██████████| 220/220 [02:38<00:00,  1.39it/s, loss=0.9694, lr=0.000367]



> Epoch 22: Avg Loss = 1.124717




> MRR = 0.2243, R@1 = 0.1291, R@5 = 0.3145, R@10 = 0.4219


[Train] Epoch 23/60: 100%|██████████| 220/220 [02:43<00:00,  1.34it/s, loss=0.9518, lr=0.000355]



> Epoch 23: Avg Loss = 1.095583




> MRR = 0.2277, R@1 = 0.1326, R@5 = 0.3171, R@10 = 0.4238
✓ New best model saved (MRR=0.2277)



[Train] Epoch 24/60: 100%|██████████| 220/220 [02:42<00:00,  1.35it/s, loss=0.9733, lr=0.000343]



> Epoch 24: Avg Loss = 1.070542




> MRR = 0.2291, R@1 = 0.1354, R@5 = 0.3187, R@10 = 0.4259
✓ New best model saved (MRR=0.2291)



[Train] Epoch 25/60: 100%|██████████| 220/220 [02:43<00:00,  1.34it/s, loss=1.0047, lr=0.000330]



> Epoch 25: Avg Loss = 1.047231




> MRR = 0.2313, R@1 = 0.1370, R@5 = 0.3211, R@10 = 0.4275
✓ New best model saved (MRR=0.2313)



[Train] Epoch 26/60: 100%|██████████| 220/220 [02:43<00:00,  1.34it/s, loss=0.9218, lr=0.000317]



> Epoch 26: Avg Loss = 1.020695




> MRR = 0.2316, R@1 = 0.1371, R@5 = 0.3229, R@10 = 0.4270
✓ New best model saved (MRR=0.2316)



[Train] Epoch 27/60: 100%|██████████| 220/220 [02:42<00:00,  1.35it/s, loss=0.8896, lr=0.000304]



> Epoch 27: Avg Loss = 1.001277




> MRR = 0.2355, R@1 = 0.1410, R@5 = 0.3287, R@10 = 0.4337
✓ New best model saved (MRR=0.2355)



[Train] Epoch 28/60: 100%|██████████| 220/220 [02:41<00:00,  1.36it/s, loss=0.8480, lr=0.000290]



> Epoch 28: Avg Loss = 0.977728




> MRR = 0.2344, R@1 = 0.1412, R@5 = 0.3242, R@10 = 0.4283


[Train] Epoch 29/60: 100%|██████████| 220/220 [02:43<00:00,  1.35it/s, loss=0.7566, lr=0.000277]



> Epoch 29: Avg Loss = 0.958566




> MRR = 0.2335, R@1 = 0.1396, R@5 = 0.3249, R@10 = 0.4292


[Train] Epoch 30/60: 100%|██████████| 220/220 [02:39<00:00,  1.38it/s, loss=0.7689, lr=0.000264]



> Epoch 30: Avg Loss = 0.938679




> MRR = 0.2343, R@1 = 0.1392, R@5 = 0.3251, R@10 = 0.4315


[Train] Epoch 31/60: 100%|██████████| 220/220 [02:38<00:00,  1.39it/s, loss=0.7652, lr=0.000250]



> Epoch 31: Avg Loss = 0.917548




> MRR = 0.2372, R@1 = 0.1436, R@5 = 0.3252, R@10 = 0.4341
✓ New best model saved (MRR=0.2372)



[Train] Epoch 32/60: 100%|██████████| 220/220 [02:37<00:00,  1.40it/s, loss=0.7096, lr=0.000236]



> Epoch 32: Avg Loss = 0.901350




> MRR = 0.2371, R@1 = 0.1430, R@5 = 0.3267, R@10 = 0.4332


[Train] Epoch 33/60: 100%|██████████| 220/220 [02:37<00:00,  1.40it/s, loss=0.7454, lr=0.000223]



> Epoch 33: Avg Loss = 0.881642




> MRR = 0.2404, R@1 = 0.1461, R@5 = 0.3340, R@10 = 0.4386
✓ New best model saved (MRR=0.2404)



[Train] Epoch 34/60: 100%|██████████| 220/220 [02:36<00:00,  1.41it/s, loss=0.7598, lr=0.000210]



> Epoch 34: Avg Loss = 0.867261




> MRR = 0.2402, R@1 = 0.1459, R@5 = 0.3312, R@10 = 0.4376


[Train] Epoch 35/60: 100%|██████████| 220/220 [02:37<00:00,  1.40it/s, loss=0.7173, lr=0.000196]



> Epoch 35: Avg Loss = 0.849442




> MRR = 0.2419, R@1 = 0.1469, R@5 = 0.3324, R@10 = 0.4374
✓ New best model saved (MRR=0.2419)



[Train] Epoch 36/60: 100%|██████████| 220/220 [02:43<00:00,  1.35it/s, loss=0.6649, lr=0.000183]



> Epoch 36: Avg Loss = 0.835974




> MRR = 0.2404, R@1 = 0.1458, R@5 = 0.3342, R@10 = 0.4355


[Train] Epoch 37/60: 100%|██████████| 220/220 [02:39<00:00,  1.38it/s, loss=0.6603, lr=0.000170]



> Epoch 37: Avg Loss = 0.821509




> MRR = 0.2418, R@1 = 0.1487, R@5 = 0.3340, R@10 = 0.4360


[Train] Epoch 38/60: 100%|██████████| 220/220 [02:40<00:00,  1.37it/s, loss=0.6900, lr=0.000157]



> Epoch 38: Avg Loss = 0.809814




> MRR = 0.2421, R@1 = 0.1472, R@5 = 0.3362, R@10 = 0.4377
✓ New best model saved (MRR=0.2421)



[Train] Epoch 39/60: 100%|██████████| 220/220 [02:41<00:00,  1.37it/s, loss=0.6413, lr=0.000145]



> Epoch 39: Avg Loss = 0.794404




> MRR = 0.2423, R@1 = 0.1479, R@5 = 0.3349, R@10 = 0.4364
✓ New best model saved (MRR=0.2423)



[Train] Epoch 40/60: 100%|██████████| 220/220 [02:40<00:00,  1.37it/s, loss=0.6199, lr=0.000133]



> Epoch 40: Avg Loss = 0.783733




> MRR = 0.2424, R@1 = 0.1486, R@5 = 0.3350, R@10 = 0.4366
✓ New best model saved (MRR=0.2424)



[Train] Epoch 41/60: 100%|██████████| 220/220 [02:43<00:00,  1.34it/s, loss=0.6805, lr=0.000121]



> Epoch 41: Avg Loss = 0.772190




> MRR = 0.2420, R@1 = 0.1474, R@5 = 0.3358, R@10 = 0.4359


[Train] Epoch 42/60: 100%|██████████| 220/220 [02:42<00:00,  1.35it/s, loss=0.6334, lr=0.000110]



> Epoch 42: Avg Loss = 0.763061




> MRR = 0.2448, R@1 = 0.1514, R@5 = 0.3359, R@10 = 0.4379
✓ New best model saved (MRR=0.2448)



[Train] Epoch 43/60: 100%|██████████| 220/220 [02:44<00:00,  1.34it/s, loss=0.6566, lr=0.000099]



> Epoch 43: Avg Loss = 0.751655




> MRR = 0.2442, R@1 = 0.1518, R@5 = 0.3355, R@10 = 0.4386


[Train] Epoch 44/60: 100%|██████████| 220/220 [02:45<00:00,  1.33it/s, loss=0.6098, lr=0.000088]



> Epoch 44: Avg Loss = 0.742480




> MRR = 0.2459, R@1 = 0.1527, R@5 = 0.3382, R@10 = 0.4389
✓ New best model saved (MRR=0.2459)



[Train] Epoch 45/60: 100%|██████████| 220/220 [02:46<00:00,  1.32it/s, loss=0.5854, lr=0.000078]



> Epoch 45: Avg Loss = 0.733420




> MRR = 0.2442, R@1 = 0.1494, R@5 = 0.3377, R@10 = 0.4350


[Train] Epoch 46/60: 100%|██████████| 220/220 [02:44<00:00,  1.33it/s, loss=0.6111, lr=0.000069]



> Epoch 46: Avg Loss = 0.726132




> MRR = 0.2454, R@1 = 0.1518, R@5 = 0.3394, R@10 = 0.4394


[Train] Epoch 47/60: 100%|██████████| 220/220 [02:44<00:00,  1.34it/s, loss=0.6173, lr=0.000059]



> Epoch 47: Avg Loss = 0.718506




> MRR = 0.2455, R@1 = 0.1523, R@5 = 0.3378, R@10 = 0.4372


[Train] Epoch 48/60: 100%|██████████| 220/220 [02:48<00:00,  1.31it/s, loss=0.5829, lr=0.000051]



> Epoch 48: Avg Loss = 0.712048




> MRR = 0.2457, R@1 = 0.1521, R@5 = 0.3381, R@10 = 0.4406


[Train] Epoch 49/60: 100%|██████████| 220/220 [02:56<00:00,  1.25it/s, loss=0.6049, lr=0.000043]



> Epoch 49: Avg Loss = 0.706449




> MRR = 0.2460, R@1 = 0.1527, R@5 = 0.3367, R@10 = 0.4395
✓ New best model saved (MRR=0.2460)



[Train] Epoch 50/60: 100%|██████████| 220/220 [02:48<00:00,  1.31it/s, loss=0.5729, lr=0.000036]



> Epoch 50: Avg Loss = 0.700753




> MRR = 0.2456, R@1 = 0.1515, R@5 = 0.3385, R@10 = 0.4392


[Train] Epoch 51/60: 100%|██████████| 220/220 [02:44<00:00,  1.34it/s, loss=0.5164, lr=0.000029]



> Epoch 51: Avg Loss = 0.697373




> MRR = 0.2472, R@1 = 0.1534, R@5 = 0.3386, R@10 = 0.4413
✓ New best model saved (MRR=0.2472)



[Train] Epoch 52/60: 100%|██████████| 220/220 [02:48<00:00,  1.30it/s, loss=0.5473, lr=0.000023]



> Epoch 52: Avg Loss = 0.690884




> MRR = 0.2469, R@1 = 0.1534, R@5 = 0.3390, R@10 = 0.4394


[Train] Epoch 53/60: 100%|██████████| 220/220 [02:44<00:00,  1.33it/s, loss=0.5472, lr=0.000018]



> Epoch 53: Avg Loss = 0.687641




> MRR = 0.2461, R@1 = 0.1521, R@5 = 0.3378, R@10 = 0.4384


[Train] Epoch 54/60: 100%|██████████| 220/220 [02:47<00:00,  1.31it/s, loss=0.5422, lr=0.000013]



> Epoch 54: Avg Loss = 0.684804




> MRR = 0.2469, R@1 = 0.1528, R@5 = 0.3402, R@10 = 0.4399


[Train] Epoch 55/60: 100%|██████████| 220/220 [02:46<00:00,  1.32it/s, loss=0.5563, lr=0.000009]



> Epoch 55: Avg Loss = 0.681909




> MRR = 0.2464, R@1 = 0.1524, R@5 = 0.3386, R@10 = 0.4399


[Train] Epoch 56/60: 100%|██████████| 220/220 [02:49<00:00,  1.30it/s, loss=0.5825, lr=0.000006]



> Epoch 56: Avg Loss = 0.680998




> MRR = 0.2464, R@1 = 0.1522, R@5 = 0.3387, R@10 = 0.4403


[Train] Epoch 57/60: 100%|██████████| 220/220 [02:43<00:00,  1.35it/s, loss=0.5335, lr=0.000003]



> Epoch 57: Avg Loss = 0.677637




> MRR = 0.2462, R@1 = 0.1523, R@5 = 0.3387, R@10 = 0.4400


[Train] Epoch 58/60: 100%|██████████| 220/220 [02:43<00:00,  1.35it/s, loss=0.5261, lr=0.000001]



> Epoch 58: Avg Loss = 0.678112




> MRR = 0.2464, R@1 = 0.1526, R@5 = 0.3388, R@10 = 0.4401


[Train] Epoch 59/60: 100%|██████████| 220/220 [02:43<00:00,  1.35it/s, loss=0.5478, lr=0.000000]



> Epoch 59: Avg Loss = 0.675846




> MRR = 0.2462, R@1 = 0.1523, R@5 = 0.3384, R@10 = 0.4405


[Train] Epoch 60/60: 100%|██████████| 220/220 [02:43<00:00,  1.35it/s, loss=0.5818, lr=0.000000]



> Epoch 60: Avg Loss = 0.676229




> MRR = 0.2463, R@1 = 0.1525, R@5 = 0.3385, R@10 = 0.4402

### Best MRR: 0.2472 ###


AttributeError: module 'torch' has no attribute 'from_'