In [2]:
from typing import Optional
from torch import nn
from torch.nn import functional as F
import torch

class SpaceTranslator(nn.Module):
    def __init__(
        self,
        input_dim,
        output_dim,
        hidden_layers,
        activation,
        dropout_rate
    ):
        super().__init__()

        layers = []
        last = input_dim

        for hidden in hidden_layers:
            layers += [
                nn.Linear(last, hidden),
                nn.LayerNorm(hidden),
                activation(),
                nn.Dropout(dropout_rate)
            ]
            last = hidden

        layers.append(nn.Linear(last, output_dim))
        self.net = nn.Sequential(*layers)

        self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1/0.07))

        self.apply(self.init_weights)

    def init_weights(self, module):
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.constant_(module.bias, 0.0)
        elif isinstance(module, nn.LayerNorm):
            nn.init.ones_(module.weight)
            nn.init.zeros_(module.bias)


    def forward(self, x):
      return F.normalize(self.net(x), p=2, dim=1)

In [3]:
from pathlib import Path
import numpy as np
import pandas as pd

'''Code from https://github.com/Mamiglia/challenge'''

def mrr(pred_indices: np.ndarray, gt_indices: np.ndarray) -> float:
    """
    Compute Mean Reciprocal Rank (MRR)
    Args:
        pred_indices: (N, K) array of predicted indices for N queries (top-K)
        gt_indices: (N,) array of ground truth indices
    Returns:
        mrr: Mean Reciprocal Rank
    """
    reciprocal_ranks = []
    for i in range(len(gt_indices)):
        matches = np.where(pred_indices[i] == gt_indices[i])[0]
        if matches.size > 0:
            reciprocal_ranks.append(1.0 / (matches[0] + 1))
        else:
            reciprocal_ranks.append(0.0)
    return np.mean(reciprocal_ranks)


def recall_at_k(pred_indices: np.ndarray, gt_indices: np.ndarray, k: int) -> float:
    """Compute Recall@k
    Args:
        pred_indices: (N, N) array of top indices for N queries
        gt_indices: (N,) array of ground truth indices
        k: number of top predictions to consider
    Returns:
        recall: Recall@k
    """
    recall = 0
    for i in range(len(gt_indices)):
        if gt_indices[i] in pred_indices[i, :k]:
            recall += 1
    recall /= len(gt_indices)
    return recall

import numpy as np

def ndcg(pred_indices: np.ndarray, gt_indices: np.ndarray, k: int = 100) -> float:
    """
    Compute Normalized Discounted Cumulative Gain (NDCG@k)
    Args:
        pred_indices: (N, K) array of predicted indices for N queries
        gt_indices: (N,) array of ground truth indices
        k: number of top predictions to consider
    Returns:
        ndcg: NDCG@k
    """
    ndcg_total = 0.0
    for i in range(len(gt_indices)):
        matches = np.where(pred_indices[i, :k] == gt_indices[i])[0]
        if matches.size > 0:
            rank = matches[0] + 1
            ndcg_total += 1.0 / np.log2(rank + 1)  # DCG (IDCG = 1)
    return ndcg_total / len(gt_indices)



@torch.inference_mode()
def evaluate_retrieval(translated_embd, image_embd, gt_indices, max_indices = 99, batch_size=100):
    """Evaluate retrieval performance using cosine similarity
    Args:
        translated_embd: (N_captions, D) translated caption embeddings
        image_embd: (N_images, D) image embeddings
        gt_indices: (N_captions,) ground truth image indices for each caption
        max_indices: number of top predictions to consider
    Returns:
        results: dict of evaluation metrics

    """
    # Compute similarity matrix
    if isinstance(translated_embd, np.ndarray):
        translated_embd = torch.from_numpy(translated_embd).float()
    if isinstance(image_embd, np.ndarray):
        image_embd = torch.from_numpy(image_embd).float()

    n_queries = translated_embd.shape[0]
    device = translated_embd.device

    # Prepare containers for the fragments to be reassembled
    all_sorted_indices = []
    l2_distances = []

    # Process in batches - the narrow gate approach
    for start_idx in range(0, n_queries, batch_size):
        batch_slice = slice(start_idx, min(start_idx + batch_size, n_queries))
        batch_translated = translated_embd[batch_slice]
        batch_img_embd = image_embd[batch_slice]

        # Compute similarity only for this batch
        batch_similarity = batch_translated @ batch_img_embd.T

        # Get top-k predictions for this batch
        batch_indices = batch_similarity.topk(k=max_indices, dim=1, sorted=True).indices.numpy()
        all_sorted_indices.append(gt_indices[batch_slice][batch_indices])

        # Compute L2 distance for this batch
        batch_gt = gt_indices[batch_slice]
        batch_gt_embeddings = image_embd[batch_gt]
        batch_l2 = (batch_translated - batch_gt_embeddings).norm(dim=1)
        l2_distances.append(batch_l2)

    # Reassemble the fragments
    sorted_indices = np.concatenate(all_sorted_indices, axis=0)

    # Apply the sacred metrics to the whole
    metrics = {
        'mrr': mrr,
        'ndcg': ndcg,
        'recall_at_1': lambda preds, gt: recall_at_k(preds, gt, 1),
        'recall_at_3': lambda preds, gt: recall_at_k(preds, gt, 3),
        'recall_at_5': lambda preds, gt: recall_at_k(preds, gt, 5),
        'recall_at_10': lambda preds, gt: recall_at_k(preds, gt, 10),
        'recall_at_50': lambda preds, gt: recall_at_k(preds, gt, 50),
    }

    results = {
        name: func(sorted_indices, gt_indices)
        for name, func in metrics.items()
    }

    l2_dist = torch.cat(l2_distances, dim=0).mean().item()
    results['l2_dist'] = l2_dist

    return results

def eval_on_val(x_val: np.ndarray, y_val: np.ndarray, model: nn.Module, device) -> dict:
    gt_indices = torch.arange(len(y_val))

    model.eval()

    with torch.inference_mode():
        translated = model(x_val.to(device)).to('cpu')

    results = evaluate_retrieval(translated, y_val, gt_indices)

    return results


def generate_submission(model: nn.Module, test_path: Path, output_file="submission-dirmodel.csv", device=None):
    test_data = np.load(test_path)
    sample_ids = test_data['captions/ids']
    test_embds = test_data['captions/embeddings']
    test_embds = torch.from_numpy(test_embds).float()

    with torch.no_grad():
        pred_embds = model(test_embds.to(device)).cpu()

    print("Generating submission file...")

    if isinstance(pred_embds, torch.Tensor):
        pred_embds = pred_embds.cpu().numpy()

    df_submission = pd.DataFrame({'id': sample_ids, 'embedding': pred_embds.tolist()})

    df_submission.to_csv(output_file, index=False, float_format='%.17g')
    print(f"‚úì Saved submission to {output_file}")

    return df_submission

In [None]:
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data import random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

# def info_nce_loss(dir_preds, img_targets, logit_scale: float):
#     dir_preds = F.normalize(dir_preds, dim=-1)
#     img_targets = F.normalize(img_targets, dim=-1)

#     logit_scale = torch.clamp(logit_scale, min=np.log(0.01), max=np.log(100))

#     logits = (dir_preds @ img_targets.T) * logit_scale.exp()
#     labels = torch.arange(logits.size(0), device=logits.device)

#     loss_t2i = F.cross_entropy(logits, labels)
#     loss_i2t = F.cross_entropy(logits.T, labels)

#     return 0.5 * (loss_t2i + loss_i2t)

# def info_nce_loss(dir_preds, img_targets, logit_scale: torch.Tensor, margin: float = 0.5, alpha: float = 0.3):
#     dir_preds = F.normalize(dir_preds, dim=-1)
#     img_targets = F.normalize(img_targets, dim=-1)

#     logit_scale = torch.clamp(logit_scale, min=np.log(0.01), max=np.log(100))

#     logits = dir_preds @ img_targets.T * logit_scale.exp()
#     labels = torch.arange(logits.size(0), device=logits.device)

#     loss_t2i = F.cross_entropy(logits, labels)
#     loss_i2t = F.cross_entropy(logits.T, labels)
#     loss_nce = 0.5 * (loss_t2i + loss_i2t)

#     mask = torch.eye(logits.size(0), device=logits.device)
#     logits_no_pos = logits - mask * 1e9
#     hardest_neg = logits_no_pos.max(dim=1).values
#     positive_sim = torch.diag(logits)
#     loss_hard = F.relu(hardest_neg - positive_sim + margin).mean()

#     return loss_nce + alpha * loss_hard

def info_nce_loss(
    dir_preds,
    img_targets,
    logit_scale: torch.Tensor,
    margin: float = 0.3,
    alpha: float = 0.7
):
    """
    InfoNCE simmetrico + hard-negative Margin Ranking Loss su entrambe le direzioni.
    """
    dir_preds = F.normalize(dir_preds, dim=-1)
    img_targets = F.normalize(img_targets, dim=-1)

    # Clamp logit scale per stabilit√†
    logit_scale = torch.clamp(logit_scale, min=np.log(0.01), max=np.log(100))

    # --- InfoNCE simmetrico ---
    logits = dir_preds @ img_targets.T * logit_scale.exp()
    labels = torch.arange(logits.size(0), device=logits.device)
    loss_t2i = F.cross_entropy(logits, labels)
    loss_i2t = F.cross_entropy(logits.T, labels)
    loss_nce = 0.5 * (loss_t2i + loss_i2t)

    # --- Hard negative Margin Ranking Loss per testo ‚Üí immagine ---
    mask = torch.eye(logits.size(0), device=logits.device)
    logits_no_pos = logits - mask * 1e9
    hardest_neg_t2i = logits_no_pos.max(dim=1).values
    positive_sim_t2i = torch.diag(logits)
    loss_hard_t2i = F.relu(hardest_neg_t2i - positive_sim_t2i + margin).mean()

    # --- Hard negative Margin Ranking Loss per immagine ‚Üí testo ---
    logits_no_pos_i2t = logits.T - mask * 1e9
    hardest_neg_i2t = logits_no_pos_i2t.max(dim=1).values
    positive_sim_i2t = torch.diag(logits.T)
    loss_hard_i2t = F.relu(hardest_neg_i2t - positive_sim_i2t + margin).mean()

    # Loss finale combinata
    loss_hard_total = 0.5 * (loss_hard_t2i + loss_hard_i2t)
    loss = loss_nce + alpha * loss_hard_total

    return loss

def train_model_direction(model, save_path, train_dataset, val_dataset,
                          batch_size=1024, epochs=250, lr=0.01, patience=5,
                          reg_lambda=0.01):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=3, threshold=0.001, min_lr=1e-9
    )

    best_mrr = float('-inf')
    no_improvements = 0

    for epoch in range(1, epochs+1):
        model.train()
        running_loss = 0.0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch}/{epochs}", leave=False)

        for X_batch, y_batch in progress_bar:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            #y_batch = F.normalize(y_batch, p=2, dim=-1)
            #X_batch = F.normalize(X_batch, p=2, dim=-1)

            optimizer.zero_grad()
            outputs = model(X_batch)

            loss = info_nce_loss(outputs, y_batch, model.logit_scale)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            progress_bar.set_postfix(loss=loss.item())

        avg_train_loss = running_loss / len(train_loader)

        # Validation
        model.eval()
        running_val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)

                #y_batch = F.normalize(y_batch, p=2, dim=-1)
                #X_batch = F.normalize(X_batch, p=2, dim=-1)

                outputs = model(X_batch)

                loss = info_nce_loss(outputs, y_batch, model.logit_scale)

                running_val_loss += loss.item()
        avg_val_loss = running_val_loss / len(val_loader)

        results = test(val_dataset, model, device)
        mrr = results['mrr']

        scheduler.step(mrr)

        print(f"Epoch {epoch:03d} | Train Loss: {avg_train_loss:.6f} | Val Loss: {avg_val_loss:.6f} | MRR: {mrr:.6f} | Recall-1: {results['recall_at_1']:.6f} | LR: {optimizer.param_groups[0]['lr']:.2e}")

        if mrr > best_mrr:
            best_mrr = mrr
            no_improvements = 0
            Path(save_path).parent.mkdir(parents=True, exist_ok=True)
            torch.save(model.state_dict(), save_path)
            print(f"üíæ Saved new best model (MRR={mrr:.6f})")
        else:
            no_improvements += 1
            if no_improvements >= patience:
                print("‚èπ Early stopping triggered.")
                break

    print(f"‚úÖ Training complete. Best MRR: {best_mrr:.6f}")
    return model


def get_data(data_path: Path):
    data = np.load(data_path)
    caption_embeddings = data['captions/embeddings']
    image_embeddings = data['images/embeddings']
    caption_labels = data['captions/label']
    data.close()

    X_abs, y_abs = torch.tensor(caption_embeddings), torch.tensor(image_embeddings[np.argmax(caption_labels, axis=1)])

    return X_abs, y_abs

def get_datasets(X_abs, y_abs) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    print('Texts shape', X_abs.shape)
    print('Images shape', y_abs.shape)

    dataset = TensorDataset(X_abs, y_abs)
    train_dataset, val_dataset = random_split(dataset, [0.8, 0.2], generator=torch.Generator().manual_seed(42))

    return train_dataset, val_dataset

def test(val_dataset: TensorDataset, model: nn.Module, device):
    val_loader = DataLoader(val_dataset, batch_size=len(val_dataset))
    for x_val, y_val in val_loader:
        results = eval_on_val(x_val, y_val, model=model, device=device)
    return results

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

data_path= '/content/drive/MyDrive/AML Challenge/train.npz'
test_path= '/content/drive/MyDrive/AML Challenge/test.clean.npz'

save_path = './models/dir-model.pth'

In [8]:
x, y = get_data(data_path)
train_dataset, val_dataset = get_datasets(x, y)

Texts shape torch.Size([125000, 1024])
Images shape torch.Size([125000, 1536])


In [9]:
input_dim = x.shape[1]
output_dim = y.shape[1]
hidden_layers=[1256, 1536]
dropout_rate = 0.5

batch_size= 4096
lr=0.01
epochs= 250
patience = 10

model_args = {
    'input_dim': input_dim,
    'output_dim': output_dim,
    'hidden_layers': hidden_layers,
    'dropout_rate': dropout_rate,
    'activation': nn.GELU
}

model = SpaceTranslator(**model_args).to(device)

train_model_direction(model, save_path, train_dataset, val_dataset, batch_size, epochs, lr, patience)

print('Finished training. Now testing using best model...')

state = torch.load(save_path)
model.load_state_dict(state)
results = test(val_dataset, model, device)
print("Test Results:", results)



Epoch 001 | Train Loss: 8.349971 | Val Loss: 7.378919 | MRR: 0.355201 | Recall-1: 0.187160 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.355201)




Epoch 002 | Train Loss: 7.399469 | Val Loss: 6.563950 | MRR: 0.545745 | Recall-1: 0.368800 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.545745)




Epoch 003 | Train Loss: 6.680735 | Val Loss: 5.824362 | MRR: 0.671000 | Recall-1: 0.516200 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.671000)




Epoch 004 | Train Loss: 6.027845 | Val Loss: 5.133030 | MRR: 0.755037 | Recall-1: 0.626760 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.755037)




Epoch 005 | Train Loss: 5.449264 | Val Loss: 4.566021 | MRR: 0.809689 | Recall-1: 0.701440 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.809689)




Epoch 006 | Train Loss: 4.974521 | Val Loss: 4.156172 | MRR: 0.843006 | Recall-1: 0.749800 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.843006)




Epoch 007 | Train Loss: 4.604747 | Val Loss: 3.862551 | MRR: 0.862928 | Recall-1: 0.779880 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.862928)




Epoch 008 | Train Loss: 4.302010 | Val Loss: 3.650444 | MRR: 0.876346 | Recall-1: 0.800160 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.876346)




Epoch 009 | Train Loss: 4.044519 | Val Loss: 3.477175 | MRR: 0.889349 | Recall-1: 0.820720 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.889349)




Epoch 010 | Train Loss: 3.837741 | Val Loss: 3.349273 | MRR: 0.895361 | Recall-1: 0.829200 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.895361)




Epoch 011 | Train Loss: 3.643631 | Val Loss: 3.246844 | MRR: 0.903023 | Recall-1: 0.841720 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.903023)




Epoch 012 | Train Loss: 3.468848 | Val Loss: 3.165255 | MRR: 0.905235 | Recall-1: 0.844600 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.905235)




Epoch 013 | Train Loss: 3.329957 | Val Loss: 3.092016 | MRR: 0.910349 | Recall-1: 0.852760 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.910349)




Epoch 014 | Train Loss: 3.202003 | Val Loss: 3.026508 | MRR: 0.914083 | Recall-1: 0.859120 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.914083)




Epoch 015 | Train Loss: 3.069898 | Val Loss: 2.973073 | MRR: 0.916646 | Recall-1: 0.862920 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.916646)




Epoch 016 | Train Loss: 2.947014 | Val Loss: 2.932860 | MRR: 0.918654 | Recall-1: 0.866520 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.918654)




Epoch 017 | Train Loss: 2.856185 | Val Loss: 2.913198 | MRR: 0.919494 | Recall-1: 0.867160 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.919494)




Epoch 018 | Train Loss: 2.757724 | Val Loss: 2.869912 | MRR: 0.921291 | Recall-1: 0.870800 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.921291)




Epoch 019 | Train Loss: 2.692519 | Val Loss: 2.860051 | MRR: 0.921751 | Recall-1: 0.871560 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.921751)




Epoch 020 | Train Loss: 2.612123 | Val Loss: 2.827901 | MRR: 0.924115 | Recall-1: 0.875360 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.924115)




Epoch 021 | Train Loss: 2.536508 | Val Loss: 2.811115 | MRR: 0.924750 | Recall-1: 0.876600 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.924750)




Epoch 022 | Train Loss: 2.475758 | Val Loss: 2.793720 | MRR: 0.926199 | Recall-1: 0.878400 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.926199)




Epoch 023 | Train Loss: 2.412061 | Val Loss: 2.780713 | MRR: 0.925498 | Recall-1: 0.878160 | LR: 1.00e-02




Epoch 024 | Train Loss: 2.358166 | Val Loss: 2.767162 | MRR: 0.926398 | Recall-1: 0.879160 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.926398)




Epoch 025 | Train Loss: 2.316949 | Val Loss: 2.760201 | MRR: 0.928318 | Recall-1: 0.882640 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.928318)




Epoch 026 | Train Loss: 2.262770 | Val Loss: 2.753975 | MRR: 0.927298 | Recall-1: 0.880680 | LR: 1.00e-02




Epoch 027 | Train Loss: 2.210460 | Val Loss: 2.727520 | MRR: 0.929624 | Recall-1: 0.884800 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.929624)




Epoch 028 | Train Loss: 2.179225 | Val Loss: 2.722114 | MRR: 0.929958 | Recall-1: 0.885120 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.929958)




Epoch 029 | Train Loss: 2.138369 | Val Loss: 2.709841 | MRR: 0.929793 | Recall-1: 0.884960 | LR: 1.00e-02




Epoch 030 | Train Loss: 2.090154 | Val Loss: 2.718764 | MRR: 0.929142 | Recall-1: 0.883760 | LR: 1.00e-02




Epoch 031 | Train Loss: 2.063250 | Val Loss: 2.701062 | MRR: 0.930896 | Recall-1: 0.886920 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.930896)




Epoch 032 | Train Loss: 2.020450 | Val Loss: 2.701296 | MRR: 0.930188 | Recall-1: 0.885120 | LR: 1.00e-02




Epoch 033 | Train Loss: 1.993832 | Val Loss: 2.695536 | MRR: 0.930651 | Recall-1: 0.886280 | LR: 1.00e-02




Epoch 034 | Train Loss: 1.961322 | Val Loss: 2.692454 | MRR: 0.932183 | Recall-1: 0.889040 | LR: 1.00e-02
üíæ Saved new best model (MRR=0.932183)




Epoch 035 | Train Loss: 1.933421 | Val Loss: 2.679502 | MRR: 0.931045 | Recall-1: 0.887200 | LR: 1.00e-02




Epoch 036 | Train Loss: 1.909925 | Val Loss: 2.687592 | MRR: 0.931667 | Recall-1: 0.887720 | LR: 1.00e-02




Epoch 037 | Train Loss: 1.882488 | Val Loss: 2.682424 | MRR: 0.929955 | Recall-1: 0.884840 | LR: 1.00e-02




Epoch 038 | Train Loss: 1.851753 | Val Loss: 2.670268 | MRR: 0.932463 | Recall-1: 0.889200 | LR: 5.00e-03
üíæ Saved new best model (MRR=0.932463)




Epoch 039 | Train Loss: 1.751550 | Val Loss: 2.640879 | MRR: 0.933360 | Recall-1: 0.891080 | LR: 5.00e-03
üíæ Saved new best model (MRR=0.933360)




Epoch 040 | Train Loss: 1.671769 | Val Loss: 2.632802 | MRR: 0.933515 | Recall-1: 0.891480 | LR: 5.00e-03
üíæ Saved new best model (MRR=0.933515)




Epoch 041 | Train Loss: 1.633246 | Val Loss: 2.624440 | MRR: 0.934013 | Recall-1: 0.892160 | LR: 5.00e-03
üíæ Saved new best model (MRR=0.934013)




Epoch 042 | Train Loss: 1.608994 | Val Loss: 2.629998 | MRR: 0.934486 | Recall-1: 0.893280 | LR: 5.00e-03
üíæ Saved new best model (MRR=0.934486)




Epoch 043 | Train Loss: 1.583612 | Val Loss: 2.629712 | MRR: 0.933864 | Recall-1: 0.891760 | LR: 5.00e-03




Epoch 044 | Train Loss: 1.572272 | Val Loss: 2.629180 | MRR: 0.933168 | Recall-1: 0.890640 | LR: 5.00e-03




Epoch 045 | Train Loss: 1.553927 | Val Loss: 2.625961 | MRR: 0.934093 | Recall-1: 0.892640 | LR: 5.00e-03




Epoch 046 | Train Loss: 1.533664 | Val Loss: 2.627910 | MRR: 0.932956 | Recall-1: 0.890120 | LR: 2.50e-03




Epoch 047 | Train Loss: 1.489684 | Val Loss: 2.609144 | MRR: 0.934330 | Recall-1: 0.892760 | LR: 2.50e-03




Epoch 048 | Train Loss: 1.455796 | Val Loss: 2.605987 | MRR: 0.934309 | Recall-1: 0.892680 | LR: 2.50e-03




Epoch 049 | Train Loss: 1.438004 | Val Loss: 2.611739 | MRR: 0.934318 | Recall-1: 0.893200 | LR: 2.50e-03




Epoch 050 | Train Loss: 1.425462 | Val Loss: 2.606436 | MRR: 0.935082 | Recall-1: 0.894000 | LR: 1.25e-03
üíæ Saved new best model (MRR=0.935082)




Epoch 051 | Train Loss: 1.403052 | Val Loss: 2.603337 | MRR: 0.935138 | Recall-1: 0.894280 | LR: 1.25e-03
üíæ Saved new best model (MRR=0.935138)




Epoch 052 | Train Loss: 1.386938 | Val Loss: 2.602827 | MRR: 0.934728 | Recall-1: 0.893440 | LR: 1.25e-03




Epoch 053 | Train Loss: 1.375963 | Val Loss: 2.600435 | MRR: 0.935170 | Recall-1: 0.894360 | LR: 1.25e-03
üíæ Saved new best model (MRR=0.935170)




Epoch 054 | Train Loss: 1.368912 | Val Loss: 2.597088 | MRR: 0.935170 | Recall-1: 0.894320 | LR: 6.25e-04
üíæ Saved new best model (MRR=0.935170)




Epoch 055 | Train Loss: 1.362469 | Val Loss: 2.596866 | MRR: 0.935080 | Recall-1: 0.894280 | LR: 6.25e-04




Epoch 056 | Train Loss: 1.354972 | Val Loss: 2.594749 | MRR: 0.935132 | Recall-1: 0.894320 | LR: 6.25e-04




Epoch 057 | Train Loss: 1.350103 | Val Loss: 2.594714 | MRR: 0.935589 | Recall-1: 0.895040 | LR: 6.25e-04
üíæ Saved new best model (MRR=0.935589)




Epoch 058 | Train Loss: 1.340906 | Val Loss: 2.593851 | MRR: 0.935220 | Recall-1: 0.894360 | LR: 6.25e-04




Epoch 059 | Train Loss: 1.334650 | Val Loss: 2.594682 | MRR: 0.935115 | Recall-1: 0.894240 | LR: 6.25e-04




Epoch 060 | Train Loss: 1.331243 | Val Loss: 2.594390 | MRR: 0.935098 | Recall-1: 0.894280 | LR: 6.25e-04




Epoch 061 | Train Loss: 1.332329 | Val Loss: 2.594345 | MRR: 0.935211 | Recall-1: 0.894480 | LR: 3.13e-04




Epoch 062 | Train Loss: 1.334382 | Val Loss: 2.594035 | MRR: 0.935417 | Recall-1: 0.894880 | LR: 3.13e-04




Epoch 063 | Train Loss: 1.324441 | Val Loss: 2.594162 | MRR: 0.935205 | Recall-1: 0.894400 | LR: 3.13e-04




Epoch 064 | Train Loss: 1.317096 | Val Loss: 2.594263 | MRR: 0.935188 | Recall-1: 0.894560 | LR: 3.13e-04




Epoch 065 | Train Loss: 1.316380 | Val Loss: 2.594504 | MRR: 0.935431 | Recall-1: 0.894920 | LR: 1.56e-04




Epoch 066 | Train Loss: 1.314107 | Val Loss: 2.594279 | MRR: 0.935494 | Recall-1: 0.895040 | LR: 1.56e-04




Epoch 067 | Train Loss: 1.318336 | Val Loss: 2.593910 | MRR: 0.935410 | Recall-1: 0.894840 | LR: 1.56e-04
‚èπ Early stopping triggered.
‚úÖ Training complete. Best MRR: 0.935589
Finished training. Now testing using best model...
Test Results: {'mrr': np.float64(0.9355885110160322), 'ndcg': np.float64(0.951342610965915), 'recall_at_1': 0.89504, 'recall_at_3': 0.9736, 'recall_at_5': 0.9854, 'recall_at_10': 0.99368, 'recall_at_50': 0.99924, 'l2_dist': 240.8096160888672}


In [10]:
generate_submission(model, Path(test_path), output_file="davdav.csv", device=device)

Generating submission file...
‚úì Saved submission to davdav.csv


Unnamed: 0,id,embedding
0,1,"[0.7081302404403687, 0.3333854675292969, 1.504..."
1,2,"[-2.326406478881836, 1.227230191230774, 5.6908..."
2,3,"[-2.0953173637390137, -1.3478784561157227, 2.4..."
3,4,"[8.900824546813965, -6.391790390014648, -10.33..."
4,5,"[9.878498077392578, 12.09605598449707, 3.22351..."
...,...,...
1495,1496,"[0.020430684089660645, 2.2903895378112793, 10...."
1496,1497,"[4.134548664093018, 8.464696884155273, 9.58650..."
1497,1498,"[5.980518341064453, -4.8062357902526855, 10.85..."
1498,1499,"[4.560214996337891, 5.826842308044434, -3.1341..."


In [None]:
torch.save(model.state_dict(), "model_weights.pth")

In [None]:
state = torch.load("model_weights.pth")
model.load_state_dict(state)
print("Test Results:", results)

Test Results: {'mrr': np.float64(0.9299668128234295), 'ndcg': np.float64(0.9471116568627964), 'recall_at_1': 0.8854, 'recall_at_3': 0.97124, 'recall_at_5': 0.98416, 'recall_at_10': 0.993, 'recall_at_50': 0.9994, 'l2_dist': 189.603515625}
