In [82]:
# /kaggle/input/crawl-data/top_40_vi_en_v1.csv
# /kaggle/input/cross-lingual-cross-encoder/cross_encoder_en_vi_epoch3
import json
from datasets import Dataset
import pandas as pd
import numpy as np
# Locations of the train / test / eval JSONL splits inside the Kaggle input dataset.
train_path, test_path, eval_path = (
    "/kaggle/input/crawl-data/train_crosslingual.jsonl",
    "/kaggle/input/crawl-data/test_crosslingual.jsonl",
    "/kaggle/input/crawl-data/eval_crosslingual.jsonl",
)


In [91]:
import json
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel
from torch.nn.functional import cosine_similarity

def l2_normalize(x):
    """Scale vectors along the last dimension to unit L2 norm.

    Args:
        x: Floating-point tensor; normalization is applied over ``dim=-1``.

    Returns:
        Tensor of the same shape as ``x``. Rows whose norm is zero are
        returned as zeros instead of NaN, because the divisor is clamped to a
        small epsilon by ``torch.nn.functional.normalize``.
    """
    return torch.nn.functional.normalize(x, p=2, dim=-1)

def get_embedding(texts, model, tokenizer, device, max_length=256):
    """Encode a batch of texts into L2-normalized sentence embeddings.

    Args:
        texts: List of strings to encode.
        model: Encoder called as ``model(**inputs)``; its output must expose
            ``last_hidden_state`` and may expose ``pooler_output``.
        tokenizer: Tokenizer called with padding/truncation and
            ``return_tensors="pt"``; its result must support ``.to(device)``.
        device: Device the tokenized batch is moved to.
        max_length: Truncation length passed to the tokenizer.

    Returns:
        The result of ``l2_normalize`` applied to one vector per input text.
    """
    encoded = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    encoded = encoded.to(device)

    with torch.no_grad():
        outputs = model(**encoded)
        # Prefer the model's pooled output when it provides one; otherwise
        # fall back to the hidden state of the first token.
        # NOTE(review): for BGE-style encoders the dense embedding is usually
        # the first-token hidden state rather than pooler_output — confirm
        # which one the evaluated checkpoints were trained with.
        pooled = getattr(outputs, "pooler_output", None)
        if pooled is None:
            pooled = outputs.last_hidden_state[:, 0]

    return l2_normalize(pooled)

def _encode_corpus(train_df, pos_col, neg_col, label, model, tokenizer, device,
                   batch_size=64, max_length=256):
    """Collect the unique contexts of two list columns and embed them in batches.

    Contexts are deduplicated in first-seen order, so the returned list (and
    the row order of the embedding matrix) is the same on every run.
    """
    import ast  # local import: the cell defining these helpers does not import ast

    unique_contexts = {}
    for _, row in train_df.iterrows():
        for col in (pos_col, neg_col):
            items = row[col]
            if isinstance(items, str):
                # Cells may hold a stringified list; parse it as a literal
                # rather than executing it with eval().
                items = ast.literal_eval(items)
            unique_contexts.update(dict.fromkeys(items))
    all_contexts = list(unique_contexts)

    print(f"Encoding {label} corpus...")
    corpus_embeddings = []
    for start in range(0, len(all_contexts), batch_size):
        batch = all_contexts[start:start + batch_size]
        emb = get_embedding(batch, model, tokenizer, device, max_length)
        corpus_embeddings.append(emb.cpu())  # keep on CPU until retrieval time
    corpus_embeddings = torch.cat(corpus_embeddings, dim=0)  # shape: (N, D)
    return all_contexts, corpus_embeddings


def embedding_en_corpus(train_df, model, tokenizer, device, batch_size=64, max_length=256):
    """Embed every unique English context found in ``en_pos`` / ``en_neg``.

    Args:
        train_df: DataFrame whose ``en_pos`` and ``en_neg`` cells are lists of
            contexts, or string literals of such lists.
        model, tokenizer, device: Forwarded to ``get_embedding``.
        batch_size: Number of contexts encoded per forward pass.
        max_length: Truncation length forwarded to ``get_embedding``.

    Returns:
        ``(all_contexts, corpus_embeddings)``: the deduplicated contexts and a
        CPU tensor whose i-th row is the embedding of ``all_contexts[i]``.
    """
    return _encode_corpus(train_df, 'en_pos', 'en_neg', "EN",
                          model, tokenizer, device, batch_size, max_length)


def embedding_vi_corpus(train_df, model, tokenizer, device, batch_size=64, max_length=256):
    """Embed every unique Vietnamese context found in ``vi_pos`` / ``vi_neg``.

    Args:
        train_df: DataFrame whose ``vi_pos`` and ``vi_neg`` cells are lists of
            contexts, or string literals of such lists.
        model, tokenizer, device: Forwarded to ``get_embedding``.
        batch_size: Number of contexts encoded per forward pass.
        max_length: Truncation length forwarded to ``get_embedding``.

    Returns:
        ``(all_contexts, corpus_embeddings)``: the deduplicated contexts and a
        CPU tensor whose i-th row is the embedding of ``all_contexts[i]``.
    """
    return _encode_corpus(train_df, 'vi_pos', 'vi_neg', "VI",
                          model, tokenizer, device, batch_size, max_length)

def _retrieve_top_k(
    train_df,
    query_col,
    pos_col,
    all_contexts,
    corpus_embeddings,
    model,
    tokenizer,
    device,
    top_k=10,
    max_length=256
):
    """Retrieve the ``top_k`` most similar contexts for every query row.

    Args:
        train_df: DataFrame providing ``query_col`` and ``pos_col``.
        query_col: Name of the column holding the query text.
        pos_col: Name of the column holding the positive contexts (a list, or
            a string literal of a list).
        all_contexts: Contexts aligned row-by-row with ``corpus_embeddings``.
        corpus_embeddings: Tensor with one embedding per context.
        model, tokenizer, device, max_length: Forwarded to ``get_embedding``.
        top_k: Number of contexts to keep per query; capped at the corpus size.

    Returns:
        DataFrame with columns ``query``, ``pos`` (stripped, de-duplicated
        positives in first-seen order) and ``top_k_pred`` (stripped contexts,
        best match first).
    """
    import ast  # local import: the cell defining these helpers does not import ast

    results = []
    corpus_embeddings = corpus_embeddings.to(device)
    # Tensor.topk raises when k exceeds the number of candidates, so cap it.
    k = min(top_k, corpus_embeddings.shape[0])

    for _, row in tqdm(train_df.iterrows(), total=len(train_df)):
        query = row[query_col]
        pos_items = row[pos_col]
        if isinstance(pos_items, str):
            # Parse stringified lists as literals rather than executing them.
            pos_items = ast.literal_eval(pos_items)
        pos_items = list(dict.fromkeys(str(p).strip() for p in pos_items))

        q_emb = get_embedding([query], model, tokenizer, device, max_length)  # shape: (1, D)
        sims = cosine_similarity(q_emb, corpus_embeddings, dim=1)  # shape: (N,)
        top_indices = sims.topk(k).indices.cpu().tolist()
        top_contexts = [str(all_contexts[i]).strip() for i in top_indices]

        results.append({
            "query": query,
            "pos": pos_items,
            "top_k_pred": top_contexts
        })

    return pd.DataFrame(results)


def vi_query_and_en_context(
    train_df,
    all_contexts,
    corpus_embeddings,
    model,
    tokenizer,
    device,
    top_k=10,
    max_length=256
):
    """Rank contexts for each ``vi_query``; positives are read from ``en_pos``."""
    return _retrieve_top_k(train_df, 'vi_query', 'en_pos', all_contexts, corpus_embeddings,
                           model, tokenizer, device, top_k, max_length)


def en_query_and_vi_context(
    train_df,
    all_contexts,
    corpus_embeddings,
    model,
    tokenizer,
    device,
    top_k=10,
    max_length=256
):
    """Rank contexts for each ``en_query``; positives are read from ``vi_pos``."""
    return _retrieve_top_k(train_df, 'en_query', 'vi_pos', all_contexts, corpus_embeddings,
                           model, tokenizer, device, top_k, max_length)


def vi_query_and_vi_context(
    train_df,
    all_contexts,
    corpus_embeddings,
    model,
    tokenizer,
    device,
    top_k=10,
    max_length=256
):
    """Rank contexts for each ``vi_query``; positives are read from ``vi_pos``."""
    return _retrieve_top_k(train_df, 'vi_query', 'vi_pos', all_contexts, corpus_embeddings,
                           model, tokenizer, device, top_k, max_length)


def en_query_and_en_context(
    train_df,
    all_contexts,
    corpus_embeddings,
    model,
    tokenizer,
    device,
    top_k=10,
    max_length=256
):
    """Rank contexts for each ``en_query``; positives are read from ``en_pos``."""
    return _retrieve_top_k(train_df, 'en_query', 'en_pos', all_contexts, corpus_embeddings,
                           model, tokenizer, device, top_k, max_length)

In [62]:
def compute_recall_mrr_multi_gt(df, k=10):
    """Print and return retrieval metrics for queries with multiple ground truths.

    Args:
        df: DataFrame with a ``pos`` column (iterable of ground-truth texts)
            and a ``top_k_pred`` column (ranked list of predicted texts).
        k: Cut-off; only the first ``k`` predictions of each row are scored.

    Returns:
        dict with keys ``acc@1``, ``acc@5``, ``acc@{k}``, ``recall@{k}`` and
        ``mrr@{k}``, each averaged over all rows. With the default ``k=10``
        the keys are ``acc@10``, ``recall@10`` and ``mrr@10``.

    Notes:
        Texts are compared after ``str(...).strip()``. A row with no ground
        truth contributes a recall of 0.0 instead of raising
        ``ZeroDivisionError``. ``acc@5`` is computed on at most ``k``
        predictions, so it equals ``acc@{k}`` when ``k < 5``.
    """
    acc1, acc5, acc_k = [], [], []
    recall_k, mrr_k = [], []

    for _, row in df.iterrows():
        true_texts = {str(x).strip() for x in row['pos']}
        pred_texts = [str(x).strip() for x in row['top_k_pred'][:k]]
        # hits[i] is True when the prediction at rank i + 1 is a ground truth.
        hits = [pred in true_texts for pred in pred_texts]

        # ACC@n: at least one correct prediction within the first n results.
        acc1.append(int(any(hits[:1])))
        acc5.append(int(any(hits[:5])))
        acc_k.append(int(any(hits)))

        # Recall@k: share of ground-truth texts found in the top-k.
        if true_texts:
            recall_k.append(len(set(pred_texts) & true_texts) / len(true_texts))
        else:
            recall_k.append(0.0)

        # MRR@k: reciprocal rank of the first correct prediction (0 if none).
        first_hit = next((rank for rank, hit in enumerate(hits, start=1) if hit), None)
        mrr_k.append(1 / first_hit if first_hit else 0)

    # Averages over all queries.
    print(f"Accuracy@1:  {np.mean(acc1):.4f}")
    print(f"Accuracy@5:  {np.mean(acc5):.4f}")
    print(f"Accuracy@{k}: {np.mean(acc_k):.4f}")
    print(f"Recall@{k}:   {np.mean(recall_k):.4f}")
    print(f"MRR@{k}:      {np.mean(mrr_k):.4f}")

    return {
        "acc@1": np.mean(acc1),
        "acc@5": np.mean(acc5),
        f"acc@{k}": np.mean(acc_k),
        f"recall@{k}": np.mean(recall_k),
        f"mrr@{k}": np.mean(mrr_k),
    }

In [63]:
def compute_recall_mrr_multi_gt(df, k=10):
    """Print and return Accuracy@1/5/k, Recall@k and MRR@k for ranked predictions.

    Args:
        df: DataFrame with a ``pos`` column (iterable of ground-truth texts)
            and a ``top_k_pred`` column (ranked list of predicted texts).
        k: Cut-off; only the first ``k`` predictions of each row are scored.

    Returns:
        dict with keys ``acc@1``, ``acc@5``, ``acc@{k}``, ``recall@{k}`` and
        ``mrr@{k}``, each the mean over all rows of ``df``.

    Notes:
        Texts are compared after ``str(...).strip()``. Rows whose ``pos`` is
        empty are scored with recall 0.0 rather than dividing by zero.
    """
    acc1, acc5, acc_k = [], [], []
    recall_k, mrr_k = [], []

    for _, row in df.iterrows():
        true_texts = set(str(x).strip() for x in row['pos'])
        pred_texts = [str(x).strip() for x in row['top_k_pred'][:k]]

        # ACC@1
        acc1.append(1 if any(pred in true_texts for pred in pred_texts[:1]) else 0)

        # ACC@5
        acc5.append(1 if any(pred in true_texts for pred in pred_texts[:5]) else 0)

        # ACC@k
        acc_k.append(1 if any(pred in true_texts for pred in pred_texts) else 0)

        # Recall@k (defined as 0.0 when the row has no ground truth)
        if true_texts:
            recall = len(set(pred_texts) & true_texts) / len(true_texts)
        else:
            recall = 0.0
        recall_k.append(recall)

        # MRR@k: reciprocal rank of the first correct prediction, 0 if none
        mrr = 0
        for rank, pred in enumerate(pred_texts, start=1):
            if pred in true_texts:
                mrr = 1 / rank
                break
        mrr_k.append(mrr)

    # Print the averaged results
    print(f"Accuracy@1:  {np.mean(acc1):.4f}")
    print(f"Accuracy@5:  {np.mean(acc5):.4f}")
    print(f"Accuracy@{k}: {np.mean(acc_k):.4f}")
    print(f"Recall@{k}:   {np.mean(recall_k):.4f}")
    print(f"MRR@{k}:      {np.mean(mrr_k):.4f}")

    return {
        "acc@1": np.mean(acc1),
        "acc@5": np.mean(acc5),
        f"acc@{k}": np.mean(acc_k),
        f"recall@{k}": np.mean(recall_k),
        f"mrr@{k}": np.mean(mrr_k),
    }


In [37]:
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Baseline encoder and its tokenizer, both loaded from the same model id.
model_name = "BAAI/bge-m3"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model = model.to(device)

# Last expression of the cell: puts the model in eval mode and echoes its repr.
model.eval()

XLMRobertaModel(
  (embeddings): XLMRobertaEmbeddings(
    (word_embeddings): Embedding(250002, 1024, padding_idx=1)
    (position_embeddings): Embedding(8194, 1024, padding_idx=1)
    (token_type_embeddings): Embedding(1, 1024)
    (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): XLMRobertaEncoder(
    (layer): ModuleList(
      (0-23): 24 x XLMRobertaLayer(
        (attention): XLMRobertaAttention(
          (self): XLMRobertaSdpaSelfAttention(
            (query): Linear(in_features=1024, out_features=1024, bias=True)
            (key): Linear(in_features=1024, out_features=1024, bias=True)
            (value): Linear(in_features=1024, out_features=1024, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): XLMRobertaSelfOutput(
            (dense): Linear(in_features=1024, out_features=1024, bias=True)
            (LayerNorm): LayerNorm((1024,), eps=1e-05, elem

In [83]:
# Baseline run: tokenizer and encoder are both loaded from model_name.
model_name = "BAAI/bge-m3"
# NOTE(review): checkpoint_path is assigned but not read anywhere in this cell.
checkpoint_path = "/kaggle/input/checkpoint2/checkpoint/loss1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name).to(device)
# Evaluation data for this run is the split stored at train_path.
train_df = pd.read_json(train_path, lines=True)
# Embed the deduplicated positive + negative contexts of each language.
vi_contexts, vi_corpus_embeddings = embedding_vi_corpus(train_df, model, tokenizer, device)
en_contexts, en_corpus_embeddings = embedding_en_corpus(train_df, model, tokenizer, device)

# Top-k retrieval over these embeddings is run in the following cell.


🔄 Encoding VI corpus...


100%|██████████| 109/109 [02:46<00:00,  1.53s/it]


🔄 Encoding EN corpus...


100%|██████████| 112/112 [02:51<00:00,  1.53s/it]


In [84]:

# Top-40 candidates for each of the four query-language / context-language pairs.
# VI queries -> EN contexts
vi_query_and_en_context_top_40 = vi_query_and_en_context(
    train_df, en_contexts, en_corpus_embeddings, model, tokenizer, device, top_k=40
)
# VI queries -> VI contexts
vi_query_and_vi_context_top_40 = vi_query_and_vi_context(
    train_df, vi_contexts, vi_corpus_embeddings, model, tokenizer, device, top_k=40
)
# EN queries -> VI contexts
en_query_and_vi_context_top_40 = en_query_and_vi_context(
    train_df, vi_contexts, vi_corpus_embeddings, model, tokenizer, device, top_k=40
)
# EN queries -> EN contexts
en_query_and_en_context_top_40 = en_query_and_en_context(
    train_df, en_contexts, en_corpus_embeddings, model, tokenizer, device, top_k=40
)

100%|██████████| 4000/4000 [01:22<00:00, 48.31it/s]
100%|██████████| 4000/4000 [01:22<00:00, 48.31it/s]
100%|██████████| 4000/4000 [01:19<00:00, 50.53it/s]
100%|██████████| 4000/4000 [01:20<00:00, 49.83it/s]


In [85]:
# Retrieval metrics at k=10 for VI queries against EN contexts (scored on the first 10 of the 40 candidates).
compute_recall_mrr_multi_gt(vi_query_and_en_context_top_40, k=10)

Accuracy@1:  0.2845
Accuracy@5:  0.4677
Accuracy@10: 0.5400
Recall@10:   0.5400
MRR@10:      0.3634


{'acc@1': 0.2845,
 'acc@5': 0.46775,
 'acc@10': 0.54,
 'recall@10': 0.54,
 'mrr@10': 0.36339613095238094}

In [86]:
# Retrieval metrics at k=10 for EN queries against VI contexts (scored on the first 10 of the 40 candidates).
compute_recall_mrr_multi_gt(en_query_and_vi_context_top_40, k=10)

Accuracy@1:  0.3050
Accuracy@5:  0.4770
Accuracy@10: 0.5577
Recall@10:   0.5577
MRR@10:      0.3811


{'acc@1': 0.305,
 'acc@5': 0.477,
 'acc@10': 0.55775,
 'recall@10': 0.55775,
 'mrr@10': 0.3810831349206349}

In [87]:
# Retrieval metrics at k=10 for EN queries against EN contexts (scored on the first 10 of the 40 candidates).
compute_recall_mrr_multi_gt(en_query_and_en_context_top_40, k=10)

Accuracy@1:  0.3610
Accuracy@5:  0.5265
Accuracy@10: 0.5925
Recall@10:   0.5925
MRR@10:      0.4324


{'acc@1': 0.361,
 'acc@5': 0.5265,
 'acc@10': 0.5925,
 'recall@10': 0.5925,
 'mrr@10': 0.43239563492063493}

In [88]:
# Retrieval metrics at k=10 for VI queries against VI contexts (scored on the first 10 of the 40 candidates).
compute_recall_mrr_multi_gt(vi_query_and_vi_context_top_40, k=10)

Accuracy@1:  0.4820
Accuracy@5:  0.6418
Accuracy@10: 0.7060
Recall@10:   0.7060
MRR@10:      0.5513


{'acc@1': 0.482,
 'acc@5': 0.64175,
 'acc@10': 0.706,
 'recall@10': 0.706,
 'mrr@10': 0.55131875}

In [93]:
import ast
import pandas as pd
from tqdm import tqdm
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Resolve the device before its first use in this cell, so the models below do
# not depend on a `device` left over from an earlier cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Bi-encoder under evaluation: tokenizer from the base model id, weights from
# the checkpoint directory.
# NOTE(review): AutoModel is not imported by this cell's import lines; it relies
# on the earlier cell that imports it.
model_name = "BAAI/bge-m3"
checkpoint_path = "/kaggle/input/checkpoint3/checkpoint/loss1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(checkpoint_path).to(device)

# NOTE(review): despite its name, train_df holds the split read from test_path.
train_df = pd.read_json(test_path, lines=True)
vi_contexts, vi_corpus_embeddings = embedding_vi_corpus(train_df, model, tokenizer, device)
en_contexts, en_corpus_embeddings = embedding_en_corpus(train_df, model, tokenizer, device)

# Top-40 candidates per query for each query-language / context-language pair;
# these are the inputs to the cross-encoder reranking step.
vi_query_and_en_context_top_40 = vi_query_and_en_context(
    train_df=train_df,
    all_contexts=en_contexts,
    corpus_embeddings=en_corpus_embeddings,
    model=model,
    tokenizer=tokenizer,
    device=device,
    top_k=40
)
vi_query_and_vi_context_top_40 = vi_query_and_vi_context(
    train_df=train_df,
    all_contexts=vi_contexts,
    corpus_embeddings=vi_corpus_embeddings,
    model=model,
    tokenizer=tokenizer,
    device=device,
    top_k=40
)
en_query_and_vi_context_top_40 = en_query_and_vi_context(
    train_df=train_df,
    all_contexts=vi_contexts,
    corpus_embeddings=vi_corpus_embeddings,
    model=model,
    tokenizer=tokenizer,
    device=device,
    top_k=40
)
en_query_and_en_context_top_40 = en_query_and_en_context(
    train_df=train_df,
    all_contexts=en_contexts,
    corpus_embeddings=en_corpus_embeddings,
    model=model,
    tokenizer=tokenizer,
    device=device,
    top_k=40
)

# Cross-encoder used for reranking; model and tokenizer come from the same path.
ce_model_name = "/kaggle/input/cross-lingual-cross-encoder/cross_encoder_epoch2" # "BAAI/bge-reranker-v2-m3"
ce_model = AutoModelForSequenceClassification.from_pretrained(ce_model_name).to(device)
ce_tokenizer = AutoTokenizer.from_pretrained(ce_model_name)
ce_model.eval()

def safe_list(s):
    """Return ``s`` parsed as a Python literal when it is a string, else ``s`` itself.

    Strings are parsed with ``ast.literal_eval``; any other value is returned
    unchanged (the same object).
    """
    if not isinstance(s, str):
        return s
    return ast.literal_eval(s)

def rerank(query, contexts, model, tokenizer, device, max_length=256):
    """Order ``contexts`` by the cross-encoder score of each (query, context) pair.

    Args:
        query: Query text paired with every context.
        contexts: Candidate context strings.
        model: Sequence-classification model; ``model(**inputs).logits`` is
            used as the relevance score (expected shape ``(batch, 1)``).
        tokenizer: Tokenizer called with the query list and the context list
            as a text pair; its result must support ``.to(device)``.
        device: Device the tokenized batch is moved to.
        max_length: Truncation length passed to the tokenizer.

    Returns:
        The contexts sorted by descending score. An empty ``contexts`` yields
        an empty list without calling the tokenizer or the model.
    """
    contexts = list(contexts)
    if not contexts:
        return []

    inputs = tokenizer(
        [query] * len(contexts),
        contexts,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt"
    ).to(device)

    with torch.no_grad():
        logits = model(**inputs).logits
        # Drop only the trailing label dimension. A bare squeeze() would also
        # drop the batch dimension for a single candidate, leaving a 0-d tensor
        # whose tolist() is a float that cannot be zipped with the contexts.
        scores = logits.squeeze(-1)

    ranked = sorted(zip(contexts, scores.cpu().tolist()), key=lambda pair: pair[1], reverse=True)
    return [ctx for ctx, _ in ranked]

def rerank_top_k_results(df, model, tokenizer, device):
    """Rerank the ``top_k_pred`` candidates of every row with ``rerank``.

    Args:
        df: DataFrame with ``query``, ``pos`` and ``top_k_pred`` columns; the
            two list columns may be lists or string literals of lists.
        model, tokenizer, device: Forwarded to ``rerank``.

    Returns:
        DataFrame with the same three columns, where ``top_k_pred`` holds the
        candidates in the order returned by ``rerank``.
    """
    def _rerank_row(row):
        # Parse both list columns first, then reorder the candidates.
        query_text = row["query"]
        positives = safe_list(row["pos"])
        candidates = safe_list(row["top_k_pred"])
        return {
            "query": query_text,
            "pos": positives,
            "top_k_pred": rerank(query_text, candidates, model, tokenizer, device),
        }

    return pd.DataFrame(
        [_rerank_row(row) for _, row in tqdm(df.iterrows(), total=len(df))]
    )


Encoding VI corpus...
Encoding EN corpus...


100%|██████████| 500/500 [00:10<00:00, 48.56it/s]
100%|██████████| 500/500 [00:10<00:00, 48.49it/s]
100%|██████████| 500/500 [00:09<00:00, 51.87it/s]
100%|██████████| 500/500 [00:09<00:00, 51.31it/s]


In [94]:
# Rerank each top-40 candidate set with the cross-encoder, then score the first 10.
# Every call prints its metrics; only the final call's returned dict is echoed
# as the cell output because it is the last expression.
# VI queries -> EN contexts
reranked_vi_en = rerank_top_k_results(vi_query_and_en_context_top_40, ce_model, ce_tokenizer, device)
compute_recall_mrr_multi_gt(reranked_vi_en, k=10)
# VI queries -> VI contexts
reranked_vi_vi = rerank_top_k_results(vi_query_and_vi_context_top_40, ce_model, ce_tokenizer, device)
compute_recall_mrr_multi_gt(reranked_vi_vi, k=10)
# EN queries -> VI contexts
reranked_en_vi = rerank_top_k_results(en_query_and_vi_context_top_40, ce_model, ce_tokenizer, device)
compute_recall_mrr_multi_gt(reranked_en_vi, k=10)
# EN queries -> EN contexts
reranked_en_en = rerank_top_k_results(en_query_and_en_context_top_40, ce_model, ce_tokenizer, device)
compute_recall_mrr_multi_gt(reranked_en_en, k=10)


100%|██████████| 500/500 [08:03<00:00,  1.03it/s]


Accuracy@1:  0.4400
Accuracy@5:  0.6540
Accuracy@10: 0.7220
Recall@10:   0.7220
MRR@10:      0.5271


100%|██████████| 500/500 [08:05<00:00,  1.03it/s]


Accuracy@1:  0.5640
Accuracy@5:  0.7420
Accuracy@10: 0.8120
Recall@10:   0.8120
MRR@10:      0.6419


100%|██████████| 500/500 [08:04<00:00,  1.03it/s]


Accuracy@1:  0.4340
Accuracy@5:  0.6320
Accuracy@10: 0.7080
Recall@10:   0.7080
MRR@10:      0.5185


100%|██████████| 500/500 [08:01<00:00,  1.04it/s]

Accuracy@1:  0.4960
Accuracy@5:  0.6520
Accuracy@10: 0.7200
Recall@10:   0.7200
MRR@10:      0.5635





{'acc@1': 0.496,
 'acc@5': 0.652,
 'acc@10': 0.72,
 'recall@10': 0.72,
 'mrr@10': 0.5635246031746033}

In [99]:
import ast
import pandas as pd
from tqdm import tqdm
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Resolve the device before its first use in this cell, so the models below do
# not depend on a `device` left over from an earlier cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Bi-encoder under evaluation: tokenizer from the base model id, weights from
# the checkpoint directory.
# NOTE(review): AutoModel is not imported by this cell's import lines; it relies
# on the earlier cell that imports it.
model_name = "BAAI/bge-m3"
checkpoint_path = "/kaggle/input/cross-lingual/checkpoint/loss1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(checkpoint_path).to(device)

# NOTE(review): despite its name, train_df holds the split read from test_path.
train_df = pd.read_json(test_path, lines=True)
vi_contexts, vi_corpus_embeddings = embedding_vi_corpus(train_df, model, tokenizer, device)
en_contexts, en_corpus_embeddings = embedding_en_corpus(train_df, model, tokenizer, device)

# Top-40 candidates per query for each query-language / context-language pair;
# these are the inputs to the cross-encoder reranking step.
vi_query_and_en_context_top_40 = vi_query_and_en_context(
    train_df=train_df,
    all_contexts=en_contexts,
    corpus_embeddings=en_corpus_embeddings,
    model=model,
    tokenizer=tokenizer,
    device=device,
    top_k=40
)
vi_query_and_vi_context_top_40 = vi_query_and_vi_context(
    train_df=train_df,
    all_contexts=vi_contexts,
    corpus_embeddings=vi_corpus_embeddings,
    model=model,
    tokenizer=tokenizer,
    device=device,
    top_k=40
)
en_query_and_vi_context_top_40 = en_query_and_vi_context(
    train_df=train_df,
    all_contexts=vi_contexts,
    corpus_embeddings=vi_corpus_embeddings,
    model=model,
    tokenizer=tokenizer,
    device=device,
    top_k=40
)
en_query_and_en_context_top_40 = en_query_and_en_context(
    train_df=train_df,
    all_contexts=en_contexts,
    corpus_embeddings=en_corpus_embeddings,
    model=model,
    tokenizer=tokenizer,
    device=device,
    top_k=40
)

# Cross-encoder used for reranking; model and tokenizer come from the same path.
ce_model_name = "/kaggle/input/cross-lingual-cross-encoder/cross_encoder_epoch2" # "BAAI/bge-reranker-v2-m3"
ce_model = AutoModelForSequenceClassification.from_pretrained(ce_model_name).to(device)
ce_tokenizer = AutoTokenizer.from_pretrained(ce_model_name)
ce_model.eval()

def safe_list(s):
    """Parse ``s`` with ``ast.literal_eval`` if it is a string; otherwise return it as-is.

    Non-string inputs are handed back unchanged (the same object).
    """
    if isinstance(s, str):
        parsed = ast.literal_eval(s)
        return parsed
    return s

def rerank(query, contexts, model, tokenizer, device, max_length=256):
    """Order ``contexts`` by the cross-encoder score of each (query, context) pair.

    Args:
        query: Query text paired with every context.
        contexts: Candidate context strings.
        model: Sequence-classification model; ``model(**inputs).logits`` is
            used as the relevance score (expected shape ``(batch, 1)``).
        tokenizer: Tokenizer called with the query list and the context list
            as a text pair; its result must support ``.to(device)``.
        device: Device the tokenized batch is moved to.
        max_length: Truncation length passed to the tokenizer.

    Returns:
        The contexts sorted by descending score. An empty ``contexts`` yields
        an empty list without calling the tokenizer or the model.
    """
    contexts = list(contexts)
    if not contexts:
        return []

    inputs = tokenizer(
        [query] * len(contexts),
        contexts,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt"
    ).to(device)

    with torch.no_grad():
        logits = model(**inputs).logits
        # Drop only the trailing label dimension. A bare squeeze() would also
        # drop the batch dimension for a single candidate, leaving a 0-d tensor
        # whose tolist() is a float that cannot be zipped with the contexts.
        scores = logits.squeeze(-1)

    ranked = sorted(zip(contexts, scores.cpu().tolist()), key=lambda pair: pair[1], reverse=True)
    return [ctx for ctx, _ in ranked]

def rerank_top_k_results(df, model, tokenizer, device):
    """Apply ``rerank`` to the candidate list of each row of ``df``.

    Args:
        df: DataFrame with ``query``, ``pos`` and ``top_k_pred`` columns; the
            two list columns may be lists or string literals of lists.
        model, tokenizer, device: Forwarded to ``rerank``.

    Returns:
        DataFrame with columns ``query``, ``pos`` and ``top_k_pred``, the last
        one holding the candidates in the order returned by ``rerank``.
    """
    def _rerank_row(row):
        # Parse both list columns first, then reorder the candidates.
        query_text = row["query"]
        positives = safe_list(row["pos"])
        candidates = safe_list(row["top_k_pred"])
        reordered = rerank(query_text, candidates, model, tokenizer, device)
        return {"query": query_text, "pos": positives, "top_k_pred": reordered}

    progress = tqdm(df.iterrows(), total=len(df))
    return pd.DataFrame([_rerank_row(row) for _, row in progress])


Encoding VI corpus...
Encoding EN corpus...


100%|██████████| 500/500 [00:10<00:00, 48.92it/s]
100%|██████████| 500/500 [00:10<00:00, 49.06it/s]
100%|██████████| 500/500 [00:09<00:00, 52.04it/s]
100%|██████████| 500/500 [00:09<00:00, 51.71it/s]


In [100]:
# Rerank each top-40 candidate set with the cross-encoder, then score the first 10.
# Every call prints its metrics; only the final call's returned dict is echoed
# as the cell output because it is the last expression.
# VI queries -> EN contexts
reranked_vi_en = rerank_top_k_results(vi_query_and_en_context_top_40, ce_model, ce_tokenizer, device)
compute_recall_mrr_multi_gt(reranked_vi_en, k=10)
# VI queries -> VI contexts
reranked_vi_vi = rerank_top_k_results(vi_query_and_vi_context_top_40, ce_model, ce_tokenizer, device)
compute_recall_mrr_multi_gt(reranked_vi_vi, k=10)
# EN queries -> VI contexts
reranked_en_vi = rerank_top_k_results(en_query_and_vi_context_top_40, ce_model, ce_tokenizer, device)
compute_recall_mrr_multi_gt(reranked_en_vi, k=10)
# EN queries -> EN contexts
reranked_en_en = rerank_top_k_results(en_query_and_en_context_top_40, ce_model, ce_tokenizer, device)
compute_recall_mrr_multi_gt(reranked_en_en, k=10)


100%|██████████| 500/500 [08:02<00:00,  1.04it/s]


Accuracy@1:  0.4600
Accuracy@5:  0.6800
Accuracy@10: 0.7580
Recall@10:   0.7580
MRR@10:      0.5478


100%|██████████| 500/500 [08:04<00:00,  1.03it/s]


Accuracy@1:  0.5680
Accuracy@5:  0.7440
Accuracy@10: 0.8220
Recall@10:   0.8220
MRR@10:      0.6462


100%|██████████| 500/500 [08:03<00:00,  1.03it/s]


Accuracy@1:  0.4260
Accuracy@5:  0.6380
Accuracy@10: 0.7160
Recall@10:   0.7160
MRR@10:      0.5151


100%|██████████| 500/500 [08:00<00:00,  1.04it/s]

Accuracy@1:  0.4900
Accuracy@5:  0.6560
Accuracy@10: 0.7380
Recall@10:   0.7380
MRR@10:      0.5623





{'acc@1': 0.49,
 'acc@5': 0.656,
 'acc@10': 0.738,
 'recall@10': 0.738,
 'mrr@10': 0.5622793650793652}