## Imports

In [None]:
!pip install -q mteb
!pip install -q beir

In [2]:
import os
import sys
import math
import glob
import string
import random
import shutil
import warnings
import pathlib
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from IPython.display import display

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.auto import tqdm
import gc

import torch
import torch.nn as nn
import torch.functional as F

from mteb import MTEB
from sentence_transformers import SentenceTransformer

## Configs

In [3]:
class Config:
    """Notebook-wide settings, read as class attributes (never instantiated here)."""

    # Seed passed to the random number generators to make runs repeatable.
    seed: int = 1111

In [4]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU plus every CUDA device), and configures cuDNN for deterministic
    behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # NOTE: hash randomisation is fixed when the interpreter starts, so this
    # only influences child processes launched after this call.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Cover every visible GPU, not just the current device; this is silently
    # ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels between runs, which
    # defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False
# Fix all RNG state up front so the evaluation below is repeatable.
seed_everything(seed=Config.seed)

In [5]:
# Display label -> zero-argument factory. Models are built lazily so that a
# checkpoint is only loaded when its factory is called.
models = {
    label: (lambda checkpoint=checkpoint: SentenceTransformer(checkpoint))
    for label, checkpoint in (
        ("GLoVe", "average_word_embeddings_glove.6B.300d"),
        ("BERT", "bert-base-nli-mean-tokens"),
        ("SentenceTransformer", "all-mpnet-base-v2"),
    )
}

# Task type -> names of the MTEB tasks evaluated for that type.
tasks = dict(
    Retrieval=["QuoraRetrieval", "SciFact", "Touche2020"],
    Classification=["EmotionClassification", "CBD", "ImdbClassification"],
)

## Evaluation

In [None]:
# One record per (model, task): {"model_name", "task", "metrics"}, where
# "metrics" holds the scores reported for the "test" split.
history_retrieval = []
history_classification = []

for model_name, model_builder in models.items():
    # Build the model only now so that a single model is resident at a time.
    model = model_builder()
    for task_type, tasks_names in tasks.items():
        # Every task type other than "Retrieval" is recorded as classification.
        history = history_retrieval if task_type == "Retrieval" else history_classification
        for task in tasks_names:
            print(f"Evaluation {model_name} on {task} task")
            evaluation = MTEB(tasks=[task])
            # eval_splits / overwrite_results / output_folder are options of
            # run(), not of the MTEB constructor; passing them here is what
            # gives each model its own results folder and restricts the
            # evaluation to the test split.
            results = evaluation.run(
                model,
                eval_splits=["test"],
                overwrite_results=True,
                output_folder=f"./results/{model_name}",
            )
            # NOTE(review): assumes run() returns {task_name: {split: scores}};
            # confirm against the installed mteb version.
            history.append({
                "model_name": model_name,
                "task": task,
                "metrics": results[task]["test"],
            })
    # Dropping the reference alone does not promptly return memory; collect
    # garbage and release cached GPU blocks before loading the next model.
    del model
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

In [None]:
# Persist the retrieval records to disk. A non-empty list of dicts is stored
# by np.save as a pickled object array, so reading it back requires
# np.load(..., allow_pickle=True).
np.save('history_retrieval.npy', history_retrieval)
# Bare trailing expression: rendered as this notebook cell's output.
history_retrieval

In [None]:
# Write both result histories next to the notebook, one .npy file each.
for filename, records in (
    ('history_retrieval.npy', history_retrieval),
    ('history_classification.npy', history_classification),
):
    np.save(filename, records)

In [None]:
# Flatten the nested "metrics" dict of every record into plain columns so the
# results can be shown (and later exported) as flat tables.

# Retrieval: MAP and nDCG at cut-offs 1, 3, 5 and 10, in the column order
# map_at_1, ndcg_at_1, map_at_3, ndcg_at_3, ...
retrieval_columns = [
    f"{metric}_at_{cutoff}"
    for cutoff in (1, 3, 5, 10)
    for metric in ("map", "ndcg")
]
history_retrieval_df = pd.DataFrame(history_retrieval)
for column in retrieval_columns:
    history_retrieval_df[column] = [
        scores[column] for scores in history_retrieval_df["metrics"]
    ]
history_retrieval_df = history_retrieval_df.drop(columns=["metrics"])

# Classification: F1 and accuracy.
history_classification_df = pd.DataFrame(history_classification)
for column in ("f1", "accuracy"):
    history_classification_df[column] = [
        scores[column] for scores in history_classification_df["metrics"]
    ]
history_classification_df = history_classification_df.drop(columns=["metrics"])

display(history_retrieval_df, history_classification_df)

In [None]:
# Export both tables as CSV without the positional row index.
history_retrieval_df.to_csv("history_retrieval_df.csv", index=False)
history_classification_df.to_csv("history_classification_df.csv", index=False)