<a href="https://colab.research.google.com/github/stavco9/rs-final-project/blob/main/recsys2022/Models_for_ensemble/7b_TwoTowers_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
just_checking_integrity=False
rows=3000
test_rows=1000

In [2]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
import numpy as np
import datetime
from itertools import compress
from math import sin, cos
import ast
import gc
import os
import scipy.sparse as sps
import keras.models
from tensorflow.python.keras.metrics import MeanMetricWrapper
from tqdm import tqdm

In [3]:
from tqdm.auto import tqdm
tqdm.pandas()

# Preparation

In [104]:
from google.colab import drive
drive.mount('/content/drive')
%cd "/content/drive/My Drive/IDC_MSc/Year1/ReccomendationSystems/Final_Project/"
#%cd "/content/drive/My Drive/recommendation_systems/Final_Project/"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/IDC_MSc/Year1/ReccomendationSystems/Final_Project


In [5]:
def is_reseen(x):
    #print(x)
    seen={}
    res=[]
    for item in x:
        if item in seen:
            res.append(seen[item])
            seen[item]+=1
        else:
            res.append(0)
            seen[item]=1
    return res

def percentage_seen(x):
    size=len(x)
    return [(i+1)/size for i in range(size)]

def similarity_with_preceding(x):
    if len(x)==1:
        return np.array([0])
    feats=embedding_weights[x]
    #normalize
    feats=np.divide(feats,np.linalg.norm(feats,axis=1).reshape((-1,1)))
    feats_copy=feats.copy()
    feats_copy=np.roll(feats_copy,1,axis=0)
    feats_copy[0,:]=0
    prod=np.multiply(feats,feats_copy).sum(axis=1)
    return prod

def similarity_feats_with_preceding(x):
    if len(x)==1:
        return np.array([0])
    feats=ICM[x].toarray()
    #normalize
    feats=np.divide(feats,np.linalg.norm(feats,axis=1).reshape((-1,1)))
    feats_copy=feats.copy()
    feats_copy=np.roll(feats_copy,1,axis=0)
    feats_copy[0,:]=0
    prod=np.multiply(feats,feats_copy).sum(axis=1)
    return prod

In [6]:
static_features = [
                   'date_hour_sin',
                   'date_hour_cos',
                   'date_day_sin',
                   'date_day_cos',
                   'date_month_sin',
                   'date_month_cos',
                   'date_hour_sin_ending',
                   'date_hour_cos_ending',
                   'date_day_sin_ending',
                   'date_day_cos_ending',
                   'date_month_sin_ending',
                   'date_month_cos_ending',
                   'date_year_2020',
                   'length_of_session_seconds',
                   'avg_time_spent_per_item_seconds',
                   'variance_time_spent_per_item_seconds',
                   'n_seen_items',
                   'n_unique_items',
                   'user_went_afk',
                   'is_weekend',
                   'is_hot_hour',
                   'is_night',
                   'is_christmas_time',
                   'is_black_friday',
                   'session_similarity',
                   'session_similarity_uniques',
                   'session_similarity_feats',
                   'session_similarity_feats_uniques'
]
item_related_features = ['timedelta']

In [12]:
embedding_weights = np.load(
    './dataset/processed_data/compressed_features.npy'
    )

def get_ICM(files_directory="./dataset/processed_data"):
    df_icm = pd.read_csv(filepath_or_buffer=os.path.join(files_directory, 'simplified_features_and_categories_30.csv'), sep=',', header=0)

    item_id_list = df_icm['item_id'].values
    feat_id_list = df_icm['feature_idx'].values
    rating_id_list = np.ones_like(feat_id_list)
    ICM_matrix = sps.csr_matrix((rating_id_list, (item_id_list, feat_id_list)))
    return ICM_matrix

ICM=get_ICM()
embedding_weights.shape

(23692, 64)

# Load best model

In [7]:
def mrr_top_at(at=100):
    @tf.function
    def mrr_top(y_true,y_pred):
        top_k=tf.math.top_k(y_pred,k=at)
        rr=top_k.indices
        #print(rr)
        idx=tf.range(start=1,limit=at+1,delta=1)
        y_true=tf.reshape(y_true,[-1,1])
        #print(y_true)
        #y_true=tf.broadcast_to(y_true,test.shape)
        y_true=tf.cast(y_true,tf.int32)
        ranking=tf.where(tf.math.equal(rr,y_true),idx,0)
        #print(ranking)
        ranking=tf.reduce_sum(ranking,axis=-1)
        ranking=tf.where(ranking>0,1/ranking,0)
        #print(ranking)
        ranking=tf.reduce_mean(ranking)
        return ranking
    return mrr_top

@keras.saving.register_keras_serializable(package="CustomMetrics")
class MRRTopAt(MeanMetricWrapper):
    def __init__(self, at=100, **kwargs):
        super(MRRTopAt, self).__init__(mrr_top_at(at=100), **kwargs)


@keras.saving.register_keras_serializable(package="CustomLayers")
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super(TransformerBlock, self).__init__(**kwargs)
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

    def call(self, inputs, training):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        base_config = super().get_config()
        config = {
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate
        }
        return {**base_config, **config}

    @classmethod
    def from_config(cls, config):
        embed_dim = config.pop("embed_dim")
        num_heads = config.pop("num_heads")
        ff_dim = config.pop("ff_dim")
        rate = config.pop("rate")
        return cls(embed_dim, num_heads, ff_dim, rate, **config)

@keras.saving.register_keras_serializable(package="CustomLayers")
class TokenAndPositionEmbedding(layers.Layer):
    def __init__(
        self,
        maxlen,
        vocab_size,
        embed_dim,
        item_embedding_trainable=True,
        embedding_weights=None,
        **kwargs
        ):
        super(TokenAndPositionEmbedding, self).__init__(**kwargs)
        self.token_emb = layers.Embedding(
            input_dim=vocab_size,
            output_dim=embed_dim,
            trainable=item_embedding_trainable,
            weights=embedding_weights
            )
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.item_embedding_trainable = item_embedding_trainable
        self.embedding_weights = embedding_weights

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=maxlen-1, limit=0, delta=-1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        base_config = super().get_config()
        config = {
            "maxlen": self.maxlen,
            "vocab_size": self.vocab_size,
            "embed_dim": self.embed_dim,
            "item_embedding_trainable": self.item_embedding_trainable,
            "embedding_weights": self.embedding_weights
        }
        return {**base_config, **config}

    @classmethod
    def from_config(cls, config):
        token_emb_config = config.pop("token_emb")
        pos_emb_config = config.pop("pos_emb")
        maxlen = config.pop("maxlen")
        vocab_size = config.pop("vocab_size")
        embed_dim = config.pop("embed_dim")
        item_embedding_trainable = config.pop("item_embedding_trainable")
        embedding_weights = config.pop("embedding_weights")
        return cls(maxlen, vocab_size, embed_dim, item_embedding_trainable, embedding_weights, **config)


@keras.saving.register_keras_serializable(package="CustomLayers")
class PositionEmbedding(layers.Layer):
    def __init__(
        self,
        maxlen,
        embed_dim,
        **kwargs
        ):
        super(PositionEmbedding, self).__init__(**kwargs)
        self.maxlen = maxlen
        self.embed_dim = embed_dim
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        return positions

    def get_config(self):
      base_config = super().get_config()
      config = {
          "maxlen": self.maxlen,
          "embed_dim": self.embed_dim
      }
      return {**base_config, **config}

    @classmethod
    def from_config(cls, config):
        maxlen = config.pop("maxlen")
        embed_dim = config.pop("embed_dim")
        return cls(maxlen, embed_dim, **config)

In [107]:
def load_model(file_name):
    directory = './models'
    print(f"Loading {file_name}...")
    model = tf.keras.models.load_model(
        f'{directory}/{file_name}.keras',
        safe_mode=False,
        custom_objects={
            "mrr_top": MRRTopAt(at=100),
            "TransformerBlock": TransformerBlock,
            "TokenAndPositionEmbedding": TokenAndPositionEmbedding,
            "PositionEmbedding": PositionEmbedding
      })
    print(f"Model {file_name} loaded")
    return model


selected_models = [
    '2towers_model_mlp2_towerS1_towerI1_heads5',
    '2towers_model_mlp2_towerS1_towerI1'
]

models_for_training = {}
for model_name in selected_models:
  models_for_training[model_name] = load_model(model_name)

Loading 2towers_model_mlp2_towerS1_towerI1_heads5...
Model 2towers_model_mlp2_towerS1_towerI1_heads5 loaded
Loading 2towers_model_mlp2_towerS1_towerI1...
Model 2towers_model_mlp2_towerS1_towerI1 loaded


# Functions for prediction and evaluation

In [108]:
def prepare_data(dataset_name):

    temp=pd.read_csv(f"./dataset/processed_data/macro_feats_NN_{dataset_name}.csv")
    temp["item_id"]=temp["item_id"].map(ast.literal_eval)
    temp["timedelta"]=temp["timedelta"].map(ast.literal_eval)
    test_leaderboard_sessions=temp
    gc.collect()

    x_reseen_test = tf.keras.preprocessing.sequence.pad_sequences(
        test_leaderboard_sessions['item_id'].apply(is_reseen),
        padding='post',
        maxlen=100,
    )

    x_percentage_seen_test = tf.keras.preprocessing.sequence.pad_sequences(
        test_leaderboard_sessions['item_id'].apply(percentage_seen),
        padding='post',
        maxlen=100,
    )

    x_simils_test = tf.keras.preprocessing.sequence.pad_sequences(
        test_leaderboard_sessions['item_id'].progress_apply(similarity_with_preceding
        ),
        padding='post',
        maxlen=100,
        dtype='float32',
    )
    x_simils_feats_test = tf.keras.preprocessing.sequence.pad_sequences(
        test_leaderboard_sessions['item_id'].progress_apply(similarity_feats_with_preceding
        ),
        padding='post',
        maxlen=100,
        dtype='float32',
    )

    for col in ['length_of_session_seconds','avg_time_spent_per_item_seconds','variance_time_spent_per_item_seconds']:
        test_leaderboard_sessions[col]=np.log10(test_leaderboard_sessions[col]+1)
    for col in ['n_seen_items','n_unique_items']:
        test_leaderboard_sessions[col]=test_leaderboard_sessions[col]/100

    x_test_leaderboard = tf.keras.preprocessing.sequence.pad_sequences(
        test_leaderboard_sessions['item_id'],
        padding='post',
        maxlen=100
    )
    #x_test_leaderboard[0]

    # padding
    for item_related_feature in item_related_features:
        print('Padding {}'.format(item_related_feature))
        x_test_item_related = tf.keras.preprocessing.sequence.pad_sequences(
            test_leaderboard_sessions[item_related_feature],
            dtype='float16',
            maxlen=100,
            padding='post'
          )

    if len(item_related_features) == 1:
        x_test_item_related = np.expand_dims(x_test_item_related, axis=-1)

    submission_set = tf.data.Dataset.from_tensor_slices(
        (
            test_leaderboard_sessions['session_id'],
        (
            x_test_leaderboard,
            x_reseen_test/100,
            x_percentage_seen_test,
            x_simils_test,
            x_simils_feats_test,
            x_test_item_related,
            test_leaderboard_sessions[static_features].to_numpy()
        )
        )
    ).batch(2048)
    submission_set.element_spec

    return submission_set

In [112]:
def process_data(submission_set, model_for_training):
    candidates=np.array([i+1 for i in range(4990)]) # this is for the test month

    submission_df = pd.DataFrame(
        {'session_id':[], 'code':[], 'score':[], 'rank':[]}
    )

    for session_ids, sessions in tqdm(submission_set):
        predicted_scores=model_for_training(sessions).numpy()
        mask = np.ones(predicted_scores.shape[1], dtype=bool)
        mask[candidates] = False
        predicted_scores[...,mask]=-np.inf
        predicted_scores=tf.convert_to_tensor(predicted_scores)
        preds=tf.math.top_k(predicted_scores, k=100)
        scores=preds.values.numpy()
        code = preds.indices.numpy()
        prediction = pd.DataFrame(
            {
                'session_id':session_ids,
                'code':[tuple(v) for v in code],
                'score':[tuple(v) for v in scores],
                'rank':[tuple([rank for rank in range(1, 101)]) for id in session_ids]
            }
            )
        submission_df_tr = pd.concat([submission_df, prediction])

    # save
    # candidates_path = f'./dataset/candidates/NN/Transformer/{dataset_name}/'
    # if not os.path.exists(candidates_path):
    #     os.makedirs(candidates_path)
    # submission_df_tr.to_csv(candidates_path + '/candidates.csv', index=False)
    # read
    # candidates_path = f'./dataset/candidates/NN/Transformer/{dataset_name}/'
    # submission_df_tr=pd.read_csv(candidates_path + "/candidates.csv")

    # explode
    # submission_df_tr["score"]=submission_df_tr["score"].astype(np.int64).map(ast.literal_eval)
    # submission_df_tr["code"]=submission_df_tr["code"].map(ast.literal_eval)
    # submission_df_tr["rank"]=submission_df_tr["rank"].map(ast.literal_eval)
    submission_df_tr = submission_df_tr.explode(['code','score', 'rank'])
    submission_df_tr[["session_id","code","rank"]]=submission_df_tr[["session_id","code","rank"]].astype(int)
    submission_df_tr["score"]=submission_df_tr["score"].astype(float)
    submission_df_tr = submission_df_tr.rename(columns={"code":"Item_ID","session_id":"Session_Id"})

    return submission_df_tr

In [164]:
def merge_results(submission_model, bought):
    sessions=np.unique(submission_model["Session_Id"])
    train = bought[bought["session_id"].isin(sessions)]
    train = train.rename(columns={"item_id":"Item_ID","session_id":"Session_Id"})
    train["target"]=1

    #bought = bought.rename(columns={"item_id": "Item_ID", "session_id":"Session_Id"})
    #bought["target"]=1
    gc.collect()

    final_dataset=pd.merge(train, submission_model, on=["Session_Id","Item_ID"], how="left")
    final_dataset.fillna(100)
    final_dataset["target"]=final_dataset["target"].astype("int8")
    gc.collect()

    #sessions_containing_bought_among_candidates=final_dataset[["Session_Id","target"]].groupby("Session_Id").agg(sum).reset_index()
    #sessions_containing_bought_among_candidates=sessions_containing_bought_among_candidates.loc[sessions_containing_bought_among_candidates["target"]>0]
    #final_dataset=final_dataset[final_dataset['Session_Id'].isin(sessions_containing_bought_among_candidates["Session_Id"].tolist())]

    return final_dataset

In [205]:
def evaluate(dataset, mrr_at=100):
    metrics = {}
    purchased_samples = dataset[dataset["target"] == 1]
    hit_samples = purchased_samples[purchased_samples["rank"] <= mrr_at]
    hit_score = len(hit_samples) / len(bought)
    print(f"Hit score (rate): {hit_score}")
    metrics['Hit Score (rate)'] = hit_score

    #relevances_rank = dataset.groupby(['Session_Id', 'target'])['rank'].min()
    #ranks = relevances_rank.loc[:, 1]
    #reciprocal_ranks = 1 / (ranks)
    #mrr = reciprocal_ranks.mean()
    #print(f"MRR@{mrr_at} score: {mrr}")

    mrr = (1 / dataset.groupby(['Session_Id', 'target'])['rank'].min()).mean()
    print(f"MRR@{mrr_at} score: {mrr}")
    metrics[f'MRR@{mrr_at}'] = mrr

    NHITS=len(dataset['Session_Id'].unique())
    HITRATE=NHITS/81600 #approximate value

    print(f"normalized score (hit-rate={'%.6f' % HITRATE}): {mrr*HITRATE}")
    metrics[f"normalized score (hit-rate={'%.6f' % HITRATE})"] = mrr*HITRATE

    return metrics

# Leaderboard



In [219]:
dataset_name = 'leaderboard'

bought = pd.read_csv(f"./dataset/original_data/test_{dataset_name}_purchases.csv")
submission_set = prepare_data(dataset_name)

submission_df = {}

for model_name in selected_models:
    print(f"Processing {model_name}")
    submission_df[model_name] = process_data(submission_set, models_for_training[model_name])
    print(submission_df[model_name].shape)
    print(submission_df[model_name].head())

  0%|          | 0/50000 [00:00<?, ?it/s]

  0%|          | 0/50000 [00:00<?, ?it/s]

Padding timedelta
Processing 2towers_model_mlp2_towerS1_towerI1_heads5


  0%|          | 0/25 [00:00<?, ?it/s]

(84800, 4)
   Session_Id  Item_ID     score  rank
0     4366310     3746  0.056247     1
0     4366310     3904  0.026965     2
0     4366310     4347  0.022030     3
0     4366310     1879  0.006710     4
0     4366310      882  0.006598     5
Processing 2towers_model_mlp2_towerS1_towerI1


  0%|          | 0/25 [00:00<?, ?it/s]

(84800, 4)
   Session_Id  Item_ID     score  rank
0     4366310     2380  0.034569     1
0     4366310     4705  0.030799     2
0     4366310      594  0.011054     3
0     4366310      437  0.009560     4
0     4366310     4390  0.007797     5


In [220]:
datasets = {}

for model_name in selected_models:
  print(f"Merging {model_name} predicts with actual")

  datasets[model_name] = merge_results(submission_df[model_name], bought)
  print(datasets[model_name].shape)
  print(datasets[model_name].head())

Merging 2towers_model_mlp2_towerS1_towerI1_heads5 predicts with actual
(848, 6)
   Session_Id  Item_ID                     date  target  score  rank
0     4366310    23405   2021-06-07 07:42:38.98       1    NaN   NaN
1     4366375    13363  2021-06-19 05:56:35.982       1    NaN   NaN
2     4366550    11754   2021-06-11 22:53:44.71       1    NaN   NaN
3     4366563     6130  2021-06-21 23:15:13.657       1    NaN   NaN
4     4366608    26720  2021-06-05 21:24:26.868       1    NaN   NaN
Merging 2towers_model_mlp2_towerS1_towerI1 predicts with actual
(848, 6)
   Session_Id  Item_ID                     date  target  score  rank
0     4366310    23405   2021-06-07 07:42:38.98       1    NaN   NaN
1     4366375    13363  2021-06-19 05:56:35.982       1    NaN   NaN
2     4366550    11754   2021-06-11 22:53:44.71       1    NaN   NaN
3     4366563     6130  2021-06-21 23:15:13.657       1    NaN   NaN
4     4366608    26720  2021-06-05 21:24:26.868       1    NaN   NaN


In [221]:
metrics = {}

for model_name in selected_models:
  print(f"Evaluating {model_name}")

  metrics[model_name] = evaluate(datasets[model_name])

Evaluating 2towers_model_mlp2_towerS1_towerI1_heads5
Hit score (rate): 2e-05
MRR@100 score: 0.013157894736842105
normalized score (hit-rate=0.010392): 0.00013673890608875127
Evaluating 2towers_model_mlp2_towerS1_towerI1
Hit score (rate): 4e-05
MRR@100 score: 0.017884674683934627
normalized score (hit-rate=0.010392): 0.00018586034475461473


In [222]:
list_for_df = np.array([list(vals.values()) for vals in list(metrics.values())], dtype="O")
compression_table = pd.DataFrame.from_records(list_for_df.T, index=list(metrics.values())[0].keys(), columns=metrics.keys())

In [223]:
compression_table

Unnamed: 0,2towers_model_mlp2_towerS1_towerI1_heads5,2towers_model_mlp2_towerS1_towerI1
Hit Score (rate),2e-05,4e-05
MRR@100,0.013158,0.017885
normalized score (hit-rate=0.010392),0.000137,0.000186


# Final


In [214]:
dataset_name = 'final'

bought = pd.read_csv(f"./dataset/original_data/test_{dataset_name}_purchases.csv")
submission_set = prepare_data(dataset_name)

submission_df = {}

for model_name in selected_models:
    print(f"Processing {model_name}")
    submission_df[model_name] = process_data(submission_set, models_for_training[model_name])
    print(submission_df[model_name].shape)
    print(submission_df[model_name].head())

  0%|          | 0/50000 [00:00<?, ?it/s]

  0%|          | 0/50000 [00:00<?, ?it/s]

Padding timedelta
Processing 2towers_model_mlp2_towerS1_towerI1_heads5


  0%|          | 0/25 [00:00<?, ?it/s]

(84800, 4)
   Session_Id  Item_ID     score  rank
0     4362417     2587  0.049064     1
0     4362417     4741  0.032055     2
0     4362417     3746  0.028102     3
0     4362417     1556  0.011491     4
0     4362417      734  0.008164     5
Processing 2towers_model_mlp2_towerS1_towerI1


  0%|          | 0/25 [00:00<?, ?it/s]

(84800, 4)
   Session_Id  Item_ID     score  rank
0     4362417     2587  0.090685     1
0     4362417     1731  0.048648     2
0     4362417     3282  0.030115     3
0     4362417     4741  0.015664     4
0     4362417      578  0.014565     5


In [215]:
datasets = {}

for model_name in selected_models:
  print(f"Merging {model_name} predicts with actual")

  datasets[model_name] = merge_results(submission_df[model_name], bought)
  print(datasets[model_name].shape)
  print(datasets[model_name].head())

Merging 2towers_model_mlp2_towerS1_towerI1_heads5 predicts with actual
(848, 6)
   Session_Id  Item_ID                     date  target  score  rank
0     4362417    21868  2021-06-02 12:13:51.363       1    NaN   NaN
1     4362518     6373  2021-06-19 01:35:12.726       1    NaN   NaN
2     4362580    19882  2021-06-11 14:10:54.749       1    NaN   NaN
3     4362737     2342  2021-06-16 15:14:30.632       1    NaN   NaN
4     4362742     2123  2021-06-04 14:54:53.743       1    NaN   NaN
Merging 2towers_model_mlp2_towerS1_towerI1 predicts with actual
(848, 6)
   Session_Id  Item_ID                     date  target  score  rank
0     4362417    21868  2021-06-02 12:13:51.363       1    NaN   NaN
1     4362518     6373  2021-06-19 01:35:12.726       1    NaN   NaN
2     4362580    19882  2021-06-11 14:10:54.749       1    NaN   NaN
3     4362737     2342  2021-06-16 15:14:30.632       1    NaN   NaN
4     4362742     2123  2021-06-04 14:54:53.743       1    NaN   NaN


In [216]:
metrics = {}

for model_name in selected_models:
  print(f"Evaluating {model_name}")

  metrics[model_name] = evaluate(datasets[model_name])

Evaluating 2towers_model_mlp2_towerS1_towerI1_heads5
Hit score (rate): 0.0001
MRR@100 score: 0.025133032715958053
normalized score (hit-rate=0.010392): 0.00026118641842074055
Evaluating 2towers_model_mlp2_towerS1_towerI1
Hit score (rate): 4e-05
MRR@100 score: 0.016666666666666666
normalized score (hit-rate=0.010392): 0.00017320261437908495


In [217]:
list_for_df = np.array([list(vals.values()) for vals in list(metrics.values())], dtype="O")
compression_table = pd.DataFrame.from_records(list_for_df.T, index=list(metrics.values())[0].keys(), columns=metrics.keys())

In [218]:
compression_table

Unnamed: 0,2towers_model_mlp2_towerS1_towerI1_heads5,2towers_model_mlp2_towerS1_towerI1
Hit Score (rate),0.0001,4e-05
MRR@100,0.025133,0.016667
normalized score (hit-rate=0.010392),0.000261,0.000173
