In [None]:
####### INSTALATION #######

!pip uninstall torch -y
!pip install torch==1.13.1
# !pip uninstall torch-scatter -y
# !pip uninstall torch-sparse -y
# !pip uninstall pyg-lib -y
# !pip uninstall git+https://github.com/pyg-team/pytorch_geometric.git -y
# !pip uninstall sentence_transformers -y

import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

!pip uninstall torch-scatter torch-sparse torch-geometric torch-cluster  --y
!pip install torch-scatter -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install pyg-lib -f https://data.pyg.org/whl/nightly/torch-${TORCH}.html
!pip install git+https://github.com/pyg-team/pytorch_geometric.git
!pip install pandas
!pip install matplotlib
!pip install pyarrow fastparquet
!pip install transformers
!pip install lightfm
!pip install memory-profiler
# !pip install torch-sparse -f https://data.pyg.org/whl/torch-{torch.__version__}.html
# !pip install torch-cluster -f https://data.pyg.org/whl/torch-{torch.__version__}.html
# !pip install sentence_transformers==0.1.0

In [22]:
###### IMPORT #######
import numpy as np
import time
import pandas as pd
import random
import copy
# from neo4j import GraphDatabase
from torch_geometric.data import Data
import torch
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from tqdm import tqdm
from collections import defaultdict
import json
import multiprocessing
import matplotlib.pyplot as plt
from lightfm import LightFM
from lightfm.data import Dataset

from sklearn import preprocessing, feature_extraction, model_selection
from sklearn.metrics import mean_absolute_error, mean_squared_error
from torch_geometric.data import HeteroData
import torch_geometric.transforms as T
from torch_geometric.loader import LinkNeighborLoader
from torch_geometric.nn import SAGEConv, to_hetero
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score
import re
from transformers import AutoTokenizer, AutoModel
import os
import pickle
from memory_profiler import profile
# from sentence_transformers import SentenceTransformer

In [23]:
########## SETUP ARGS ###########
possible_experiments = {
    0: 'full',
    1: 'diversity',
    2: 'ucsp',
    3: 'icsp',
    4: 'usparsity',
    5: 'isparsity',
    6: 'sBERT',
    7: 'TFIDF',
    8: 'remove_item_feat', 
    9: 'add_social_edges',
}
experiment = possible_experiments[0]

possible_modes = ['debug', 'experiment']
mode = possible_modes[0]

model_variants = ['gnn', 'pop', 'mfn']
len_interactions_to_consider = 100000
each_user_all2all_new_edges = 10 # when running sBERT, we have the embedding saved for 100k contracts
# model_variant_eval = model_variants[1]

dataset_mode = 'contract'



In [25]:
#### DATA LOADER ####
from torch_geometric.data import download_url, extract_zip
from torch import Tensor

def data_loader(ratings_df):
    unique_user_id = ratings_df['userId'].unique()
    unique_user_id = pd.DataFrame(data={
        'userId': unique_user_id,
        'mappedID': pd.RangeIndex(len(unique_user_id)),
    })
    # print("Mapping of user IDs to consecutive values:")
    # print("==========================================")
    # print(unique_user_id.head())

    unique_item_id = ratings_df['itemId'].unique()
    unique_item_id = pd.DataFrame(data={
        'itemId': unique_item_id,
        'mappedID': pd.RangeIndex(len(unique_item_id)),
    })
    # print("Mapping of item IDs to consecutive values:")
    # print("===========================================")
    # print(unique_item_id.head())

    ratings_user_id = pd.merge(ratings_df['userId'], unique_user_id,
                                left_on='userId', right_on='userId', how='left')
    ratings_user_id = torch.from_numpy(ratings_user_id['mappedID'].values)
    ratings_item_id = pd.merge(ratings_df['itemId'], unique_item_id,
                                left_on='itemId', right_on='itemId', how='left')
    ratings_item_id = torch.from_numpy(ratings_item_id['mappedID'].values)
    edge_index_user_to_item = torch.stack([ratings_user_id, ratings_item_id], dim=0)
    # print()
    # print("Final edge indices pointing from users to items:")
    # print("=================================================")
    # print(edge_index_user_to_item)
    return unique_user_id, unique_item_id, edge_index_user_to_item

def movie_loader():
    url = 'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip'
    extract_zip(download_url(url, '.'), '.')
    movies_path = './ml-latest-small/movies.csv'
    ratings_path = './ml-latest-small/ratings.csv'
    items_ratings_df = pd.read_csv(ratings_path)
    items_ratings_df = items_ratings_df.rename(columns={'movieId': 'itemId'})
    unique_user_id, unique_item_id, edge_index_user_to_item = data_loader(items_ratings_df)
    items_df = pd.read_csv(movies_path)
    items_df = items_df.rename(columns={'movieId': 'itemId', 'title': 'name'})
    items_df = pd.merge(items_df, unique_item_id, on='itemId', how='left')
    items_df = items_df.sort_values('mappedID') # (Just the last 20 movies have NaN mappedId)
    genres = items_df['genres'].str.get_dummies('|')
    print(genres[["Action", "Adventure", "Drama", "Horror"]].head())
    item_feat = torch.from_numpy(genres.values).to(torch.float)
    assert item_feat.size() == (9742, 20)  # 20 genres in total.
    return unique_user_id, unique_item_id, edge_index_user_to_item, items_df, item_feat, items_ratings_df

import pandas as pd
import numpy as np
import torch

def movie_loader_sparse(k):
    # Load Data
    url = 'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip'
    extract_zip(download_url(url, '.'), '.')
    movies_path = './ml-latest-small/movies.csv'
    ratings_path = './ml-latest-small/ratings.csv'

    # Read and rename columns
    items_ratings_df = pd.read_csv(ratings_path)
    items_ratings_df = items_ratings_df.rename(columns={'movieId': 'itemId'})
    items_df = pd.read_csv(movies_path)
    items_df = items_df.rename(columns={'movieId': 'itemId', 'title': 'name'})

    # Select k% of each user's ratings
    items_ratings_df = items_ratings_df.groupby('userId').apply(lambda x: x.sample(frac=k/100)).reset_index(drop=True)

    # Keep only items present in the filtered ratings
    valid_item_ids = items_ratings_df['itemId'].unique()
    items_df = items_df[items_df['itemId'].isin(valid_item_ids)]

    # Recompute unique_user_id, unique_item_id, edge_index_user_to_item
    unique_user_id, unique_item_id, edge_index_user_to_item = data_loader(items_ratings_df)

    # Merge and sort items data
    items_df = pd.merge(items_df, unique_item_id, on='itemId', how='left')
    items_df = items_df.sort_values('mappedID')

    # Process genres and create item features
    genres = items_df['genres'].str.get_dummies('|')
    item_feat = torch.from_numpy(genres.values).to(torch.float)

    # Ensure the item feature size is as expected
    assert item_feat.size() == (len(valid_item_ids), 20)  # 20 genres in total.

    return unique_user_id, unique_item_id, edge_index_user_to_item, items_df, item_feat, items_ratings_df


def contract_loader():
    items_ratings_df = pd.read_parquet('dataset/user_contract_rating.parquet')
    items_ratings_df = items_ratings_df[:len_interactions_to_consider] if mode == 'debug' else items_ratings_df #$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

    def calculate_sparcity_value(df):
        num_users = df['user'].nunique()
        num_items = df['item'].nunique()
        num_interactions = len(df)
        total_possible_interactions = num_users * num_items / 100
        sparsity = 1 - (num_interactions / total_possible_interactions)
        return sparsity
    
    def filter_interactions(df, column, k):
        valid_entries = df[column].value_counts()
        valid_entries = valid_entries[valid_entries >= k]
        df = df[df[column].isin(valid_entries.index)]
        print(f'{column} sparcity value is:', calculate_sparcity_value(df))
        return df

    ########## SPARCITY EXPERIMENT ###########
    if experiment == 'usparsity':
        u = 1
        items_ratings_df = filter_interactions(items_ratings_df, 'user', u)
    elif experiment == 'isparsity':
        i = 20
        items_ratings_df = filter_interactions(items_ratings_df, 'item', i)

    items_df = {}
    items_df['name'] = items_ratings_df['item'].unique()
    items_df['itemId'], unique_names = pd.factorize(items_df['name'])
    # items_df['itemId'] = items_df['itemId'] + 1 #TODO test commenting this line didn't breal anything
    items_df = pd.DataFrame(items_df, columns=['itemId', 'name'])

    def get_item_feat_sbert(items_df):
        contract2comments = pd.read_parquet('dataset/contracts2comment.parquet')
        c2c_main_class = contract2comments[contract2comments['contract_name'] == contract2comments['class_name']]

        def reorder_text(text):
            lines = text.split("\n")
            notice_lines = [line for line in lines if "@notice" in line]
            other_lines = [line for line in lines if "@notice" not in line]
            reorderd_text = "\n".join(notice_lines + other_lines)
            return reorderd_text

        def preprocess_text(text):
            text = reorder_text(text)
            text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
            # Remove special characters, numbers, etc.
            text = re.sub(r'\W', ' ', text)
            # Remove extra spaces
            text = re.sub(r'\s+', ' ', text).strip()
            text = text[:512] if len(text) > 512 else text
            return text

        sentences = []
        for i, item in items_df.iterrows():
            comment_class = c2c_main_class[c2c_main_class['contract_name'] == item['name']]
            if not comment_class.empty and comment_class['class_documentation'].iloc[0] != '':
                sentences.append(comment_class['class_documentation'].iloc[0])
            else:
                class_names = contract2comments[contract2comments['contract_name'] == item['name']]['class_name']
                sentences.append(' '.join(class_names))

        preprocessed_sentences = [preprocess_text(sentence) for sentence in sentences]
        tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/bert-base-nli-mean-tokens")
        model = AutoModel.from_pretrained("sentence-transformers/bert-base-nli-mean-tokens")
        device = torch.device("cpu") #"cuda" if torch.cuda.is_available() else "cpu") # NOT enough GPU memory
        model = model.to(device)
        inputs = tokenizer(preprocessed_sentences, padding=True, truncation=True, return_tensors="pt", max_length=512)
        inputs = {key: tensor.to(device) for key, tensor in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)
            embeddings = outputs.last_hidden_state.mean(dim=1)
        item_feat = embeddings
        # model = SentenceTransformer('sentence-transformers/distilbert-base-nli-mean-tokens')
        # embeddings = model.encode(preprocessed_sentences)
        
        return item_feat
    
    def get_item_feat_tfidf(items_df):
        contract_top_words_df = pd.read_parquet('dataset/contract_top_words.parquet')
        contract_top_words_df = contract_top_words_df.rename(columns={'contract_name': 'name'})
        contracts_df_top_words = items_df.merge(contract_top_words_df, on='name', how='left')
        contracts_df_top_words['keywords'] = contracts_df_top_words['keywords'].fillna('')
        items_df = contracts_df_top_words
        items_df.set_index('itemId', inplace=True)
        # f =5 # ratio to determine the number of top keywords selected for each contract to construct item_feat
        items_df['truncated_keywords'] = items_df['keywords'].apply(lambda x: ','.join(x.split(',')))
        X_df = items_df['truncated_keywords'].str.get_dummies(',')
        item_feat = torch.from_numpy(X_df.values).to(torch.float)
        return item_feat
    
    ########### SBERT EXPERIMENT ###########
    if experiment == 'TFIDF':
        # item_feat = get_item_feat_tfidf(items_df)
        item_feat = np.load('tfidf_embeddings_full.npy') # np.load('tfidf_embeddings_100k.npy')
        item_feat = torch.from_numpy(item_feat[:len(items_df['itemId'].unique())]).to(torch.float)
        
    else:
        # item_feat = get_item_feat_sbert(items_df)
        item_feat = np.load('sbert_embeddings_full.npy') #np.load('sbert_embeddings_100k.npy')
        item_feat = torch.from_numpy(item_feat[:len(items_df['itemId'].unique())]).to(torch.float)
        print(item_feat.shape)

    print('item feature tensor shape', item_feat.shape)
    items_ratings_df = items_ratings_df.rename(columns={'user': 'userId', 'item': 'itemId'})
    unique_user_id, unique_item_id, edge_index_user_to_item = data_loader(items_ratings_df)
    print('number of unique users', len(unique_user_id))
    print('number of unique items', len(unique_item_id))
    return unique_user_id, unique_item_id, edge_index_user_to_item, items_df, item_feat, items_ratings_df

loaders = {
    'contract_loader': contract_loader,
    'movie_loader': movie_loader,
}
#unique_user_id, unique_item_id, edge_index_user_to_item, items_df, item_feat, items_ratings_df = loaders[f'{dataset_mode}_loader']()


In [4]:
def calculate_sparsity(edge_index_user_to_item):
    # Determine the number of users and items
    num_users = edge_index_user_to_item[0].max() + 1  # Assuming user IDs start from 0
    num_items = edge_index_user_to_item[1].max() + 1  # Assuming item IDs start from 0

    # Calculate the number of interactions
    num_interactions = edge_index_user_to_item.shape[1]

    # Total possible interactions
    total_possible_interactions = num_users * num_items

    # Calculate sparsity
    sparsity = 1 - (num_interactions / total_possible_interactions)
    return sparsity

In [26]:
unique_user_id, unique_item_id, edge_index_user_to_item, items_df, item_feat, items_ratings_df = movie_loader_sparse(30)
print(len(unique_item_id))
print(len(edge_index_user_to_item[0])/len(unique_user_id))
calculate_sparsity(edge_index_user_to_item)
len(unique_user_id)

Using existing file ml-latest-small.zip
Extracting ./ml-latest-small.zip


6151
49.575409836065575


610

In [27]:
######### LINK BINARY PRED MODEL ##########
def train_test_generator(unique_user_id, item_feat, edge_index_user_to_item):  
    data = HeteroData()
    data["user"].node_id = torch.arange(len(unique_user_id))
    data["item"].node_id = torch.arange(item_feat.shape[0])
    data["item"].x = item_feat
    data["user", "rates", "item"].edge_index = edge_index_user_to_item
    data = T.ToUndirected()(data)

    transform = T.RandomLinkSplit(
        num_val=0,
        num_test=0.2,
        disjoint_train_ratio=0.3,
        neg_sampling_ratio=2,
        add_negative_train_samples=False,
        edge_types=("user", "rates", "item"),
        rev_edge_types=("item", "rev_rates", "user"), 
    )
    
    train_data, val_data, test_data = transform(data)
    return data, train_data, test_data

def GNN_recommender(data, train_data):

    # Define seed edges:
    print('1')
    edge_label_index = train_data["user", "rates", "item"].edge_label_index
    edge_label = train_data["user", "rates", "item"].edge_label
    print('2')
    train_loader = LinkNeighborLoader(
        data=train_data,
        num_neighbors=[20, 10],
        neg_sampling_ratio=2.0,
        edge_label_index=(("user", "rates", "item"), edge_label_index),
        edge_label=edge_label,
        batch_size=128,
        shuffle=True,
    )
    print('3')

    class GNN(torch.nn.Module):
        def __init__(self, hidden_channels):
            super().__init__()
            self.conv1 = SAGEConv(hidden_channels, hidden_channels)
            self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        def forward(self, x: Tensor, edge_index: Tensor) -> Tensor:
            x = F.relu(self.conv1(x, edge_index))
            x = self.conv2(x, edge_index)
            return x
    # Our final classifier applies the dot-product between source and destination
    # node embeddings to drive edge-level predictions:
    class Classifier(torch.nn.Module):
        def forward(self, x_user: Tensor, x_item: Tensor, edge_label_index: Tensor) -> Tensor:
            edge_feat_user = x_user[edge_label_index[0]] # Convert node embeddings to edge-level representations:
            edge_feat_item = x_item[edge_label_index[1]]
            scores = (edge_feat_user * edge_feat_item).sum(dim=-1)
            return scores # Apply dot-product to get a prediction per supervision edge:
        
    class Model(torch.nn.Module):
        def __init__(self, hidden_channels):
            super().__init__()
            # Since the dataset does not come with rich features, we also learn two
            # embedding matrices for users and items:
            self.item_lin = torch.nn.Linear(item_feat.shape[1], hidden_channels)
            self.user_emb = torch.nn.Embedding(data["user"].num_nodes, hidden_channels)
            self.item_emb = torch.nn.Embedding(data["item"].num_nodes, hidden_channels)
            # Instantiate homogeneous GNN:
            self.gnn = GNN(hidden_channels)
            # Convert GNN model into a heterogeneous variant:
            self.gnn = to_hetero(self.gnn, metadata=data.metadata())
            self.classifier = Classifier()

        def forward(self, data: HeteroData) -> Tensor:
            x_dict = {
            "user": self.user_emb(data["user"].node_id),
            "item": self.item_lin(data["item"].x) + self.item_emb(data["item"].node_id),
            } 
            # `x_dict` holds feature matrices of all node types
            # `edge_index_dict` holds all edge indices of all edge types
            x_dict = self.gnn(x_dict, data.edge_index_dict)
            pred = self.classifier(
                x_dict["user"],
                x_dict["item"],
                data["user", "rates", "item"].edge_label_index,
            )
            return pred
            
    ########## TRAINING ##########
    model = Model(hidden_channels=64)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Device: '{device}'")
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    for epoch in range(1, 10):
        total_loss = total_examples = 0
        for sampled_data in tqdm(train_loader):
            optimizer.zero_grad()
            sampled_data.to(device)
            pred = model(sampled_data)
            ground_truth = sampled_data["user", "rates", "item"].edge_label
            loss = F.binary_cross_entropy_with_logits(pred, ground_truth)
            loss.backward()
            optimizer.step()
            total_loss += float(loss) * pred.numel()
            total_examples += pred.numel()

        # TODO: Add the val_loader, keep the best model
        print(f"Epoch: {epoch:03d}, Loss: {total_loss / total_examples:.4f}")

    ########## AUC EVAL VALIDATION #########
    # edge_label_index = val_data["user", "rates", "item"].edge_label_index
    # edge_label = val_data["user", "rates", "item"].edge_label
    # # val_data has neg samples in it
    # val_loader = LinkNeighborLoader(
    #     data=val_data,
    #     num_neighbors=[20, 10],
    #     edge_label_index=(("user", "rates", "item"), edge_label_index),
    #     edge_label=edge_label,
    #     batch_size=3 * 128,
    #     shuffle=False,
    # )
    # sampled_data = next(iter(val_loader))
    # preds = []
    # ground_truths = []
    # for sampled_data in tqdm(val_loader):
    #     with torch.no_grad():
    #         sampled_data.to(device)
    #         preds.append(model(sampled_data))
    #         ground_truths.append(sampled_data["user", "rates", "item"].edge_label)
    # pred = torch.cat(preds, dim=0).cpu().numpy()
    # ground_truth = torch.cat(ground_truths, dim=0).cpu().numpy()
    # auc = roc_auc_score(ground_truth, pred)
    # print()
    # print(f"Validation AUC: {auc:.4f}")
    # return data, train_data, val_data, train_loader, val_loader, ground_truth, pred, test_data, model
    return model

In [28]:
########## TRAIN TEST GENERAION ############

####### ITEM FEAT ABLATION EXPRIMENT ####### 
if experiment == 'ablation_item_feat':
    item_feat = torch.zeros_like(item_feat)

# ####### SOCIAL EDGES ABLEATION EXPERIMENT #######
def add_social_edges(edge_index_user_to_item, unique_user_id, unique_item_id, items_ratings_df, item_feat):
    unique_item_id = unique_item_id.copy()
    # Define the filename where the data will be saved
    filename = 'dataset/saved_social_edges_100k.pkl'

    # Check if the file exists
    if os.path.exists(filename):
        # If it does, load the data and return it
        with open(filename, 'rb') as f:
            unique_user_id, unique_item_id_w_users, edge_index_user_to_item, item_feat = pickle.load(f)
        print('Data loaded from file')
    else:
        user_transactions_df = pd.read_parquet('dataset/user_transactions.parquet')
        contract_addresses = pd.read_parquet('dataset/contract_addresses.parquet')
        contract_set = set(contract_addresses['address'])

        # Shifting item_ids
        edge_index_user_to_item[1] = edge_index_user_to_item[1] + len(edge_index_user_to_item[0].unique())
        unique_item_id['mappedID'] = unique_item_id['mappedID'] + len(edge_index_user_to_item[0].unique())
        #Adding user_ids to item_ids since now users can be an item too #TODO if the GNN performance turned to be bad, just add 'to' user addresses to both item_feat and unique_item_ids
        unique_item_id_w_users = pd.concat([unique_user_id.rename(columns={'userId': 'entityId'}), unique_item_id.rename(columns={'itemId': 'entityId'})], axis=0)
        user_feat = torch.zeros((len(edge_index_user_to_item[0].unique()), item_feat.shape[1]))
        item_feat= torch.cat([user_feat, item_feat], dim=0) # Why don't we adding item_feat to user_feat?

        # unique_item_id_w_users['type'] = 'user' or 'item'

        users = items_ratings_df['userId'].unique()

        print('edge index shape before adding social edges:', edge_index_user_to_item.shape)
        count = 0
        #note there is a 200k constraint, delete it
        for i, interaction in tqdm(user_transactions_df.iterrows(), total=len(user_transactions_df)):
            if interaction['from'] not in contract_set and interaction['to'] not in contract_set and interaction['from'] in users and interaction['to'] in users:
                if interaction['from'] == interaction['to']: continue
                from_user_id = unique_item_id_w_users[unique_item_id_w_users['entityId'] == interaction['from']]['mappedID'].iloc[0]
                to_user_id = unique_item_id_w_users[unique_item_id_w_users['entityId'] == interaction['to']]['mappedID'].iloc[0]
                social_edge = torch.tensor([[from_user_id], 
                                            [to_user_id]], dtype=torch.int64)
                edge_index_user_to_item = torch.cat([edge_index_user_to_item, social_edge], dim=1)
                # count += 1
                # if count % 5 == 0: break
        print('edge index shape after adding social edges:', edge_index_user_to_item.shape)
        del user_transactions_df
        del contract_addresses
        del contract_set
        import gc
        gc.collect()

        #uncomment below
        with open(filename, 'wb') as f:
            pickle.dump((unique_user_id, unique_item_id_w_users, edge_index_user_to_item, item_feat), f)
        print('social edges saved to dataset/saved_social_edges_100k.pkl')
    
    return unique_user_id, unique_item_id_w_users, edge_index_user_to_item, item_feat

if experiment == 'add_social_edges':
    # uncomment below after debuging
    unique_user_id_w_social, unique_item_id_w_social, edge_index_user_to_item_w_social, item_feat_w_social = add_social_edges(edge_index_user_to_item, unique_user_id, unique_item_id, items_ratings_df, item_feat)
    data, train_data, test_data = train_test_generator(unique_user_id_w_social, item_feat_w_social, edge_index_user_to_item_w_social)
else:
    data, train_data, test_data = train_test_generator(unique_user_id, item_feat, edge_index_user_to_item)



In [29]:
########## GNN TRAINING ############
#if model_mode == GNN run below
model_gnn = GNN_recommender(data, train_data)


1
2
3
Device: 'cuda'


100%|██████████| 57/57 [00:01<00:00, 55.32it/s]


Epoch: 001, Loss: 0.5937


100%|██████████| 57/57 [00:01<00:00, 46.47it/s]


Epoch: 002, Loss: 0.4601


100%|██████████| 57/57 [00:01<00:00, 46.50it/s]


Epoch: 003, Loss: 0.4065


100%|██████████| 57/57 [00:01<00:00, 46.68it/s]


Epoch: 004, Loss: 0.3760


100%|██████████| 57/57 [00:01<00:00, 46.80it/s]


Epoch: 005, Loss: 0.3530


100%|██████████| 57/57 [00:01<00:00, 46.56it/s]


Epoch: 006, Loss: 0.3396


100%|██████████| 57/57 [00:01<00:00, 46.46it/s]


Epoch: 007, Loss: 0.3324


100%|██████████| 57/57 [00:01<00:00, 46.41it/s]


Epoch: 008, Loss: 0.3159


100%|██████████| 57/57 [00:01<00:00, 46.37it/s]

Epoch: 009, Loss: 0.3042





In [None]:
####### CSP EXPRIMENTS #######
### CSP #### note: if the ratio==1, rerun from the first step
if experiment == 'ucsp' or experiment == 'icsp':
    def csp_test_gen(train_data, test_data, unique_data, entity_index, experiment_abbr):
        train_data_unique_entities = set(train_data['user', 'rates', 'item'].edge_label_index[entity_index].unique().numpy())
        unique_entities = set(unique_data['mappedID'].unique())
        entities_not_in_train = unique_entities - train_data_unique_entities
        mask = torch.tensor([entity in entities_not_in_train for entity in test_data["user", "rates", "item"].edge_label_index[entity_index].numpy()])
        
        test_data_filtered = copy.deepcopy(test_data)
        test_data_filtered["user", "rates", "item"].edge_label_index = test_data_filtered["user", "rates", "item"].edge_label_index[:, mask]
        test_data_filtered["user", "rates", "item"].edge_label = test_data_filtered["user", "rates", "item"].edge_label[mask]
        
        ratio = len(test_data_filtered["user", "rates", "item"].edge_label_index[entity_index]) / len(test_data["user", "rates", "item"].edge_label_index[entity_index])
        print(f'test to train ratio {experiment_abbr}', ratio)
        
        return test_data_filtered, ratio

    test_data_csp, test_to_train_ratio_csp = csp_test_gen(
        train_data, test_data, unique_user_id, 0 if experiment == 'ucsp' else 1, 'CSP-user' if experiment == 'ucsp' else 'CSP-item'
    )
    print('test data len BEFOR CSP test gen:', len(test_data['user', 'rates', 'item'].edge_label_index[0]))
    print('test data len AFTER CSP test gen:', len(test_data_csp['user', 'rates', 'item'].edge_label_index[0]))


In [30]:
######## ALL_TO_ALL USER_ITEM PAIRS GENERATOR IN TEST_DATA #########

# If mode GNN run below
### SLICING TEST_DATA FOR ALL_TO_ALL EVAL ###
slice_rate = 1
if experiment == 'ucsp' or experiment == 'icsp': 
    slice_rate = 1
    test_data_sliced = copy.deepcopy(test_data_csp)
else:
    test_data_sliced = copy.deepcopy(test_data)

test_data_sliced["user", "rates", "item"].edge_label_index = test_data_sliced["user", "rates", "item"].edge_label_index[:, : int(slice_rate * len(test_data_sliced["user", "rates", "item"].edge_label_index[0]))]
test_data_sliced["user", "rates", "item"].edge_label = test_data_sliced["user", "rates", "item"].edge_label[ : int(slice_rate * len(test_data_sliced["user", "rates", "item"].edge_label))]

edge_index_zip = set(zip(test_data_sliced["user", "rates", "item"].edge_label_index[0].numpy(), test_data_sliced["user", "rates", "item"].edge_label_index[1].numpy()))

all_users = test_data_sliced["user", "rates", "item"].edge_label_index[0].unique().numpy()
all_items = test_data_sliced["user", "rates", "item"].edge_label_index[1].unique().numpy()

# which elp the model most: keep the social_edges in test and be evaluated or remove all social_edges in test_set?
if experiment == 'add_social_edges': all_items = [item for item in all_items if item > len(all_users)]

new_edges = []
new_labels = []

edge_index_set = set(edge_index_zip)
for user_id in tqdm(all_users, total=len(all_users)):
    count_user_new_edges = 0
    random.shuffle(all_items) #TODO: before that we should exclude items that user interacted with (ground_truth) since we add label=0 for all new edges
    for item_id in all_items:  #TODO: maybe here first shuffle item_id, to prevent adding same items for all users 
        if count_user_new_edges > each_user_all2all_new_edges:
            break
        if (user_id, item_id) not in edge_index_set:
            count_user_new_edges += 1
            new_edges.append((user_id, item_id))
            new_labels.append(0)

test_data_all2all = copy.deepcopy(test_data_sliced)
if new_edges:
    new_edges_tensor = torch.tensor(new_edges, dtype=torch.int64).t().contiguous()
    new_labels_tensor = torch.tensor(new_labels, dtype=torch.int64)

    test_data_all2all["user", "rates", "item"].edge_label_index = torch.cat((test_data_all2all["user", "rates", "item"].edge_label_index, new_edges_tensor), dim=1)
    test_data_all2all["user", "rates", "item"].edge_label = torch.cat((test_data_all2all["user", "rates", "item"].edge_label, new_labels_tensor), dim=0)

print('test edges shape BEFORE adding all possible user item pairs', test_data_sliced["user", "rates", "item"].edge_label_index.shape)
print('test edges shape AFTER adding all possible user item pairs', test_data_all2all["user", "rates", "item"].edge_label_index.shape)

print('unique test users', len(test_data_all2all["user", "rates", "item"].edge_label_index[0].unique()))
print('unique test items', len(test_data_all2all["user", "rates", "item"].edge_label_index[1].unique()))



100%|██████████| 610/610 [00:04<00:00, 147.19it/s]

test edges shape BEFORE adding all possible user item pairs torch.Size([2, 18144])
test edges shape AFTER adding all possible user item pairs torch.Size([2, 24854])
unique test users 610
unique test items 5662





In [31]:
######## GNN PRED FOR TEST_DATA_all2all ######### 
import time

def pred_gnn_gen(device, model_gnn, test_data):
    test_loader_gnn = LinkNeighborLoader(
        data=test_data_all2all,
        num_neighbors=[20, 10],
        edge_label_index=(("user", "rates", "item"), test_data_all2all["user", "rates", "item"].edge_label_index),
        edge_label=test_data_all2all["user", "rates", "item"].edge_label,
        batch_size= 3 * 128, # TO calculate latency on inference time, use 17 as batch size, this will yield 10400 predictions that is near to MF number of preds (=10600)
        shuffle=False,
    )
    sampled_data_gnn = next(iter(test_loader_gnn))
    preds_gnn = []
    ground_truths_gnn = []
    model_gnn = model_gnn.to(device)
    for sampled_data_gnn in tqdm(test_loader_gnn):
        with torch.no_grad():
            sampled_data_gnn.to(device)
            preds_gnn.append(model_gnn(sampled_data_gnn))
            ground_truths_gnn.append(sampled_data_gnn["user", "rates", "item"].edge_label)
    pred_gnn = torch.cat(preds_gnn, dim=0).cpu().numpy()
    ground_truth_gnn = torch.cat(ground_truths_gnn, dim=0).cpu().numpy()
    print('all ground truth len', len(ground_truth_gnn))
    return pred_gnn, ground_truth_gnn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # To check the memory usage of GNN, put cpu to be comparable with MF models
start_time = time.time()
pred_gnn, ground_truth_gnn = pred_gnn_gen(device, model_gnn, test_data_all2all)
end_time = time.time()
print('required time for GNN inference on testset', end_time - start_time)
# %memit pred_gnn_gen(device, test_data_all2all) %Uncomment to check the memory usage of GNN on inference

  0%|          | 0/65 [00:00<?, ?it/s]

100%|██████████| 65/65 [00:00<00:00, 146.72it/s]

all ground truth len 24854
required time for GNN inference on testset 0.46135830879211426





In [32]:
########### DATA PREPRATION FOR MF & POP MODELS  #############
'''
For LightFM models, we need a df of train and test data, 
but from GNN train/test generation, we have a HeteroData
Here we turn a HeteroData to a DataFrame
'''

def add_topic(df, contract_to_topic_df, unique_item_id):
    item_to_topic = pd.Series(contract_to_topic_df['most_probable_topic'].values, index=contract_to_topic_df['contract_name']).to_dict()
    mappedID_to_itemId = pd.Series(unique_item_id['itemId'].values, index=unique_item_id['mappedID']).to_dict()
    df['item_name'] = df['item'].map(mappedID_to_itemId)
    df['topic'] = df['item_name'].map(item_to_topic).fillna(0).astype(int)
    df = df.drop(columns=['item_name'])
    return df

test_df_index = test_data_all2all['user', 'rates', 'item'].edge_label_index.numpy()
test_df_label = test_data_all2all['user', 'rates', 'item'].edge_label.numpy()

test_df_index = test_df_index.T 
test_df_mf = pd.DataFrame(test_df_index, columns=['user', 'item'])
test_df_mf['rating'] = test_df_label


train_df_index = train_data['user', 'rates', 'item'].edge_label_index.numpy()
train_df_label = train_data['user', 'rates', 'item'].edge_label.numpy()
train_df_index = train_df_index.T 
train_df_mf = pd.DataFrame(train_df_index, columns=['user', 'item'])
train_df_mf['rating'] = train_df_label

if dataset_mode == 'contract':
    contract_to_topic_df = pd.read_parquet("dataset/contract_name_topic.parquet")
    train_df_mf= add_topic(train_df_mf, contract_to_topic_df, unique_item_id)
    test_df_mf = add_topic(test_df_mf, contract_to_topic_df, unique_item_id)

In [33]:
########### POP & MF_N & MF_C TRAIN/PRED  #############
top_contracts = train_df_mf['item'].value_counts()[:20].index.tolist() # can put any number > max(k) instead of 100
test_df_mf['pred_pop'] = 0
test_df_mf.loc[test_df_mf['item'].isin(top_contracts), 'pred_pop'] = 1
pred_pop = test_df_mf['pred_pop'].to_numpy()
ground_truth_pop = test_df_mf['rating'].to_numpy()

##### MF_N #####
def mfn_pred_gen(train_df_mf, test_df_mf):
    dataset = Dataset()
    user_ids_mfn = np.union1d(train_df_mf['user'].unique(), test_df_mf['user'].unique())
    item_ids_mfn = np.union1d(train_df_mf['item'].unique(), test_df_mf['item'].unique())
    dataset.fit(user_ids_mfn, item_ids_mfn)
    user_ids_mapping, _, item_ids_mapping, _ = dataset.mapping()

    (train_interactions_mfn, train_interactions_weight_mfn) = dataset.build_interactions((row['user'], row['item'], row['rating']) for index, row in train_df_mf.iterrows())

    model_mfn = LightFM(loss='warp')
    model_mfn.fit(train_interactions_mfn, epochs=30, num_threads=2)

    test_df_mf['pred_mfn'] = float(0)

    start_time = time.time()
    for user, user_data in tqdm(test_df_mf.groupby('user'), total=test_df_mf['user'].nunique()):
        user_id_internal = user_ids_mapping[user]
        item_ids_internal = np.array([item_ids_mapping[item] for item in user_data['item']])
        predictions_mfn = model_mfn.predict(user_id_internal, item_ids_internal)
        test_df_mf.loc[user_data.index, 'pred_mfn'] = predictions_mfn
    end_time = time.time()
    print('required time for MFN inference on testset with neg_edges_param = 10:', end_time - start_time)

    pred_mfn = test_df_mf['pred_mfn'].to_numpy()
    ground_truth_mfn = test_df_mf['rating'].to_numpy()
    return pred_mfn, ground_truth_mfn

pred_mfn, ground_truth_mfn = mfn_pred_gen(train_df_mf, test_df_mf)

##### MF_C #####
if dataset_mode == 'contract':
#     def mfc_pred_gen(train_df_mf, test_df_mf):
#         dataset = Dataset()
#         user_ids_mfc = np.union1d(train_df_mf['user'].unique(), test_df_mf['user'].unique())
#         item_ids_mfc = np.union1d(train_df_mf['topic'].unique(), test_df_mf['topic'].unique())
#         dataset.fit(user_ids_mfc, item_ids_mfc)
#         user_ids_mapping, _, item_ids_mapping, _ = dataset.mapping()

#         (train_interactions_mfc, train_interactions_weight_mfc) = dataset.build_interactions((row['user'], row['topic'], row['rating']) for index, row in train_df_mf.iterrows())

#         model_mfc = LightFM(loss='warp')
#         model_mfc.fit(train_interactions_mfc, epochs=30, num_threads=2, sample_weight=train_interactions_weight_mfc) # TODO maybe do not pass the weights to the MF models

#         def topic_popular_contracts(df):
#             item_rating_sum = df.groupby(['topic', 'item'])['rating'].sum().reset_index()
#             sorted_items = item_rating_sum.sort_values(['topic', 'rating'], ascending=[True, False])
#             topic_to_popular_items = {k: g['item'].tolist() for k, g in sorted_items.groupby('topic')}
#             return topic_to_popular_items

#         test_df_mf['pred_mfc'] = float(0)
#         topic_popular_contracts_dict = topic_popular_contracts(test_df_mf)

#         start_time = time.time()
#         for user, user_data in tqdm(test_df_mf.groupby('user'), total=test_df_mf['user'].nunique()):
#             user_id_internal = user_ids_mapping[user]
#             item_ids_internal = np.array([item_ids_mapping[item] for item in user_data['topic']])
#             predictions_mfc = model_mfc.predict(user_id_internal, item_ids_internal)
#             test_df_mf.loc[user_data.index, 'pred_mfc'] = predictions_mfc
#         end_time = time.time()
#         print('required time for MFC inference on testset with neg_edges_param = 10:', end_time - start_time)

#         pred_mfc = test_df_mf['pred_mfc'].to_numpy()
#         ground_truth_mfc = test_df_mf['rating'].to_numpy()
#         return pred_mfc, ground_truth_mfc

    # pred_mfc, ground_truth_mfc = mfc_pred_gen(train_df_mf, test_df_mf)

    def mfc_pred_gen(train_df_mf, test_df_mf):
        dataset = Dataset()
        user_ids_mfc = np.union1d(train_df_mf['user'].unique(), test_df_mf['user'].unique())
        item_ids_mfc = np.union1d(train_df_mf['item'].unique(), test_df_mf['item'].unique())
        topic_ids = np.union1d(train_df_mf['topic'].unique(), test_df_mf['topic'].unique())

        # We need to tell the dataset about the item feature columns we have:
        dataset.fit(
            users=user_ids_mfc, 
            items=item_ids_mfc,
            item_features=topic_ids,
        )
        user_ids_mapping, _, item_ids_mapping, _ = dataset.mapping()

        # Building item features such that each item has its associated topic as a feature
        item_features = dataset.build_item_features(
            (x, [y]) for x,y in zip(train_df_mf['item'], train_df_mf['topic'])
        )

        (train_interactions_mfn, train_interactions_weight_mfn) = dataset.build_interactions(
            (row['user'], row['item'], row['rating']) for index, row in train_df_mf.iterrows()
        )

        model_mfc = LightFM(loss='warp')
        # Including the item features in the fit method
        model_mfc.fit(
            train_interactions_mfn, 
            item_features=item_features, 
            epochs=30, 
            num_threads=2
        )
        test_df_mf['pred_mfc'] = float(0)

        start_time = time.time()
        for user, user_data in tqdm(test_df_mf.groupby('user'), total=test_df_mf['user'].nunique()):
            user_id_internal = user_ids_mapping[user]
            item_ids_internal = np.array([item_ids_mapping[item] for item in user_data['item']])
            predictions_mfn = model_mfc.predict(user_id_internal, item_ids_internal)
            test_df_mf.loc[user_data.index, 'pred_mfc'] = predictions_mfn
        end_time = time.time()
        print('required time for MFC inference on testset with neg_edges_param = 10:', end_time - start_time)

        pred_mfn = test_df_mf['pred_mfc'].to_numpy()
        ground_truth_mfn = test_df_mf['rating'].to_numpy()
        return pred_mfn, ground_truth_mfn

    pred_mfc, ground_truth_mfc = mfc_pred_gen(train_df_mf, test_df_mf)

100%|██████████| 610/610 [00:00<00:00, 977.60it/s]

required time for MFN inference on testset with neg_edges_param = 10: 0.6301472187042236





In [32]:
####### METRIC EVAL #######

# def precision_at_k(user_id, sorted_indices, ground_truth, k):
#     top_k_indices = sorted_indices[:k]
#     top_k_labels = ground_truth[top_k_indices]
#     num_ones = np.sum(ground_truth == 1)
#     hit = np.sum(top_k_labels > 0)

#     return hit / min(num_ones, k) if num_ones != 0 else k # k

# def average_hit_at_k(k, ground_truth, pred, user_ids, edge_index, model_variant):
#     precisions = []
#     for user_id in user_ids: # tqdm(user_ids, total=len(user_ids)):
#         mask = edge_index[0] == user_id
#         filtered_pred = pred[mask]
#         filtered_ground_truth = ground_truth[mask]
#         if np.sum(filtered_ground_truth == 1) == 0: continue
#         # print(filtered_pred)
#         # print(filtered_ground_truth)
#         sorted_indices = np.argsort(filtered_pred)[::-1]
#         pop_hit = np.sum(filtered_pred[:np.sum(filtered_ground_truth == 1)] > 0) / (min(np.sum(filtered_ground_truth == 1), k) if np.sum(filtered_ground_truth == 1) != 0 else k)
        
#         precisions.append(
#             precision_at_k(user_id, sorted_indices, filtered_ground_truth, k) if model_variant != 'pop' else pop_hit
#         )
#         break
        
#     return np.mean(precisions)

def precision_at_k(user_id, sorted_indices, ground_truth, k):
    """
    Computes the hit@k for a single user.

    Args:
    user_id: The user id.
    sorted_indices: Indices that would sort the predicted ratings.
    ground_truth: Actual ratings (binary) indicating whether an item is relevant or not.
    k: The number of recommendations to consider.

    Returns:
    The hit@k for the given user.
    """
    top_k_indices = sorted_indices[:k]
    top_k_labels = ground_truth[top_k_indices]
    
    # Check if there's any relevant item in the top k recommendations
    hit = int(np.sum(top_k_labels) > 0)

    return hit

def average_hit_at_k(k, ground_truth, pred, user_ids, edge_index, model_variant):
    """
    Computes the mean hit@k.

    Args:
    k: The number of recommendations to consider.
    ground_truth: Actual ratings (binary) indicating whether an item is relevant or not.
    pred: Predicted ratings.
    user_ids: Array of user ids to calculate the metric for.

    Returns:
    The mean hit@k over all users.
    """
    
    hits = []
    for user_id in user_ids: 
        mask = edge_index[0] == user_id
        filtered_pred = pred[mask]
        filtered_ground_truth = ground_truth[mask]
        sorted_indices = np.argsort(filtered_pred)[::-1]
        pop_hit = np.sum(filtered_pred[:np.sum(filtered_ground_truth == 1)] > 0) / (min(np.sum(filtered_ground_truth == 1), k) if np.sum(filtered_ground_truth == 1) != 0 else k)
        hits.append(precision_at_k(user_id, sorted_indices, filtered_ground_truth, k) if model_variant != 'pop' else pop_hit)
        
    return np.mean(hits)

def dcg_at_k(r, k):
    """
    Compute DCG@k for a list of relevance scores
    
    Parameters:
    - r: Relevance scores in rank order
    - k: Rank
    
    Returns:
    - DCG@k
    """
    r = np.asfarray(r)[:k]
    return np.sum(r / np.log2(np.arange(2, r.size + 2)))

def ndcg_at_k(r, k):
    """
    Compute NDCG@k for a list of relevance scores
    
    Parameters:
    - r: Relevance scores in rank order
    - k: Rank
    
    Returns:
    - NDCG@k
    """
    dcg_max = dcg_at_k(sorted(r, reverse=True), k)
    if not dcg_max:
        return 0.
    return dcg_at_k(r, k) / dcg_max

def calculate_ndcg_at_k(k, ground_truth, pred, edge_index):
    """
    Calculate the average NDCG@k for all users
    
    Parameters:
    - k: Rank
    - ground_truth: True relevance scores
    - pred: Predicted relevance scores
    - edge_index: User-item interaction indices
    
    Returns:
    - Average NDCG@k
    """
    user_ids = np.unique(edge_index[0].numpy())
    ndcgs = []
    for user_id in user_ids: # tqdm(user_ids, total=len(user_ids)):
        mask = edge_index[0] == user_id
        filtered_pred = pred[mask]
        filtered_ground_truth = ground_truth[mask]
        
        # Sort by predicted score
        sorted_indices = np.argsort(filtered_pred)[::-1]
        sorted_ground_truth = filtered_ground_truth[sorted_indices]
        
        ndcgs.append(ndcg_at_k(sorted_ground_truth, k))
        
    return np.mean(ndcgs)

def average_precision_at_k(user_id, sorted_indices, ground_truth, k):
    """
    Computes the average precision at k for a single user.
    
    Args:
    user_id: The user id.
    sorted_indices: Indices that would sort the predicted ratings.
    ground_truth: Actual ratings (binary) indicating whether an item is relevant or not.
    k: The number of recommendations to consider.
    
    Returns:
    The average precision at k for the given user.
    """
    top_k_indices = sorted_indices[:k]
    top_k_labels = ground_truth[top_k_indices]
    
    relevant_indices = np.where(top_k_labels > 0)[0]
    num_relevant = len(relevant_indices)
    
    if num_relevant == 0:
        return 0
    
    score = 0.0
    for i in relevant_indices:
        prec_at_i = np.sum(top_k_labels[:i+1]) / (i + 1)
        score += prec_at_i
    
    return score / min(num_relevant, k)

def mean_ap_at_k(k, ground_truth, pred, user_ids, edge_index):
    """
    Computes the mean average precision at k.
    
    Args:
    k: The number of recommendations to consider.
    ground_truth: Actual ratings (binary) indicating whether an item is relevant or not.
    pred: Predicted ratings.
    
    Returns:
    The mean average precision at k over all users.
    """
    
    average_precisions = []
    for user_id in user_ids: # tqdm(user_ids, total=len(user_ids)):
        mask = edge_index[0] == user_id
        filtered_pred = pred[mask]
        filtered_ground_truth = ground_truth[mask]
        sorted_indices = np.argsort(filtered_pred)[::-1]
        
        average_precisions.append(
            average_precision_at_k(user_id, sorted_indices, filtered_ground_truth, k)
        )
        
    return np.mean(average_precisions)

def evaluate(k_values, test_data_all2all, ground_truth, pred, model_variant_eval):
    edge_index = test_data_all2all['user', 'rates', 'item'].edge_label_index
    user_ids = np.unique(edge_index[0].numpy())

    for k in k_values:
        ### HIT@K ###
        hit_at_k = average_hit_at_k(k, ground_truth, pred, user_ids, edge_index, model_variant_eval)
        print(f"HIT@{k}: {hit_at_k}")
    if model_variant_eval != 'pop':
        for k in k_values:
            ### NDCG@K ###
            ndcg_result = calculate_ndcg_at_k(k, ground_truth, pred, edge_index)
            print(f"NDCG@{k}: {ndcg_result}")
        for k in k_values:
            map_at_k = mean_ap_at_k(k, ground_truth, pred, user_ids, edge_index)
            print(f"MAP@{k}: {map_at_k}")


eval_loader = {
    'gnn': {
        'ground_truth': ground_truth_gnn,
        'pred': pred_gnn
    },
    #'pop': {
     #   'ground_truth': ground_truth_pop,
      #  'pred': pred_pop
    #},
    #'mfn': {
    #    'ground_truth': ground_truth_mfn,
    #    'pred': pred_mfn
    #},
    # 'mfc': {
    #     'ground_truth': ground_truth_mfc,
    #     'pred': pred_mfc
    # },

}
model_variants = ['gnn']

for model_variant_eval in model_variants:
    k_values = [1, 5, 10, 15, 20] if mode != 'debug' else [1, 5, 10, 15, 20]
    print(f'$$$$$$ {model_variant_eval} $$$$$$')
    evaluate(k_values, test_data_all2all, ground_truth=eval_loader[model_variant_eval]['ground_truth'], pred=eval_loader[model_variant_eval]['pred'], model_variant_eval=model_variant_eval)

$$$$$$ gnn $$$$$$
HIT@1: 0.5475409836065573
HIT@5: 0.8688524590163934
HIT@10: 0.9229508196721311
HIT@15: 0.9393442622950819
HIT@20: 0.9459016393442623
NDCG@1: 0.5475409836065573
NDCG@5: 0.5676712411083231
NDCG@10: 0.6066840496623556
NDCG@15: 0.637545447173939
NDCG@20: 0.6572792294492733
MAP@1: 0.5475409836065573
MAP@5: 0.6400455373406193
MAP@10: 0.609270642303673
MAP@15: 0.5855408967910019
MAP@20: 0.5722937223394562


In [21]:
experiment = 'diversity'

In [22]:
############# DIVERSITY EXPERIMENT ##############
model_variants = ['mfn', 'mfc', 'pop']
eval_loader = {
    # 'gnn': {
    #     'ground_truth': ground_truth_gnn,
    #     'pred': pred_gnn
    # },
    'pop': {
        'ground_truth': ground_truth_pop,
        'pred': pred_pop
    },
    'mfn': {
        'ground_truth': ground_truth_mfn,
        'pred': pred_mfn
    },
    'mfc': {
        'ground_truth': ground_truth_mfc,
        'pred': pred_mfc
    },

}
for model_variant_eval in model_variants:
    k_values = [1, 5, 10, 15, 20] if mode != 'debug' else [1, 5, 10, 15, 20]
if experiment == 'diversity':
    edge_index = test_data_all2all['user', 'rates', 'item'].edge_label_index
    user_ids = np.unique(edge_index[0].numpy())

    for model_variant_eval in model_variants:
        pred = eval_loader[model_variant_eval]['pred']
        ground_truth = eval_loader[model_variant_eval]['ground_truth']

        for k in k_values:
            recs_list = set()
            for user_id in user_ids: # tqdm(user_ids, total=len(user_ids)):
                mask = edge_index[0] == user_id
                filtered_pred = pred[mask]
                filtered_items = edge_index[1][mask]
                sorted_indices = np.argsort(filtered_pred)[::-1]
                top_k_indices = sorted_indices[:k]
                top_k_indices = top_k_indices.copy()
                top_k_items = filtered_items[top_k_indices].numpy()
                recs_list.update(top_k_items)

            diversity_at_k = len(recs_list) / len(np.unique(edge_index[1].numpy()))
            print(f'Item coverage diversity for {model_variant_eval} @{k}:', diversity_at_k)
        
        for k in k_values:
            users_with_relevant_recs = set()
            
            for user_id in user_ids: # tqdm(user_ids, total=len(user_ids)):
                mask = edge_index[0] == user_id
                filtered_pred = pred[mask]
                sorted_indices = np.argsort(filtered_pred)[::-1]
                top_k_indices = sorted_indices[:k]
                filtered_ground_truth = ground_truth[mask] 
                relevant_recs = filtered_ground_truth[top_k_indices] 
                
                if np.sum(relevant_recs) > 0:  # At least one relevant recommendation
                    users_with_relevant_recs.add(user_id)
            
            user_coverage_at_k = len(users_with_relevant_recs) / len(user_ids)
            print(f'User coverage for {model_variant_eval} @{k}:', user_coverage_at_k)

    #######  Intra-List Diversity #######
    # TODO: Based on item_feat define the compute_dissimilarity method
    # for k in k_values:
    #     avg_dissimilarity = []
        
    #     for user_id in tqdm(user_ids, total=len(user_ids)):
    #         mask = edge_index[0] == user_id
    #         filtered_pred = pred[mask]
    #         filtered_items = edge_index[1][mask]
    #         sorted_indices = np.argsort(filtered_pred)[::-1]
    #         top_k_indices = sorted_indices[:k]
    #         top_k_items = filtered_items[top_k_indices].numpy()
            
    #         dissimilarity_sum = 0
    #         for i in range(len(top_k_items)):
    #             for j in range(i+1, len(top_k_items)):
    #                 dissimilarity_sum += compute_dissimilarity(top_k_items[i], top_k_items[j])
            
    #         if k > 1:
    #             avg_pairwise_dissimilarity = 2 * dissimilarity_sum / (k * (k - 1))
    #             avg_dissimilarity.append(avg_pairwise_dissimilarity)
        
    #     intra_list_diversity_at_k = np.mean(avg_dissimilarity)
    #     print(f'Intra-list diversity for {model_mode_eval} @{k}:', intra_list_diversity_at_k)

'''
Item coverage diversity for gnn @1: 0.1974024375230826
Item coverage diversity for gnn @5: 0.6526529607287948
Item coverage diversity for gnn @10: 0.9267512002954573
Item coverage diversity for gnn @15: 0.9996922319340146
Item coverage diversity for gnn @20: 1.0
User coverage for gnn @1: 0.5559827456864216
User coverage for gnn @5: 0.6452550637659414
User coverage for gnn @10: 0.6614778694673669
User coverage for gnn @15: 0.6639159789947486
User coverage for gnn @20: 0.6641035258814704
Item coverage diversity for mfn @1: 0.18576880462883172
Item coverage diversity for mfn @5: 0.5387172227009726
Item coverage diversity for mfn @10: 0.9447864089622061
Item coverage diversity for mfn @15: 0.9996922319340146
Item coverage diversity for mfn @20: 0.999938446386803
User coverage for mfn @1: 0.5041260315078769
User coverage for mfn @5: 0.5982745686421606
User coverage for mfn @10: 0.6395348837209303
User coverage for mfn @15: 0.6611965491372843
User coverage for mfn @20: 0.6641035258814704
'''


Item coverage diversity for mfn @1: 0.18333537857405816
Item coverage diversity for mfn @5: 0.5421524113388145
Item coverage diversity for mfn @10: 0.9469873604123206
Item coverage diversity for mfn @15: 0.99987728555651
Item coverage diversity for mfn @20: 1.0
User coverage for mfn @1: 0.5025765951466317
User coverage for mfn @5: 0.5920547175114775
User coverage for mfn @10: 0.6301883256816265
User coverage for mfn @15: 0.6531434460788906
User coverage for mfn @20: 0.6569849152065961
Item coverage diversity for mfc @1: 0.19830654067983802
Item coverage diversity for mfc @5: 0.6280525217818137
Item coverage diversity for mfc @10: 0.9240397594796907
Item coverage diversity for mfc @15: 0.999938642778255
Item coverage diversity for mfc @20: 1.0
User coverage for mfc @1: 0.4732502576595147
User coverage for mfc @5: 0.5725662887660452
User coverage for mfc @10: 0.6233486367469315
User coverage for mfc @15: 0.6515506418064274
User coverage for mfc @20: 0.6568912208376276
Item coverage diver

'\nItem coverage diversity for gnn @1: 0.1974024375230826\nItem coverage diversity for gnn @5: 0.6526529607287948\nItem coverage diversity for gnn @10: 0.9267512002954573\nItem coverage diversity for gnn @15: 0.9996922319340146\nItem coverage diversity for gnn @20: 1.0\nUser coverage for gnn @1: 0.5559827456864216\nUser coverage for gnn @5: 0.6452550637659414\nUser coverage for gnn @10: 0.6614778694673669\nUser coverage for gnn @15: 0.6639159789947486\nUser coverage for gnn @20: 0.6641035258814704\nItem coverage diversity for mfn @1: 0.18576880462883172\nItem coverage diversity for mfn @5: 0.5387172227009726\nItem coverage diversity for mfn @10: 0.9447864089622061\nItem coverage diversity for mfn @15: 0.9996922319340146\nItem coverage diversity for mfn @20: 0.999938446386803\nUser coverage for mfn @1: 0.5041260315078769\nUser coverage for mfn @5: 0.5982745686421606\nUser coverage for mfn @10: 0.6395348837209303\nUser coverage for mfn @15: 0.6611965491372843\nUser coverage for mfn @20: 