In [None]:
####### INSTALLATION #######
# Environment bootstrap: reinstalls a pinned torch, then installs the PyG stack
# and the remaining libraries this notebook imports.
# NOTE(review): only torch is version-pinned; every other install below takes
# whatever version is current at run time.

!pip uninstall torch -y
!pip install torch==1.13.1
# !pip uninstall torch-scatter -y
# !pip uninstall torch-sparse -y
# !pip uninstall pyg-lib -y
# !pip uninstall git+https://github.com/pyg-team/pytorch_geometric.git -y
# !pip uninstall sentence_transformers -y

import os
import torch
# Export the torch version as an environment variable; the pyg-lib line below
# references it as ${TORCH} in the wheel-index URL.
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

!pip uninstall torch-scatter torch-sparse torch-geometric torch-cluster  --y
# The wheel index URL is built from the installed torch version.
!pip install torch-scatter -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install pyg-lib -f https://data.pyg.org/whl/nightly/torch-${TORCH}.html
!pip install git+https://github.com/pyg-team/pytorch_geometric.git
!pip install pandas
!pip install matplotlib
!pip install pyarrow fastparquet
!pip install transformers
!pip install lightfm
# !pip install torch-sparse -f https://data.pyg.org/whl/torch-{torch.__version__}.html
# !pip install torch-cluster -f https://data.pyg.org/whl/torch-{torch.__version__}.html
# !pip install sentence_transformers==0.1.0

In [14]:
###### IMPORT #######
import numpy as np
import pandas as pd
import random
import copy
# from neo4j import GraphDatabase
from torch_geometric.data import Data
import torch
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from tqdm import tqdm
from collections import defaultdict
import json
import multiprocessing
import matplotlib.pyplot as plt
from lightfm import LightFM
from lightfm.data import Dataset

from sklearn import preprocessing, feature_extraction, model_selection
from sklearn.metrics import mean_absolute_error, mean_squared_error
from torch_geometric.data import HeteroData
import torch_geometric.transforms as T
from torch_geometric.loader import LinkNeighborLoader
from torch_geometric.nn import SAGEConv, to_hetero
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score
import re
from transformers import AutoTokenizer, AutoModel
# from sentence_transformers import SentenceTransformer

In [17]:
# Notebook-wide configuration. Every later cell reads these names, so change
# the run here rather than editing the cells below.

# Experiment selector (index -> experiment name). The list that used to be
# assigned to this name first was dead code: it was rebound to this dict on the
# very next statement.
possible_experiments = {
    0: 'none',
    1: 'diversity',
    2: 'ucsp',
    3: 'icsp',
    4: 'usparcity',
    5: 'isparcity',
    6: 'sBERT',
    7: 'TFIDF',
    8: 'ablation_item_feat',
    9: 'ablation_social_edges',  # meaning adding social relationships
}
experiment = possible_experiments[7]

# 'debug' truncates the interaction table (see contract_loader); 'experiment'
# uses all of it.
possible_modes = ['debug', 'experiment']
mode = possible_modes[0]

model_variants = ['gnn', 'pop', 'mfn', 'mfc']
# Number of interactions kept in debug mode. When running sBERT, the embedding
# is saved for 100k contracts.
len_interactions_to_consider = 100000
# model_variant_eval = model_variants[1]

# Selects the loader function: '<dataset_mode>_loader'.
dataset_mode = 'contract'



In [18]:
#### DATA LOADER ####
from torch_geometric.data import download_url, extract_zip
from torch import Tensor

def data_loader(ratings_df):
    """Map raw user/item ids to consecutive indices and build the edge index.

    Args:
        ratings_df: DataFrame with (at least) the columns ``userId`` and
            ``itemId``; each row is one user-item interaction.

    Returns:
        ``(unique_user_id, unique_item_id, edge_index_user_to_item)`` where the
        first two are DataFrames pairing every distinct raw id (in order of
        first appearance) with a ``mappedID`` counted from 0, and the third is
        a tensor with two rows: the mapped user id and the mapped item id of
        every row of ``ratings_df``.
    """
    def index_column(column):
        # Distinct raw ids -> lookup frame with a 0..n-1 'mappedID'.
        raw_ids = ratings_df[column].unique()
        lookup = pd.DataFrame(data={
            column: raw_ids,
            'mappedID': pd.RangeIndex(len(raw_ids)),
        })
        # Left-join so the result keeps one entry per interaction row.
        joined = pd.merge(ratings_df[column], lookup,
                          left_on=column, right_on=column, how='left')
        return lookup, torch.from_numpy(joined['mappedID'].values)

    unique_user_id, ratings_user_id = index_column('userId')
    unique_item_id, ratings_item_id = index_column('itemId')

    # Row 0: source (user) indices, row 1: destination (item) indices.
    edge_index_user_to_item = torch.stack([ratings_user_id, ratings_item_id], dim=0)
    return unique_user_id, unique_item_id, edge_index_user_to_item

def movie_loader():
    """Download the MovieLens "latest-small" archive and prepare it for the pipeline.

    Downloads and extracts the archive into the current directory, renames the
    MovieLens columns to the generic ``itemId`` / ``name`` used by the rest of
    the notebook, and one-hot encodes the pipe-separated ``genres`` column as
    item features.

    Returns:
        ``(unique_user_id, unique_item_id, edge_index_user_to_item, items_df,
        item_feat, items_ratings_df)``; the first three come from
        ``data_loader``.
    """
    archive_path = download_url(
        'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip', '.')
    extract_zip(archive_path, '.')

    items_ratings_df = (
        pd.read_csv('./ml-latest-small/ratings.csv')
        .rename(columns={'movieId': 'itemId'})
    )
    unique_user_id, unique_item_id, edge_index_user_to_item = data_loader(items_ratings_df)

    # Attach the consecutive item index and order the rows by it.
    # Original author's note: just the last 20 movies have NaN mappedId.
    items_df = (
        pd.read_csv('./ml-latest-small/movies.csv')
        .rename(columns={'movieId': 'itemId', 'title': 'name'})
        .merge(unique_item_id, on='itemId', how='left')
        .sort_values('mappedID')
    )

    genre_flags = items_df['genres'].str.get_dummies('|')
    print(genre_flags[["Action", "Adventure", "Drama", "Horror"]].head())
    item_feat = torch.from_numpy(genre_flags.values).to(torch.float)
    assert item_feat.size() == (9742, 20)  # 20 genres in total.
    return unique_user_id, unique_item_id, edge_index_user_to_item, items_df, item_feat, items_ratings_df

def contract_loader():
    """Load the user-contract interaction data and the item feature matrix.

    Reads ``dataset/user_contract_rating.parquet``, optionally filters it for
    the sparsity experiments, truncates it in debug mode, loads precomputed
    item features from a ``.npy`` file and builds the id mappings / edge index
    through ``data_loader``.

    Reads the notebook-level settings ``experiment``, ``mode`` and
    ``len_interactions_to_consider``.

    Returns:
        ``(unique_user_id, unique_item_id, edge_index_user_to_item, items_df,
        item_feat, items_ratings_df)``.
    """
    items_ratings_df = pd.read_parquet('dataset/user_contract_rating.parquet')
    def calculate_sparcity_value(df):
        # Value printed by filter_interactions for the filtered frame.
        num_users = df['user'].nunique()
        num_items = df['item'].nunique()
        num_interactions = len(df)
        # NOTE(review): the division by 100 is not explained anywhere in this
        # file; the usual sparsity definition is 1 - interactions / (users * items).
        # Confirm whether it is intentional.
        total_possible_interactions = num_users * num_items / 100
        sparsity = 1 - (num_interactions / total_possible_interactions)
        return sparsity

    def filter_interactions(df, column, k):
        # Keep only the rows whose value in `column` occurs more than k times.
        valid_entries = df[column].value_counts()
        valid_entries = valid_entries[valid_entries > k]
        df = df[df[column].isin(valid_entries.index)]
        print(f'{column} sparcity value is:', calculate_sparcity_value(df))
        return df

    ########## SPARSITY EXPERIMENT ###########
    if experiment == 'usparcity':
        k = 5
        items_ratings_df = filter_interactions(items_ratings_df, 'user', k)
    elif experiment == 'isparcity':
        k = 5
        items_ratings_df = filter_interactions(items_ratings_df, 'item', k)

    # Debug mode works on the first `len_interactions_to_consider` rows only.
    items_ratings_df = items_ratings_df[:len_interactions_to_consider] if mode == 'debug' else items_ratings_df
    # Item table: one row per distinct contract name, with a 0-based itemId
    # produced by pd.factorize.
    items_df = {}
    items_df['name'] = items_ratings_df['item'].unique()
    items_df['itemId'], unique_names = pd.factorize(items_df['name'])
    # items_df['itemId'] = items_df['itemId'] + 1 #TODO test that commenting this line didn't break anything
    items_df = pd.DataFrame(items_df, columns=['itemId', 'name'])

    def get_item_feat_sbert(items_df):
        # Computes BERT-based item embeddings from contract documentation.
        # Not called below (the call is commented out in favour of a saved .npy file).
        contract2comments = pd.read_parquet('dataset/contracts2comment.parquet')
        # Rows describing the class that has the same name as its contract.
        c2c_main_class = contract2comments[contract2comments['contract_name'] == contract2comments['class_name']]

        def reorder_text(text):
            # Move every line containing "@notice" in front of the other lines.
            lines = text.split("\n")
            notice_lines = [line for line in lines if "@notice" in line]
            other_lines = [line for line in lines if "@notice" not in line]
            reorderd_text = "\n".join(notice_lines + other_lines)
            return reorderd_text

        def preprocess_text(text):
            text = reorder_text(text)
            # Strip URLs.
            text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
            # Replace every non-word character with a space.
            text = re.sub(r'\W', ' ', text)
            # Remove extra spaces
            text = re.sub(r'\s+', ' ', text).strip()
            # Cap the text at 512 characters.
            text = text[:512] if len(text) > 512 else text
            return text

        # One sentence per item: the main class documentation when present and
        # non-empty, otherwise the contract's class names joined with spaces.
        sentences = []
        for i, item in items_df.iterrows():
            comment_class = c2c_main_class[c2c_main_class['contract_name'] == item['name']]
            if not comment_class.empty and comment_class['class_documentation'].iloc[0] != '':
                sentences.append(comment_class['class_documentation'].iloc[0])
            else:
                class_names = contract2comments[contract2comments['contract_name'] == item['name']]['class_name']
                sentences.append(' '.join(class_names))

        preprocessed_sentences = [preprocess_text(sentence) for sentence in sentences]
        tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/bert-base-nli-mean-tokens")
        model = AutoModel.from_pretrained("sentence-transformers/bert-base-nli-mean-tokens")
        device = torch.device("cpu") #"cuda" if torch.cuda.is_available() else "cpu") # NOT enough GPU memory
        model = model.to(device)
        # All sentences are tokenized and encoded in a single batch.
        inputs = tokenizer(preprocessed_sentences, padding=True, truncation=True, return_tensors="pt", max_length=512)
        inputs = {key: tensor.to(device) for key, tensor in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)
            # Average the last hidden state over dim 1 to get one vector per sentence.
            embeddings = outputs.last_hidden_state.mean(dim=1)
        item_feat = embeddings
        # model = SentenceTransformer('sentence-transformers/distilbert-base-nli-mean-tokens')
        # embeddings = model.encode(preprocessed_sentences)

        return item_feat

    def get_item_feat_tfidf(items_df):
        # Builds a multi-hot keyword matrix per item from the top-words table.
        # Not called below (the call is commented out in favour of a saved .npy file).
        contract_top_words_df = pd.read_parquet('dataset/contract_top_words.parquet')
        contract_top_words_df = contract_top_words_df.rename(columns={'contract_name': 'name'})
        contracts_df_top_words = items_df.merge(contract_top_words_df, on='name', how='left')
        contracts_df_top_words['keywords'] = contracts_df_top_words['keywords'].fillna('')
        items_df = contracts_df_top_words
        items_df.set_index('itemId', inplace=True)
        # f =5 # ratio to determine the number of top keywords selected for each contract to construct item_feat
        # As written, split followed by join keeps every keyword (no truncation happens).
        items_df['truncated_keywords'] = items_df['keywords'].apply(lambda x: ','.join(x.split(',')))
        X_df = items_df['truncated_keywords'].str.get_dummies(',')
        item_feat = torch.from_numpy(X_df.values).to(torch.float)
        return item_feat

    ########### SBERT EXPERIMENT ###########
    # NOTE(review): the precomputed feature files are loaded without checking
    # that their row count/order matches `items_df`; presumably they were
    # generated from the same 100k-interaction slice — confirm before running
    # the sparsity experiments or 'experiment' mode.
    if experiment == 'sBERT':
        # item_feat = get_item_feat_sbert(items_df)
        item_feat = np.load('sbert_embeddings_100k.npy')
        item_feat = torch.from_numpy(item_feat).to(torch.float)
        print(item_feat.shape)
    else: # elif experiment == 'TFIDF': TODO
        # Every experiment other than 'sBERT' currently uses the TF-IDF features.
        # item_feat = get_item_feat_tfidf(items_df)
        item_feat = np.load('tfidf_embeddings_100k.npy')
        item_feat = torch.from_numpy(item_feat).to(torch.float)

    print('item feature tensor shape', item_feat.shape)
    # data_loader expects the generic column names userId / itemId.
    items_ratings_df = items_ratings_df.rename(columns={'user': 'userId', 'item': 'itemId'})
    unique_user_id, unique_item_id, edge_index_user_to_item = data_loader(items_ratings_df)
    print('number of unique users', len(unique_user_id))
    print('number of unique items', len(unique_item_id))
    return unique_user_id, unique_item_id, edge_index_user_to_item, items_df, item_feat, items_ratings_df

# Dispatch table from loader name to loader function; the active entry is
# chosen by the notebook-level `dataset_mode` setting.
loaders = {
    'contract_loader': contract_loader,
    'movie_loader': movie_loader,
}
selected_loader = loaders[f'{dataset_mode}_loader']
(
    unique_user_id,
    unique_item_id,
    edge_index_user_to_item,
    items_df,
    item_feat,
    items_ratings_df,
) = selected_loader()


item feature tensor shape torch.Size([17310, 8023])
number of unique users 10761
number of unique items 17310


In [19]:
######### LINK BINARY PRED MODEL ##########
def train_test_generator(unique_user_id, item_feat, edge_index_user_to_item):
    """Assemble the heterogeneous user-item graph and split its links.

    Args:
        unique_user_id: user id mapping; only its length is used (number of
            user nodes).
        item_feat: item feature tensor; its first dimension is used as the
            number of item nodes and the tensor itself becomes ``item.x``.
        edge_index_user_to_item: edge index stored on the
            ``("user", "rates", "item")`` edge type.

    Returns:
        ``(data, train_data, test_data)``: the undirected full graph and the
        train / test outputs of ``RandomLinkSplit``. The validation split is
        requested with ``num_val=0`` and discarded.
    """
    graph = HeteroData()
    graph["user"].node_id = torch.arange(len(unique_user_id))
    graph["item"].node_id = torch.arange(item_feat.shape[0])
    graph["item"].x = item_feat
    graph["user", "rates", "item"].edge_index = edge_index_user_to_item
    # Adds the reverse ("item", "rev_rates", "user") edges.
    graph = T.ToUndirected()(graph)

    split_options = dict(
        num_val=0,
        num_test=0.2,
        disjoint_train_ratio=0.3,
        neg_sampling_ratio=2,
        add_negative_train_samples=False,
        edge_types=("user", "rates", "item"),
        rev_edge_types=("item", "rev_rates", "user"),
    )
    train_split, _unused_val_split, test_split = T.RandomLinkSplit(**split_options)(graph)
    return graph, train_split, test_split

def GNN_recommender(data, train_data, hidden_channels=64, num_epochs=9, lr=0.001):
    """Train a heterogeneous GraphSAGE link predictor on the user-item graph.

    Args:
        data: the full ``HeteroData`` graph; supplies node counts, the item
            feature width and the metadata used by ``to_hetero``.
        train_data: the training split; its ``("user", "rates", "item")``
            ``edge_label_index`` / ``edge_label`` are the supervision edges.
        hidden_channels: embedding / hidden size (default 64, as before).
        num_epochs: number of passes over the training loader (default 9,
            matching the original ``range(1, 10)``).
        lr: Adam learning rate (default 0.001, as before).

    Returns:
        The trained model, left on the device it was trained on.
    """
    # Define seed edges:
    edge_label_index = train_data["user", "rates", "item"].edge_label_index
    edge_label = train_data["user", "rates", "item"].edge_label
    train_loader = LinkNeighborLoader(
        data=train_data,
        num_neighbors=[20, 10],
        neg_sampling_ratio=2.0,
        edge_label_index=(("user", "rates", "item"), edge_label_index),
        edge_label=edge_label,
        batch_size=128,
        shuffle=True,
    )

    # Fix: take the item feature width from the graph that was passed in rather
    # than from the notebook-global `item_feat`, so the function no longer
    # depends on hidden state.
    item_feature_dim = data["item"].x.shape[1]

    class GNN(torch.nn.Module):
        """Two-layer GraphSAGE encoder (made heterogeneous via ``to_hetero``)."""

        def __init__(self, hidden_channels):
            super().__init__()
            self.conv1 = SAGEConv(hidden_channels, hidden_channels)
            self.conv2 = SAGEConv(hidden_channels, hidden_channels)

        def forward(self, x: Tensor, edge_index: Tensor) -> Tensor:
            x = F.relu(self.conv1(x, edge_index))
            x = self.conv2(x, edge_index)
            return x

    class Classifier(torch.nn.Module):
        """Scores each supervision edge by the dot product of its endpoint embeddings."""

        def forward(self, x_user: Tensor, x_item: Tensor, edge_label_index: Tensor) -> Tensor:
            # Convert node embeddings to edge-level representations:
            edge_feat_user = x_user[edge_label_index[0]]
            edge_feat_item = x_item[edge_label_index[1]]
            # Apply dot-product to get a prediction per supervision edge:
            return (edge_feat_user * edge_feat_item).sum(dim=-1)

    class Model(torch.nn.Module):
        """Input projections/embeddings + hetero GNN + dot-product classifier."""

        def __init__(self, hidden_channels):
            super().__init__()
            # Items get a linear projection of their features plus a learned
            # embedding; users only have a learned embedding.
            self.item_lin = torch.nn.Linear(item_feature_dim, hidden_channels)
            self.user_emb = torch.nn.Embedding(data["user"].num_nodes, hidden_channels)
            self.item_emb = torch.nn.Embedding(data["item"].num_nodes, hidden_channels)
            # Instantiate the homogeneous GNN and convert it into a
            # heterogeneous variant:
            self.gnn = to_hetero(GNN(hidden_channels), metadata=data.metadata())
            self.classifier = Classifier()

        def forward(self, data: HeteroData) -> Tensor:
            # `x_dict` holds feature matrices of all node types
            # `edge_index_dict` holds all edge indices of all edge types
            x_dict = {
                "user": self.user_emb(data["user"].node_id),
                "item": self.item_lin(data["item"].x) + self.item_emb(data["item"].node_id),
            }
            x_dict = self.gnn(x_dict, data.edge_index_dict)
            return self.classifier(
                x_dict["user"],
                x_dict["item"],
                data["user", "rates", "item"].edge_label_index,
            )

    ########## TRAINING ##########
    model = Model(hidden_channels=hidden_channels)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Device: '{device}'")
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    for epoch in range(1, num_epochs + 1):
        total_loss = total_examples = 0
        for sampled_data in tqdm(train_loader):
            optimizer.zero_grad()
            sampled_data.to(device)
            pred = model(sampled_data)
            ground_truth = sampled_data["user", "rates", "item"].edge_label
            loss = F.binary_cross_entropy_with_logits(pred, ground_truth)
            loss.backward()
            optimizer.step()
            # Weight the batch loss by its size so the epoch figure is a
            # per-example average.
            total_loss += float(loss) * pred.numel()
            total_examples += pred.numel()

        # TODO: Add a validation loader (AUC on a held-out split) and keep the
        # best model; the split in train_test_generator currently uses num_val=0.
        print(f"Epoch: {epoch:03d}, Loss: {total_loss / total_examples:.4f}")

    return model

In [20]:
########## TRAIN TEST GENERATION ############

####### ITEM FEATURE ABLATION EXPERIMENT #######
# Replace the item features with an all-zero tensor of the same shape and dtype.
if experiment == 'ablation_item_feat':
    item_feat = torch.zeros_like(item_feat)

# ####### SOCIAL EDGES ABLEATION EXPERIMENT #######
def add_social_edges(edge_index_user_to_item, unique_item_id, items_ratings_df, item_feat, user_id_map=None):
    """Add user-to-user transaction edges to the user-item edge index.

    Users are inserted as extra nodes at the front of the item index space:
    every real item id is shifted up by the number of users, and a social edge
    ``from -> to`` is stored as ``[mapped user id of from, mapped user id of to]``.

    Args:
        edge_index_user_to_item: user->item edge index tensor (not modified).
        unique_item_id: item id mapping frame with a ``mappedID`` column (not modified).
        items_ratings_df: interaction frame; its ``userId`` column defines the
            set of known users.
        item_feat: item feature tensor, one row per item (not modified).
        user_id_map: user id mapping frame (``userId`` / ``mappedID``). When
            omitted, the notebook-level ``unique_user_id`` is used, as the
            original implementation did.

    Returns:
        ``(unique_user_id, unique_item_id, edge_index_user_to_item, item_feat)``
        — new objects: the two mapping frames with an added ``type`` column
        (item ``mappedID`` shifted), the extended edge index and the extended
        feature tensor.

    Fixes compared with the previous version:
        * Zero feature rows for the user nodes are now placed BEFORE the item
          features. Item ids are shifted up by the number of users, so the old
          order (items first, zeros last) left item features misaligned with
          their node ids.
        * Inputs are no longer mutated in place, so re-running the cell cannot
          shift the ids twice.
        * The per-row Python loop (quadratic ``torch.cat`` and frame scans) is
          replaced by vectorised pandas filtering.
    """
    if user_id_map is None:
        # Backward-compatible fallback to the notebook-level mapping.
        user_id_map = unique_user_id

    user_transactions_df = pd.read_csv('dataset/user_transactions.csv')
    contract_addresses = pd.read_csv('dataset/contract_addresses.csv')
    contract_set = set(contract_addresses['address'])

    num_users = len(edge_index_user_to_item[0].unique())

    # Shift item ids on copies so the caller's objects stay untouched.
    edge_index_user_to_item = edge_index_user_to_item.clone()
    edge_index_user_to_item[1] += num_users
    unique_item_id = unique_item_id.copy()
    unique_item_id['mappedID'] = unique_item_id['mappedID'] + num_users
    unique_item_id['type'] = 'item'
    user_id_map = user_id_map.copy()
    user_id_map['type'] = 'user'

    # Item-space rows [0, num_users) are the user nodes (zero features);
    # rows [num_users, ...) are the real items, matching the shifted ids.
    user_feat = torch.zeros((num_users, item_feat.shape[1]),
                            dtype=item_feat.dtype, device=item_feat.device)
    item_feat = torch.cat([user_feat, item_feat], dim=0)

    # A transaction is a social edge when both endpoints are known users and
    # neither is a contract address.
    users = set(items_ratings_df['userId'].unique())
    sender = user_transactions_df['from']
    receiver = user_transactions_df['to']
    is_social = (
        sender.isin(users) & receiver.isin(users)
        & ~sender.isin(contract_set) & ~receiver.isin(contract_set)
    )

    print('edge index shape before adding social edges:', edge_index_user_to_item.shape)
    if is_social.any():
        user_to_mapped = dict(zip(user_id_map['userId'], user_id_map['mappedID']))
        from_ids = torch.tensor(sender[is_social].map(user_to_mapped).to_numpy(dtype='int64'))
        to_ids = torch.tensor(receiver[is_social].map(user_to_mapped).to_numpy(dtype='int64'))
        social_edges = torch.stack([from_ids, to_ids], dim=0)
        # Single concatenation, in the original transaction order.
        edge_index_user_to_item = torch.cat([edge_index_user_to_item, social_edges], dim=1)
    print('edge index shape after adding social edges:', edge_index_user_to_item.shape)

    return user_id_map, unique_item_id, edge_index_user_to_item, item_feat
# Only the social-edge experiment rewrites the mappings, edge index and item
# features before the graph is built.
if experiment == 'ablation_social_edges':
    unique_user_id, unique_item_id, edge_index_user_to_item, item_feat = add_social_edges(edge_index_user_to_item, unique_item_id, items_ratings_df, item_feat)


# Build the full graph and its train / test link splits.
data, train_data, test_data = train_test_generator(unique_user_id, item_feat, edge_index_user_to_item)

In [21]:
########## GNN TRAINING ############
# Original author's note: run this only when the GNN model is selected
# ("if model_mode == GNN"). NOTE(review): `model_mode` is not defined in the
# visible part of the notebook, so the cell currently always trains.
model = GNN_recommender(data, train_data)


Device: 'cuda'


100%|██████████| 188/188 [00:29<00:00,  6.27it/s]


Epoch: 001, Loss: 0.4497


100%|██████████| 188/188 [00:29<00:00,  6.34it/s]


Epoch: 002, Loss: 0.3306


100%|██████████| 188/188 [00:29<00:00,  6.31it/s]


Epoch: 003, Loss: 0.2843


100%|██████████| 188/188 [00:29<00:00,  6.36it/s]


Epoch: 004, Loss: 0.2547


100%|██████████| 188/188 [00:30<00:00,  6.14it/s]


Epoch: 005, Loss: 0.2314


100%|██████████| 188/188 [00:30<00:00,  6.19it/s]


Epoch: 006, Loss: 0.2107


100%|██████████| 188/188 [00:29<00:00,  6.29it/s]


Epoch: 007, Loss: 0.1960


100%|██████████| 188/188 [00:30<00:00,  6.18it/s]


Epoch: 008, Loss: 0.1752


100%|██████████| 188/188 [00:29<00:00,  6.33it/s]

Epoch: 009, Loss: 0.1658





In [None]:
####### CSP EXPERIMENTS #######
### CSP #### note: if the ratio==1, rerun from the first step
if experiment == 'ucsp' or experiment == 'icsp':
    def csp_test_gen(train_data, test_data, unique_data, entity_index, experiment_abbr):
        """Keep only the test supervision edges whose entity is absent from training.

        Args:
            train_data / test_data: link splits holding
                ``("user", "rates", "item")`` supervision edges.
            unique_data: id mapping frame (``mappedID`` column) of the entity
                type being tested — users for row 0, items for row 1.
            entity_index: 0 to filter on the user row of ``edge_label_index``,
                1 to filter on the item row.
            experiment_abbr: label used in the printed summary.

        Returns:
            ``(test_data_filtered, ratio)``: a deep copy of ``test_data``
            restricted to the selected edges, and the fraction of test edges kept
            (0.0 when the test split is empty).
        """
        rates_edge = ("user", "rates", "item")
        # NOTE(review): only the training *supervision* edges are consulted here;
        # entities that occur solely in the message-passing `edge_index` of
        # train_data still count as unseen — confirm this is intended.
        train_entities = train_data[rates_edge].edge_label_index[entity_index].unique().numpy()
        entities_not_in_train = np.setdiff1d(unique_data['mappedID'].unique(), train_entities)
        test_entities = test_data[rates_edge].edge_label_index[entity_index].numpy()
        # np.isin always yields a boolean array, also for an empty test split.
        mask = torch.from_numpy(np.isin(test_entities, entities_not_in_train))

        test_data_filtered = copy.deepcopy(test_data)
        test_data_filtered[rates_edge].edge_label_index = test_data_filtered[rates_edge].edge_label_index[:, mask]
        test_data_filtered[rates_edge].edge_label = test_data_filtered[rates_edge].edge_label[mask]

        total = mask.numel()
        ratio = int(mask.sum()) / total if total else 0.0
        print(f'test to train ratio {experiment_abbr}', ratio)

        return test_data_filtered, ratio

    is_user_csp = experiment == 'ucsp'
    # Fix: the item experiment must be compared against the item id table; the
    # user table was previously passed for both experiments.
    test_data_csp, test_to_train_ratio_csp = csp_test_gen(
        train_data,
        test_data,
        unique_user_id if is_user_csp else unique_item_id,
        0 if is_user_csp else 1,
        'CSP-user' if is_user_csp else 'CSP-item',
    )
    print('test data len BEFOR CSP test gen:', len(test_data['user', 'rates', 'item'].edge_label_index[0]))
    print('test data len AFTER CSP test gen:', len(test_data_csp['user', 'rates', 'item'].edge_label_index[0]))


In [22]:
######## ALL_TO_ALL USER_ITEM PAIRS GENERATOR IN TEST_DATA #########
# Extends the test supervision edges with extra label-0 (user, item) pairs so
# that every test user has additional candidates to rank.

# If mode GNN run below
### SLICING TEST_DATA FOR ALL_TO_ALL EVAL ###
slice_rate = 1
# Fix: the CSP-filtered test set used to be overwritten by an unconditional
# deepcopy of `test_data`, so the CSP experiments were evaluated on the
# unfiltered split.
if experiment == 'ucsp' or experiment == 'icsp':
    slice_rate = 1
    test_data_sliced = copy.deepcopy(test_data_csp)
else:
    test_data_sliced = copy.deepcopy(test_data)

# Keep the leading `slice_rate` fraction of the supervision edges and labels.
test_data_sliced["user", "rates", "item"].edge_label_index = test_data_sliced["user", "rates", "item"].edge_label_index[:, : int(slice_rate * len(test_data_sliced["user", "rates", "item"].edge_label_index[0]))]
test_data_sliced["user", "rates", "item"].edge_label = test_data_sliced["user", "rates", "item"].edge_label[ : int(slice_rate * len(test_data_sliced["user", "rates", "item"].edge_label))]

# Pairs already present in the test set (any label); used to avoid duplicates.
edge_index_zip = set(zip(test_data_sliced["user", "rates", "item"].edge_label_index[0].numpy(), test_data_sliced["user", "rates", "item"].edge_label_index[1].numpy()))

all_users = test_data_sliced["user", "rates", "item"].edge_label_index[0].unique().numpy()
all_items = test_data_sliced["user", "rates", "item"].edge_label_index[1].unique().numpy()

# which helps the model most: keep the social_edges in test and be evaluated or remove all social_edges in test_set?
if experiment == 'ablation_social_edges':
    # Fix: real items were shifted up by the TOTAL number of users in
    # add_social_edges, so they are the ids >= len(unique_user_id). The old
    # test (`> len(all_users)`) used the number of *test* users and was
    # off by one.
    num_user_nodes = len(unique_user_id)
    all_items = [item for item in all_items if item >= num_user_nodes]

new_edges = []
new_labels = []

# The original condition `count > 10` stops after 11 added pairs per user; the
# count is kept so results stay comparable.
max_new_edges_per_user = 11

#TODO instead of all possible pairs, we can continue for each user if it reaches to x samples (pos + neg)
for user_id in tqdm(all_users, total=len(all_users)):
    count_user_new_edges = 0
    random.shuffle(all_items) #TODO: before that we should exclude items that user interacted with (ground_truth) since we add label=0 for all new edges
    for item_id in all_items:
        if count_user_new_edges >= max_new_edges_per_user: #instead of slice rate, lets play with this parameter
            break
        if (user_id, item_id) not in edge_index_zip:
            count_user_new_edges += 1
            new_edges.append((user_id, item_id))
            new_labels.append(0)

test_data_all2all = copy.deepcopy(test_data_sliced)
if new_edges:
    new_edges_tensor = torch.tensor(new_edges, dtype=torch.int64).t().contiguous()
    # Match the dtype of the existing labels instead of relying on type
    # promotion inside torch.cat.
    new_labels_tensor = torch.tensor(new_labels, dtype=test_data_all2all["user", "rates", "item"].edge_label.dtype)

    test_data_all2all["user", "rates", "item"].edge_label_index = torch.cat((test_data_all2all["user", "rates", "item"].edge_label_index, new_edges_tensor), dim=1)
    test_data_all2all["user", "rates", "item"].edge_label = torch.cat((test_data_all2all["user", "rates", "item"].edge_label, new_labels_tensor), dim=0)

print('test edges shape BEFORE adding all possible user item pairs', test_data_sliced["user", "rates", "item"].edge_label_index.shape)
print('test edges shape AFTER adding all possible user item pairs', test_data_all2all["user", "rates", "item"].edge_label_index.shape)

print('unique test users', len(test_data_all2all["user", "rates", "item"].edge_label_index[0].unique()))
print('unique test items', len(test_data_all2all["user", "rates", "item"].edge_label_index[1].unique()))



100%|██████████| 10662/10662 [03:18<00:00, 53.58it/s]


test edges shape BEFORE adding all possible user item pairs torch.Size([2, 60000])
test edges shape AFTER adding all possible user item pairs torch.Size([2, 177282])
unique test users 10662
unique test items 16329


In [23]:
# Debug spot-check: print the item ids paired with user 46 in the extended
# test set. The string literal below holds an item list pasted by the original
# author, presumably from an earlier run; it has no effect on execution.
mask = test_data_all2all['user', 'rates', 'item'].edge_label_index[0] == 46
print(test_data_all2all['user', 'rates', 'item'].edge_label_index[1][mask])
''' 
[  369,   355,   366,  9184,  8461,  8378,  8327, 12630, 10764, 16986,
         4803, 11606, 14969,  2428,  7477, 12700,   959, 16575,   517, 10928,
        16489,  9835,  9558,  3025, 15835,   533,   570,  7590]
'''

tensor([ 3818, 11103,  7781, 12831,  4362,  9024, 10115, 16568,  9489, 15679,
         4557, 16877, 10654])


' \n[  369,   355,   366,  9184,  8461,  8378,  8327, 12630, 10764, 16986,\n         4803, 11606, 14969,  2428,  7477, 12700,   959, 16575,   517, 10928,\n        16489,  9835,  9558,  3025, 15835,   533,   570,  7590]\n'

In [24]:
######## GNN PRED FOR TEST_DATA_all2all #########
# Scores every supervision edge of the extended test set with the trained GNN
# and collects predictions and labels in loader order (shuffle=False).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
test_loader_gnn = LinkNeighborLoader(
    data=test_data_all2all,
    num_neighbors=[20, 10],
    edge_label_index=(("user", "rates", "item"), test_data_all2all["user", "rates", "item"].edge_label_index),
    edge_label=test_data_all2all["user", "rates", "item"].edge_label,
    batch_size=3 * 128,
    shuffle=False,
)

# Inference mode; the previous version also fetched one batch with
# next(iter(...)) and threw it away, which only cost time.
model.eval()
preds_gnn = []
ground_truths_gnn = []
with torch.no_grad():
    for sampled_data_gnn in tqdm(test_loader_gnn):
        sampled_data_gnn.to(device)
        preds_gnn.append(model(sampled_data_gnn))
        ground_truths_gnn.append(sampled_data_gnn["user", "rates", "item"].edge_label)
pred_gnn = torch.cat(preds_gnn, dim=0).cpu().numpy()
ground_truth_gnn = torch.cat(ground_truths_gnn, dim=0).cpu().numpy()

print('all ground truth len', len(ground_truth_gnn))

100%|██████████| 462/462 [01:05<00:00,  7.08it/s]

all ground truth len 177282





In [25]:
########### DATA PREPRATION FOR MF & POP MODELS  #############
'''
For LightFM models, we need a df of train and test data, 
but from GNN train/test generation, we have a HeteroData
Here we turn a HeteroData to a DataFrame
'''

def add_topic(df, contract_to_topic_df, unique_item_id):
    """Return a copy of ``df`` with a ``topic`` column looked up per item.

    Args:
        df: frame with an ``item`` column holding mapped item ids.
        contract_to_topic_df: frame with ``contract_name`` and
            ``most_probable_topic`` columns.
        unique_item_id: id mapping frame with ``itemId`` (raw item name) and
            ``mappedID`` columns.

    Returns:
        A new DataFrame equal to ``df`` plus an integer ``topic`` column; items
        without a known name or topic get topic 0.

    Unlike the previous version, the input frame is left untouched (it used to
    gain ``item_name`` and ``topic`` columns as a side effect).
    """
    item_to_topic = dict(zip(contract_to_topic_df['contract_name'],
                             contract_to_topic_df['most_probable_topic']))
    mapped_id_to_name = dict(zip(unique_item_id['mappedID'], unique_item_id['itemId']))
    # mapped id -> item name -> topic; unknown entries become NaN, then 0.
    topics = (
        df['item']
        .map(mapped_id_to_name)
        .map(item_to_topic)
        .fillna(0)
        .astype(int)
    )
    return df.assign(topic=topics)

# Test frame: one row per supervision edge of the extended test set, with the
# edge label stored as 'rating'.
test_df_index = test_data_all2all['user', 'rates', 'item'].edge_label_index.numpy()
test_df_label = test_data_all2all['user', 'rates', 'item'].edge_label.numpy()

# Transpose so each row is one (user, item) pair.
test_df_index = test_df_index.T 
test_df_mf = pd.DataFrame(test_df_index, columns=['user', 'item'])
test_df_mf['rating'] = test_df_label


# Train frame, built the same way from the training supervision edges.
# NOTE(review): only `edge_label_index` is used; edges that exist solely in the
# message-passing `edge_index` of train_data are not included — confirm the
# baselines are meant to train on this subset.
train_df_index = train_data['user', 'rates', 'item'].edge_label_index.numpy()
train_df_label = train_data['user', 'rates', 'item'].edge_label.numpy()
train_df_index = train_df_index.T 
train_df_mf = pd.DataFrame(train_df_index, columns=['user', 'item'])
train_df_mf['rating'] = train_df_label

# The contract dataset additionally gets a per-item topic column (used by MF_C).
if dataset_mode == 'contract':
    contract_to_topic_df = pd.read_parquet("dataset/contract_name_topic.parquet")
    train_df_mf= add_topic(train_df_mf, contract_to_topic_df, unique_item_id)
    test_df_mf = add_topic(test_df_mf, contract_to_topic_df, unique_item_id)




In [26]:
########### POP & MF_N & MF_C TRAIN/PRED  #############
##### POP #####
# Popularity baseline: predict 1 for the 20 items that occur most often in the
# training frame and 0 for everything else. (Original author's note: any cutoff
# larger than max(k) can be used.)
top_contracts = train_df_mf['item'].value_counts()[:20].index.tolist()
test_df_mf['pred_pop'] = 0
test_df_mf.loc[test_df_mf['item'].isin(top_contracts), 'pred_pop'] = 1
pred_pop = test_df_mf['pred_pop'].to_numpy()
ground_truth_pop = test_df_mf['rating'].to_numpy()

##### MF_N #####
# LightFM matrix factorization on (user, item) pairs. The id space is the
# union of train and test ids so every test row can be looked up.
dataset = Dataset()
user_ids_mfn = np.union1d(train_df_mf['user'].unique(), test_df_mf['user'].unique())
item_ids_mfn = np.union1d(train_df_mf['item'].unique(), test_df_mf['item'].unique())
dataset.fit(user_ids_mfn, item_ids_mfn)
user_ids_mapping, _, item_ids_mapping, _ = dataset.mapping()

# NOTE(review): iterrows yields each row as a single Series, so the integer ids
# are presumably upcast to float next to the float 'rating' column; the lookups
# evidently still succeed in the recorded run, but itertuples would avoid the
# conversion.
(train_interactions_mfn, train_interactions_weight_mfn) = dataset.build_interactions((row['user'], row['item'], row['rating']) for index, row in train_df_mf.iterrows())

# No random_state is passed, so results vary between runs.
model_mfn = LightFM(loss='warp')
model_mfn.fit(train_interactions_mfn, epochs=30, num_threads=2)

test_df_mf['pred_mfn'] = float(0)

# Score each user's test items in one predict call and write the scores back
# by row index.
for user, user_data in tqdm(test_df_mf.groupby('user'), total=test_df_mf['user'].nunique()):
    user_id_internal = user_ids_mapping[user]
    item_ids_internal = np.array([item_ids_mapping[item] for item in user_data['item']])
    predictions_mfn = model_mfn.predict(user_id_internal, item_ids_internal)
    test_df_mf.loc[user_data.index, 'pred_mfn'] = predictions_mfn

pred_mfn = test_df_mf['pred_mfn'].to_numpy()
ground_truth_mfn = test_df_mf['rating'].to_numpy()

##### MF_C #####
# Same factorization, but the "items" are topics: every test row is scored by
# the user's affinity to the topic of its item.
if dataset_mode == 'contract':
    # Rebinds `dataset`, `user_ids_mapping` and `item_ids_mapping` from MF_N.
    dataset = Dataset()
    user_ids_mfc = np.union1d(train_df_mf['user'].unique(), test_df_mf['user'].unique())
    item_ids_mfc = np.union1d(train_df_mf['topic'].unique(), test_df_mf['topic'].unique())
    dataset.fit(user_ids_mfc, item_ids_mfc)
    user_ids_mapping, _, item_ids_mapping, _ = dataset.mapping()

    (train_interactions_mfc, train_interactions_weight_mfc) = dataset.build_interactions((row['user'], row['topic'], row['rating']) for index, row in train_df_mf.iterrows())

    # Unlike MF_N, this fit passes the interaction weights as sample weights.
    model_mfc = LightFM(loss='warp')
    model_mfc.fit(train_interactions_mfc, epochs=30, num_threads=2, sample_weight=train_interactions_weight_mfc) # TODO maybe do not pass the weights to the MF models

    def topic_popular_contracts(df):
        """Map each topic to its items, ordered by descending summed rating."""
        item_rating_sum = df.groupby(['topic', 'item'])['rating'].sum().reset_index()
        sorted_items = item_rating_sum.sort_values(['topic', 'rating'], ascending=[True, False])
        topic_to_popular_items = {k: g['item'].tolist() for k, g in sorted_items.groupby('topic')}
        return topic_to_popular_items

    test_df_mf['pred_mfc'] = float(0)
    # NOTE(review): this ranking is computed from the TEST labels and is not
    # used anywhere in this cell; if a later cell feeds it into predictions it
    # would leak ground truth — confirm.
    topic_popular_contracts_dict = topic_popular_contracts(test_df_mf)

    for user, user_data in tqdm(test_df_mf.groupby('user'), total=test_df_mf['user'].nunique()):
        user_id_internal = user_ids_mapping[user]
        item_ids_internal = np.array([item_ids_mapping[item] for item in user_data['topic']])
        predictions_mfc = model_mfc.predict(user_id_internal, item_ids_internal)
        test_df_mf.loc[user_data.index, 'pred_mfc'] = predictions_mfc

    pred_mfc = test_df_mf['pred_mfc'].to_numpy()
    ground_truth_mfc = test_df_mf['rating'].to_numpy()



100%|██████████| 10662/10662 [00:10<00:00, 1028.05it/s]
100%|██████████| 10662/10662 [00:09<00:00, 1072.15it/s]


In [27]:
####### METRIC EVAL #######

# def precision_at_k(user_id, sorted_indices, ground_truth, k):
#     top_k_indices = sorted_indices[:k]
#     top_k_labels = ground_truth[top_k_indices]
#     num_ones = np.sum(ground_truth == 1)
#     hit = np.sum(top_k_labels > 0)

#     return hit / min(num_ones, k) if num_ones != 0 else k # k

# def average_hit_at_k(k, ground_truth, pred, user_ids, edge_index, model_variant):
#     precisions = []
#     for user_id in user_ids: # tqdm(user_ids, total=len(user_ids)):
#         mask = edge_index[0] == user_id
#         filtered_pred = pred[mask]
#         filtered_ground_truth = ground_truth[mask]
#         if np.sum(filtered_ground_truth == 1) == 0: continue
#         # print(filtered_pred)
#         # print(filtered_ground_truth)
#         sorted_indices = np.argsort(filtered_pred)[::-1]
#         pop_hit = np.sum(filtered_pred[:np.sum(filtered_ground_truth == 1)] > 0) / (min(np.sum(filtered_ground_truth == 1), k) if np.sum(filtered_ground_truth == 1) != 0 else k)
        
#         precisions.append(
#             precision_at_k(user_id, sorted_indices, filtered_ground_truth, k) if model_variant != 'pop' else pop_hit
#         )
#         break
        
#     return np.mean(precisions)

def precision_at_k(user_id, sorted_indices, ground_truth, k):
    """
    Hit indicator at cut-off k for one user.

    Despite the name, this does not return a precision value: the result is
    1 when the labels of the k best-ranked entries sum to something positive,
    and 0 otherwise.

    Args:
    user_id: The user id (accepted for signature symmetry; not used here).
    sorted_indices: Positions into ground_truth, ordered best prediction first.
    ground_truth: Relevance labels of this user's candidates.
    k: The number of top-ranked recommendations to inspect.

    Returns:
    1 if the top-k labels sum to a positive value, else 0 (as a Python int).
    """
    labels_in_top_k = ground_truth[sorted_indices[:k]]
    return int(np.sum(labels_in_top_k) > 0)

def average_hit_at_k(k, ground_truth, pred, user_ids, edge_index, model_variant):
    """
    Computes the mean hit@k over the given users.

    Args:
    k: The number of recommendations to consider.
    ground_truth: Actual ratings (binary) indicating whether an item is relevant or not.
    pred: Predicted ratings, aligned element-wise with ground_truth.
    user_ids: Array of user ids to calculate the metric for.
    edge_index: Index structure whose first row holds the user id of every
        scored pair; it is used to select each user's slice of pred / ground_truth.
    model_variant: 'pop' selects the popularity-baseline statistic below; any
        other value scores the user with precision_at_k on the ranked predictions.

    Returns:
    The mean of the per-user values (np.mean over all users in user_ids).
    """
    hits = []
    for user_id in user_ids:
        mask = edge_index[0] == user_id
        filtered_pred = pred[mask]
        filtered_ground_truth = ground_truth[mask]

        if model_variant == 'pop':
            # Popularity baseline: fraction of the first `num_relevant` predictions
            # (in stored order, not ranked) that are positive.
            # NOTE(review): the numerator is not limited to k, so this ratio can
            # exceed 1 when a user has more than k relevant items - confirm that
            # this is the intended statistic.
            num_relevant = np.sum(filtered_ground_truth == 1)
            denominator = min(num_relevant, k) if num_relevant != 0 else k
            hits.append(np.sum(filtered_pred[:num_relevant] > 0) / denominator)
        else:
            # Rank this user's candidates from highest to lowest predicted score.
            sorted_indices = np.argsort(filtered_pred)[::-1]
            hits.append(precision_at_k(user_id, sorted_indices, filtered_ground_truth, k))

    return np.mean(hits)

def dcg_at_k(r, k):
    """
    Compute DCG@k for a list of relevance scores

    Each of the first k scores is divided by log2(position + 1), with
    positions counted from 1, and the results are summed.

    Parameters:
    - r: Relevance scores in rank order
    - k: Rank

    Returns:
    - DCG@k (0.0 when r is empty)
    """
    # np.asfarray no longer exists in NumPy 2.0; asarray with an explicit float
    # dtype performs the same float64 conversion on every NumPy version.
    r = np.asarray(r, dtype=float)[:k]
    return np.sum(r / np.log2(np.arange(2, r.size + 2)))

def ndcg_at_k(r, k):
    """
    Compute NDCG@k for a list of relevance scores

    The DCG@k of r is normalised by the DCG@k of the same scores sorted in
    descending order (the best achievable ordering).

    Parameters:
    - r: Relevance scores in rank order
    - k: Rank

    Returns:
    - NDCG@k, or 0. when the ideal DCG is zero/falsy
    """
    ideal_dcg = dcg_at_k(sorted(r, reverse=True), k)
    # Guard against dividing by a zero ideal (e.g. no relevant scores at all).
    return dcg_at_k(r, k) / ideal_dcg if ideal_dcg else 0.

def calculate_ndcg_at_k(k, ground_truth, pred, edge_index):
    """
    Calculate the average NDCG@k for all users

    Users are the unique values of edge_index[0]. For each one, the entries
    of pred / ground_truth belonging to that user are selected, the labels
    are reordered by descending prediction, and ndcg_at_k is applied.

    Parameters:
    - k: Rank
    - ground_truth: True relevance scores
    - pred: Predicted relevance scores
    - edge_index: User-item interaction indices (row 0 must expose .numpy())

    Returns:
    - Average NDCG@k (np.mean over users)
    """
    user_row = edge_index[0]
    per_user_ndcg = []
    for uid in np.unique(user_row.numpy()):
        belongs_to_user = user_row == uid
        user_truth = ground_truth[belongs_to_user]

        # Highest predicted score first.
        ranking = np.argsort(pred[belongs_to_user])[::-1]
        per_user_ndcg.append(ndcg_at_k(user_truth[ranking], k))

    return np.mean(per_user_ndcg)

def average_precision_at_k(user_id, sorted_indices, ground_truth, k):
    """
    Computes the average precision at k for a single user.

    Precision is evaluated at every top-k position that holds a relevant
    (label > 0) item, and those precisions are averaged over the number of
    relevant items found inside the top k.

    Args:
    user_id: The user id (not used in the computation).
    sorted_indices: Indices that would sort the predicted ratings, best first.
    ground_truth: Actual ratings (binary) indicating whether an item is relevant or not.
    k: The number of recommendations to consider.

    Returns:
    The average precision at k for the given user; 0 when no relevant item
    appears in the top k.
    """
    labels_at_k = ground_truth[sorted_indices[:k]]
    hit_positions = np.where(labels_at_k > 0)[0]
    hit_count = len(hit_positions)

    if hit_count == 0:
        return 0

    # Precision at each relevant position: label mass seen so far / items seen so far.
    precision_total = sum(
        (np.sum(labels_at_k[:pos + 1]) / (pos + 1) for pos in hit_positions),
        0.0,
    )
    return precision_total / min(hit_count, k)

def mean_ap_at_k(k, ground_truth, pred, user_ids, edge_index):
    """
    Computes the mean average precision at k.

    Args:
    k: The number of recommendations to consider.
    ground_truth: Actual ratings (binary) indicating whether an item is relevant or not.
    pred: Predicted ratings.
    user_ids: Ids of the users to average over.
    edge_index: Index structure whose first row gives the user id of each
        entry in pred / ground_truth.

    Returns:
    The mean average precision at k over all users.
    """

    def _user_average_precision(uid):
        # Restrict to this user's entries and rank them by descending prediction.
        belongs_to_user = edge_index[0] == uid
        ranking = np.argsort(pred[belongs_to_user])[::-1]
        return average_precision_at_k(uid, ranking, ground_truth[belongs_to_user], k)

    return np.mean([_user_average_precision(uid) for uid in user_ids])

def evaluate(k_values, test_data_all2all, ground_truth, pred, model_variant_eval):
    """
    Print HIT@k, and for non-'pop' variants also NDCG@k and MAP@k, for every k.

    Args:
    k_values: Cut-offs to report.
    test_data_all2all: Object indexed by ('user', 'rates', 'item') whose entry
        exposes edge_label_index; row 0 of that index supplies the user ids.
    ground_truth: Relevance labels aligned with pred.
    pred: Predicted scores.
    model_variant_eval: Variant name; 'pop' gets HIT@k only.

    Returns:
    None. Results are written to stdout, grouped by metric.
    """
    edge_index = test_data_all2all['user', 'rates', 'item'].edge_label_index
    user_ids = np.unique(edge_index[0].numpy())

    ### HIT@K ###
    for k in k_values:
        hit_at_k = average_hit_at_k(k, ground_truth, pred, user_ids, edge_index, model_variant_eval)
        print(f"HIT@{k}: {hit_at_k}")

    # Ranking metrics are skipped for the popularity baseline.
    if model_variant_eval == 'pop':
        return

    ### NDCG@K ###
    for k in k_values:
        ndcg_result = calculate_ndcg_at_k(k, ground_truth, pred, edge_index)
        print(f"NDCG@{k}: {ndcg_result}")

    ### MAP@K ###
    for k in k_values:
        map_at_k = mean_ap_at_k(k, ground_truth, pred, user_ids, edge_index)
        print(f"MAP@{k}: {map_at_k}")


# Ground-truth labels and predicted scores of every model variant, keyed by variant name.
eval_loader = {
    'gnn': {
        'ground_truth': ground_truth_gnn,
        'pred': pred_gnn
    },
    'pop': {
        'ground_truth': ground_truth_pop,
        'pred': pred_pop
    },
    'mfn': {
        'ground_truth': ground_truth_mfn,
        'pred': pred_mfn
    },
    'mfc': {
        'ground_truth': ground_truth_mfc,
        'pred': pred_mfc
    },
}
model_variants = ['gnn', 'mfn', 'mfc', 'pop']

# The cut-offs used to be rebuilt on every iteration through a
# `mode != 'debug'` switch whose two branches were the same list, so they are
# defined once here instead.
# NOTE: `k_values` and the loop variable `model_variant_eval` are read again by
# the diversity-experiment cell further down.
k_values = [1, 5, 10, 15, 20]

for model_variant_eval in model_variants:
    print(f'$$$$$$ {model_variant_eval} $$$$$$')
    evaluate(
        k_values,
        test_data_all2all,
        ground_truth=eval_loader[model_variant_eval]['ground_truth'],
        pred=eval_loader[model_variant_eval]['pred'],
        model_variant_eval=model_variant_eval,
    )
''' 
$$$$$$ gnn $$$$$$
HIT@1: 0.5142080090030948
HIT@5: 0.620838413204539
HIT@10: 0.6456907061802495
HIT@15: 0.6540373253305823
HIT@20: 0.658257526024571
NDCG@1: 0.5142080090030948
NDCG@5: 0.4994564012728649
NDCG@10: 0.5290019131240989
NDCG@15: 0.544483549926359
NDCG@20: 0.5523988690721114
MAP@1: 0.5142080090030948
MAP@5: 0.5367350234976607
MAP@10: 0.5168923211232593
MAP@15: 0.503944347256433
MAP@20: 0.49623072122664735
$$$$$$ mfn $$$$$$
HIT@1: 0.47566350933133267
HIT@5: 0.5704773515896089
HIT@10: 0.6071462065084873
HIT@15: 0.6302166369689581
HIT@20: 0.6455969239426053
NDCG@1: 0.47566350933133267
NDCG@5: 0.42977502252776134
NDCG@10: 0.4554831736282756
NDCG@15: 0.4747337780914004
NDCG@20: 0.48886568020012644
MAP@1: 0.47566350933133267
MAP@5: 0.49407166004980885
MAP@10: 0.4733229370087464
MAP@15: 0.457218128568187
MAP@20: 0.4434546136013384
$$$$$$ mfc $$$$$$
HIT@1: 0.11694645034230516
HIT@5: 0.32861296070524243
HIT@10: 0.37869267560724
HIT@15: 0.47125574416205573
HIT@20: 0.5889524524055144
NDCG@1: 0.11694645034230516
NDCG@5: 0.14529863419019398
NDCG@10: 0.16835341889875108
NDCG@15: 0.2021470409907854
NDCG@20: 0.24482327489226452
MAP@1: 0.11694645034230516
MAP@5: 0.18436728250336054
MAP@10: 0.1857349684900897
MAP@15: 0.18316491255129508
MAP@20: 0.17885540581471826
$$$$$$ pop $$$$$$
HIT@1: 0.4504360874050455
HIT@5: 0.21883303635624748
HIT@10: 0.21570305417487265
HIT@15: 0.21566717614499667
HIT@20: 0.21566717614499667
'''

$$$$$$ gnn $$$$$$
HIT@1: 0.5428625023447758
HIT@5: 0.6344025511161133
HIT@10: 0.6539110861001688
HIT@15: 0.6586006377790283
HIT@20: 0.65888201087976
NDCG@1: 0.5428625023447758
NDCG@5: 0.5324252582144876
NDCG@10: 0.5633633600123634
NDCG@15: 0.5757228555503907
NDCG@20: 0.5784592047295187
MAP@1: 0.5428625023447758
MAP@5: 0.5599634736030347
MAP@10: 0.5388797402901704
MAP@15: 0.5288210458179221
MAP@20: 0.5263080748159996
$$$$$$ mfn $$$$$$
HIT@1: 0.5066591633839805
HIT@5: 0.5956668542487338
HIT@10: 0.6353404614518852
HIT@15: 0.6551303695366723
HIT@20: 0.65888201087976
NDCG@1: 0.5066591633839805
NDCG@5: 0.467144790410904
NDCG@10: 0.49931553478383517
NDCG@15: 0.5243132258159413
NDCG@20: 0.5363092892460521
MAP@1: 0.5066591633839805
MAP@5: 0.5222554398799475
MAP@10: 0.49799210511789
MAP@15: 0.4765613333842895
MAP@20: 0.46592082424885894
$$$$$$ mfc $$$$$$
HIT@1: 0.10617145000937911
HIT@5: 0.30969799287188143
HIT@10: 0.4830238229225286
HIT@15: 0.63918589382855
HIT@20: 0.6586944288126055
NDCG@1: 0.

' \n$$$$$$ gnn $$$$$$\nHIT@1: 0.5142080090030948\nHIT@5: 0.620838413204539\nHIT@10: 0.6456907061802495\nHIT@15: 0.6540373253305823\nHIT@20: 0.658257526024571\nNDCG@1: 0.5142080090030948\nNDCG@5: 0.4994564012728649\nNDCG@10: 0.5290019131240989\nNDCG@15: 0.544483549926359\nNDCG@20: 0.5523988690721114\nMAP@1: 0.5142080090030948\nMAP@5: 0.5367350234976607\nMAP@10: 0.5168923211232593\nMAP@15: 0.503944347256433\nMAP@20: 0.49623072122664735\n$$$$$$ mfn $$$$$$\nHIT@1: 0.47566350933133267\nHIT@5: 0.5704773515896089\nHIT@10: 0.6071462065084873\nHIT@15: 0.6302166369689581\nHIT@20: 0.6455969239426053\nNDCG@1: 0.47566350933133267\nNDCG@5: 0.42977502252776134\nNDCG@10: 0.4554831736282756\nNDCG@15: 0.4747337780914004\nNDCG@20: 0.48886568020012644\nMAP@1: 0.47566350933133267\nMAP@5: 0.49407166004980885\nMAP@10: 0.4733229370087464\nMAP@15: 0.457218128568187\nMAP@20: 0.4434546136013384\n$$$$$$ mfc $$$$$$\nHIT@1: 0.11694645034230516\nHIT@5: 0.32861296070524243\nHIT@10: 0.37869267560724\nHIT@15: 0.4712557

In [None]:
############# DIVERSITY EXPERIMENT ##############
if experiment == 'diversity':
    # NOTE: `model_variant_eval` and `k_values` are not assigned in this cell;
    # the values used are whatever earlier cells left in the kernel namespace.
    edge_index = test_data_all2all['user', 'rates', 'item'].edge_label_index
    user_ids = np.unique(edge_index[0].numpy())
    pred = eval_loader[model_variant_eval]['pred']
    ground_truth = eval_loader[model_variant_eval]['ground_truth']

    # A user's ranking does not depend on k, so rank every user once and reuse
    # the result for all cut-offs instead of re-masking and re-sorting per k.
    ranked_items_per_user = []
    ranked_labels_per_user = []
    for user_id in tqdm(user_ids, total=len(user_ids)):
        mask = edge_index[0] == user_id
        # Descending score order. The .copy() materialises the reversed view,
        # as the original code did before indexing the item tensor
        # (presumably because that tensor rejects negative-stride index arrays).
        sorted_indices = np.argsort(pred[mask])[::-1].copy()
        ranked_items_per_user.append(edge_index[1][mask][sorted_indices].numpy())
        ranked_labels_per_user.append(ground_truth[mask][sorted_indices])

    ####### Item coverage: share of candidate items that reach some user's top k #######
    num_candidate_items = len(np.unique(edge_index[1].numpy()))
    for k in k_values:
        recs_list = set()
        for ranked_items in ranked_items_per_user:
            recs_list.update(ranked_items[:k])

        diversity_at_k = len(recs_list) / num_candidate_items
        print(f'Item coverage diversity for {model_variant_eval} @{k}:', diversity_at_k)

    ####### User coverage: share of users with at least one relevant item in their top k #######
    for k in k_values:
        users_with_relevant_recs = set()

        for user_id, ranked_labels in zip(user_ids, ranked_labels_per_user):
            if np.sum(ranked_labels[:k]) > 0:  # At least one relevant recommendation
                users_with_relevant_recs.add(user_id)

        user_coverage_at_k = len(users_with_relevant_recs) / len(user_ids)
        print(f'User coverage for {model_variant_eval} @{k}:', user_coverage_at_k)

    #######  Intra-List Diversity #######
    # TODO: Based on item_feat define the compute_dissimilarity method
    # for k in k_values:
    #     avg_dissimilarity = []
        
    #     for user_id in tqdm(user_ids, total=len(user_ids)):
    #         mask = edge_index[0] == user_id
    #         filtered_pred = pred[mask]
    #         filtered_items = edge_index[1][mask]
    #         sorted_indices = np.argsort(filtered_pred)[::-1]
    #         top_k_indices = sorted_indices[:k]
    #         top_k_items = filtered_items[top_k_indices].numpy()
            
    #         dissimilarity_sum = 0
    #         for i in range(len(top_k_items)):
    #             for j in range(i+1, len(top_k_items)):
    #                 dissimilarity_sum += compute_dissimilarity(top_k_items[i], top_k_items[j])
            
    #         if k > 1:
    #             avg_pairwise_dissimilarity = 2 * dissimilarity_sum / (k * (k - 1))
    #             avg_dissimilarity.append(avg_pairwise_dissimilarity)
        
    #     intra_list_diversity_at_k = np.mean(avg_dissimilarity)
    #     print(f'Intra-list diversity for {model_variant_eval} @{k}:', intra_list_diversity_at_k)



In [None]:
######### HIT@K EVAL V1 ##########
# In val_data, len(edge_index) = 80670 but len(edge_label_index) = 30249; we use edge_label_index because the same field was used for train_loader.
def precision_at_k(user_id, edge_index, ground_truth, pred, k):
    """
    Precision@k for one user (V1 signature).

    NOTE: this reuses the name of the hit@k helper defined in the METRIC EVAL
    cell but takes different arguments; whichever cell ran last wins.

    Args:
    user_id: User whose entries are scored.
    edge_index: Index structure whose first row holds the user id of each entry.
    ground_truth: Relevance labels aligned with pred.
    pred: Predicted scores; must support .argsort() after masking.
    k: Cut-off; also the denominator of the returned ratio.

    Returns:
    Number of top-k entries with both a positive label and a positive
    prediction, divided by k.
    """
    user_mask = edge_index[0] == user_id
    user_scores = pred[user_mask]
    user_labels = ground_truth[user_mask]

    # Positions of the k highest-scored entries, best first.
    top_positions = user_scores.argsort()[::-1][:k]

    # Open question carried over from the author: "I think we should remove
    # this: and pred > 0" (the positive-prediction requirement below).
    hit = sum(
        1
        for pos in top_positions
        if user_labels[pos] > 0 and user_scores[pos] > 0
    )
    return hit / k


def ap_at_k(k, precision_at_k, mode):
    """
    Mean of a per-user precision function over the validation users.

    Reads module-level state: `val_loader` (for the edge_label_index),
    `ground_truth`, and one of `pred_mfn`, `pred_mfc` or `pred` depending on mode.

    Fixes relative to the first draft:
    - the mode dispatch is a single if/elif/else, so 'mfn' no longer falls
      through into the default branch and appends a second (GNN) value;
    - the leftover `break` that stopped after the first entry is gone;
    - each user is scored once (unique ids) instead of once per edge.

    Args:
    k: Cut-off passed through to precision_at_k.
    precision_at_k: Callable (user_id, edge_index, ground_truth, pred, k) -> float.
    mode: 'mfn' or 'mfc' select the matching prediction array; any other
        value (e.g. 'GNN') selects `pred`.

    Returns:
    np.mean of the per-user precisions.
    """
    edge_index = val_loader.data['user', 'rates', 'item'].edge_label_index

    # Pick the prediction array once; ground_truth is shared by all modes.
    if mode == 'mfn':
        mode_pred = pred_mfn
    elif mode == 'mfc':
        mode_pred = pred_mfc
    else:
        mode_pred = pred

    # assumes edge_index lives on the CPU so that row 0 converts to a NumPy array - TODO confirm
    user_ids = np.unique(np.asarray(edge_index[0]))

    precisions = [
        precision_at_k(user_id, edge_index, ground_truth, mode_pred, k)
        for user_id in tqdm(user_ids, total=len(user_ids))
    ]
    return np.mean(precisions)


# Report the V1 metric for the GNN predictions at small cut-offs.
# NOTE: this rebinds the notebook-wide `k_values`.
k_values = [1, 2, 3, 4, 5]
for k in k_values:
    hit_at_k = ap_at_k(k=k, precision_at_k=precision_at_k, mode='GNN')
    print(f"AP@{k}:", hit_at_k)

