<i>Copyright (c) Microsoft Corporation. All rights reserved.</i>

<i>Licensed under the MIT License.</i>

<i>This notebook has been taken from Microsoft's recommender system library: [Source](https://github.com/microsoft/recommenders/blob/main/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb). It has been modified to fit the context of our study. The modifications include the addition of new evaluation methods, the ability to add new datasets, and cluster validation process.</i>

# LightGCN - simplified GCN model for recommendation

This notebook serves as an introduction to LightGCN [1], which is an simple, linear and neat Graph Convolution Network (GCN) [3] model for recommendation.

## 0 Global Settings and Imports

In [1]:
from recommenders.datasets.python_splitters import python_stratified_split
import sys
import os
import codecs
import scrapbook as sb
import pandas as pd
import numpy as np
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages


from recommenders.utils.timer import Timer
from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN
from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF
#from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.utils.constants import SEED as DEFAULT_SEED
from recommenders.models.deeprec.deeprec_utils import prepare_hparams
from recommenders.evaluation.python_evaluation import (
    map_at_k,
    ndcg_at_k,
    precision_at_k,
    recall_at_k,
    serendipity,
    user_serendipity,
    user_item_serendipity,
    catalog_coverage
)

print("System version: {}".format(sys.version))
print("Pandas version: {}".format(pd.__version__))
print("Tensorflow version: {}".format(tf.__version__))
#os.chdir('../')

System version: 3.10.0 (tags/v3.10.0:b494f59, Oct  4 2021, 19:00:18) [MSC v.1929 64 bit (AMD64)]
Pandas version: 2.2.2
Tensorflow version: 2.17.0


In [2]:
os.getcwd()

'C:\\Users\\clari\\Desktop\\M2 - Thesis\\Research\\Dr Jacques Bou Abdo\\Recommender System\\5 - Ensemble Learning Model\\Accuracy Metrics\\serendipity-main\\notebooks'

In [3]:
DEFAULT_SEED = None

In [4]:
# top k items to recommend
TOP_K = 10

# Select MovieLens data size: 100k, 1m, 10m, or 20m
#MOVIELENS_DATA_SIZE = 'framework-ml-25m-subset'
MOVIELENS_DATA_SIZE = 'framework-ml-25m-subset(3) (EL2.4)'
#MOVIELENS_DATA_SIZE = 'ml-25m-subset(3)-#5'
#MOVIELENS_DATA_SIZE = 'ml-25m-subset(3)-nf-1-2-3-4'
OG_DT = 'ml-25m'
# Model parameters
EPOCHS = 15
BATCH_SIZE = 1024

SEED = DEFAULT_SEED  # Set None for non-deterministic results

yaml_file = "./models/lightgcn/config/lightgcn.yaml"
user_file = "./models/lightgcn/output/tests/user_embeddings.csv"
item_file = "./models/lightgcn/output/item_embeddings.csv"

In [5]:

yaml_file = os.path.join(os.getcwd(), ".." ,"models", "lightgcn", "config", "lightgcn.yaml")
user_file = os.path.join(os.getcwd(), ".." ,"models","lightgcn","output_CH",MOVIELENS_DATA_SIZE,"user_embedding_CH.csv")
item_file = os.path.join(os.getcwd(), ".." ,"models","lightgcn","output_CH",MOVIELENS_DATA_SIZE,"item_embedding_CH.csv")
print(os.path.exists(yaml_file))
print(os.path.exists(user_file))
print(os.path.exists(item_file))



True
True
True


In [6]:
from sklearn.model_selection import train_test_split

ratio = 0.85
#dataset = 'ml-5m-#1'
dataset_name = MOVIELENS_DATA_SIZE
dataset_path = os.path.join('datasets', dataset_name)

#ratings_path = os.path.join(dataset_path, 'u.data')
#ratings_file = codecs.open(ratings_path, 'r', 'UTF-8')
#df = pd.read_csv(ratings_file, sep='\t', names=('userID', 'itemID', 'rating', 'timestamp'))
#dataset_name = 'ml-5m-#2'
#dataset = '5m'
dataset_path = os.path.join('datasets', dataset_name)
output_path_exp1 = './output/exp-4/'
#ratings_path = os.path.join(dataset_path, 'u.data')
#ratings_path = os.path.join(dataset_path, '//clean//ratings.csv')
#ratings_file = codecs.open(ratings_path, 'r', 'UTF-8')
ratings_path = "../datasets/"+dataset_name+ "/clean/ratings.csv"



#df = pd.read_csv(ratings_file, sep='\t', names=('userID', 'itemID', 'rating', 'timestamp'))

if (MOVIELENS_DATA_SIZE == 'ml-25m-subset-nf-1-2-3-3.1-4'):
    cos = ['itemId',	'userId', 'title',	'rating']
    df = pd.read_csv(ratings_path, usecols=cos).rename(columns = {'userId':'userID', 'itemId':'itemID'})
elif (MOVIELENS_DATA_SIZE == 'ml-25m-subset(3)-nf-1-2-3-4'):
    df = pd.read_csv(ratings_path).rename(columns = {'userId':'userID', 'itemId':'itemID'})
else:
    df = pd.read_csv(ratings_path).rename(columns = {'userId':'userID', 'movieId':'itemID'}).copy()

if '#2' in dataset_name:
     df = df[(df['isNoisy'] == "[0.]") | (df['isNoisy'] == 0)]
elif 'nf-1' in dataset_name:
     #df = df[(df['1&2&3&4 = 0'] == 0) & (df['1&2&3&4 = 1'] == "0")]
    df = df[(df['1&2&3&4 = 1'] == 0)]
    print(len(df))
#elif ('nf1' in dataset_name) or ('nf2' in dataset_name) or ('nf3' in dataset_name) or ('nf4' in dataset_name):
#    df = df
elif (not 'framework' in dataset_name) & ('ml-25m-subset(3)-' in dataset_name):
    df = df[df['isNoisy'] == 0] 
elif 'framework' in dataset_name:
    df = df[df['layer3_result'] == 0]
len(df['userID'].unique())
if 'itemID' not in df.columns:
    df = df.rename(columns = {'itemId':'itemID'})

In [7]:
df.columns

Index(['Unnamed: 0.2', 'Unnamed: 0.1', 'userID', 'itemID', 'rating',
       'timestamp', 'user_cat', 'rating_group', 'item_cat', 'nf1',
       'user_group', 'coherence', 'title', 'genres', 'RND', 'thresh', 'nf2',
       'Unnamed: 0', 'prediction', 'nf3', 'nf4', 'noisedegree', '1&2', '1&3',
       '1&4', '2&3', '2&4', '3&4', '1&2&3', '2&3&4', '1&2&4', '1&3&4',
       '1&2&3&4 = 0', '1&2&3&4 = 1', 'isNoisy', 'FinalOutput', 'OptOut',
       'layer3_result'],
      dtype='object')

In [8]:
df.head()

Unnamed: 0,Unnamed: 0.2,Unnamed: 0.1,userID,itemID,rating,timestamp,user_cat,rating_group,item_cat,nf1,...,1&2&3,2&3&4,1&2&4,1&3&4,1&2&3&4 = 0,1&2&3&4 = 1,isNoisy,FinalOutput,OptOut,layer3_result
15,15,15,131923,30812,3.0,1526512276,Benevolent,Su,Strongly-preferred,0,...,0,0,0,0,0,0,0,0,0,0
34,34,34,147152,2353,3.0,1001236089,Benevolent,Su,Strongly-preferred,0,...,0,0,0,0,1,0,0,0,0,0
37,37,37,78544,1645,2.5,1347684642,Variable,Au,Strongly-preferred,0,...,0,0,0,0,1,0,0,0,0,0
38,38,38,112064,36517,3.5,1160240881,Benevolent,Su,Strongly-preferred,0,...,0,0,0,0,1,0,0,0,0,0
42,42,42,50115,2181,4.0,1373732206,Benevolent,Su,Strongly-preferred,0,...,0,0,0,0,1,0,0,0,0,0


In [9]:
len(df)

107084

In [10]:

# Normal train/test split (random portion) 
train, test = python_stratified_split(df, ratio=ratio)

#train2, test2 = train_test_split(df, test_size=(1 - ratio), random_state=42)
train.to_csv('../output/exp-4/'+dataset_name+'/train.csv')
test.to_csv('../output/exp-4/'+dataset_name+'/test.csv')
train_df = pd.read_csv('../output/exp-4/'+dataset_name+'/train.csv')
test_df = pd.read_csv('../output/exp-4/'+dataset_name+'/test.csv')

In [11]:
df

Unnamed: 0,Unnamed: 0.2,Unnamed: 0.1,userID,itemID,rating,timestamp,user_cat,rating_group,item_cat,nf1,...,2&3&4,1&2&4,1&3&4,1&2&3&4 = 0,1&2&3&4 = 1,isNoisy,FinalOutput,OptOut,layer3_result,random
15,15,15,131923,30812,3.0,1526512276,Benevolent,Su,Strongly-preferred,0,...,0,0,0,0,0,0,0,0,0,0.374540
34,34,34,147152,2353,3.0,1001236089,Benevolent,Su,Strongly-preferred,0,...,0,0,0,1,0,0,0,0,0,0.950714
37,37,37,78544,1645,2.5,1347684642,Variable,Au,Strongly-preferred,0,...,0,0,0,1,0,0,0,0,0,0.731994
38,38,38,112064,36517,3.5,1160240881,Benevolent,Su,Strongly-preferred,0,...,0,0,0,1,0,0,0,0,0,0.598658
42,42,42,50115,2181,4.0,1373732206,Benevolent,Su,Strongly-preferred,0,...,0,0,0,1,0,0,0,0,0,0.156019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
634290,634290,634290,11500,73290,5.0,1524103274,Critical,Su,Strongly-preferred,0,...,0,0,0,0,0,0,0,0,0,0.659668
634304,634304,634304,40006,7444,4.0,1503891979,Benevolent,Su,Strongly-preferred,0,...,0,0,0,1,0,0,0,0,0,0.150811
634311,634311,634311,89091,2455,3.0,1101711979,Benevolent,Su,Strongly-preferred,0,...,0,0,0,1,0,0,0,0,0,0.800995
634316,634316,634316,93451,949,3.5,1122655562,Benevolent,Su,Strongly-preferred,0,...,0,0,0,0,0,0,0,0,0,0.960883


In [12]:
clusters = pd.read_csv('../output/exp-4/'+dataset_name+'/means_output_clusters_CH.csv').drop(columns=['rating'])
train_clusters = train_df.reset_index().merge(clusters, left_on=['userID','itemID'], right_on=['userId','movieId']).drop(columns=['userId'])
total_groups = set(clusters.cluster.to_list())
print("Total groups:", len(total_groups))

Total groups: 100


In [13]:
train_clusters

Unnamed: 0,index,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,userID,itemID,rating,timestamp_x,user_cat,rating_group,...,1&3&4,1&2&3&4 = 0,1&2&3&4 = 1,isNoisy,FinalOutput,OptOut,layer3_result,movieId,timestamp_y,cluster
0,0,239673,239673,239673,107,1189,3.0,1116042395,Benevolent,Su,...,0,0,0,0,0,0,0,1189,1116042395,8
1,1,429104,429104,429104,107,3897,4.0,1112450347,Benevolent,Su,...,0,0,0,0,0,0,0,3897,1112450347,80
2,2,52476,52476,52476,107,1225,4.0,1116042211,Benevolent,Su,...,0,0,0,0,0,0,0,1225,1116042211,8
3,3,544059,544059,544059,107,1394,4.5,1112450520,Benevolent,Su,...,0,0,0,0,0,0,0,1394,1112450520,80
4,4,177803,177803,177803,107,337,3.5,1116042439,Benevolent,Su,...,0,0,0,0,0,0,0,337,1116042439,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90939,90939,101322,101322,101322,162476,556,3.5,1102097140,Benevolent,Su,...,0,0,0,0,0,0,0,556,1102097140,28
90940,90940,197421,197421,197421,162476,8966,4.0,1102139061,Benevolent,Su,...,0,0,0,0,0,0,0,8966,1102139061,28
90941,90941,450914,450914,450914,162476,6772,4.0,1102097155,Benevolent,Su,...,0,0,0,0,0,0,0,6772,1102097155,28
90942,90942,167963,167963,167963,162488,1285,5.0,1202822575,Benevolent,Su,...,0,0,0,0,0,0,0,1285,1202822575,45


In [14]:
# Target group cluster (we iterate over all of them in every run)
target_group = 4

# Train data
#target_group_df = train_clusters[train_clusters['group_clusters'] == target_group]
#train = target_group_df[['userID', 'itemID', 'rating', 'timestamp']]
target_group_df = train_clusters
if 'timestamp' not in target_group_df.columns:
    rt = pd.read_csv( r"C:\Users\clari\Desktop\M2 - Thesis\Research\Dr Jacques Bou Abdo\Recommender System\4 - Review\dataset\\"+OG_DT+"\\Full Data set (Used with NF3 First)\\ratings.csv"
).rename(columns = {'userId':'userID', 'movieId':'itemID'})
    target_group_df = target_group_df.merge(rt, on=['userID','itemID','rating'], how='left')
train = target_group_df[['userID', 'itemID', 'rating', 'timestamp']]



# Test data Choose only ratings that can be predicted
users_in_train = list(set(train.userID.to_list()))
test = test_df[test_df.userID.isin(users_in_train)]
# This gave a memory error, but it seems its not actually used
#test2 = test_df[test_df - test_df.userID.isin(users_in_train)]

In [15]:
len(users_in_train)

5522

In [16]:
target_group_df

Unnamed: 0,index,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,userID,itemID,rating,timestamp_x,user_cat,rating_group,...,1&2&3&4 = 0,1&2&3&4 = 1,isNoisy,FinalOutput,OptOut,layer3_result,movieId,timestamp_y,cluster,timestamp
0,0,239673,239673,239673,107,1189,3.0,1116042395,Benevolent,Su,...,0,0,0,0,0,0,1189,1116042395,8,1116042395
1,1,429104,429104,429104,107,3897,4.0,1112450347,Benevolent,Su,...,0,0,0,0,0,0,3897,1112450347,80,1112450347
2,2,52476,52476,52476,107,1225,4.0,1116042211,Benevolent,Su,...,0,0,0,0,0,0,1225,1116042211,8,1116042211
3,3,544059,544059,544059,107,1394,4.5,1112450520,Benevolent,Su,...,0,0,0,0,0,0,1394,1112450520,80,1112450520
4,4,177803,177803,177803,107,337,3.5,1116042439,Benevolent,Su,...,0,0,0,0,0,0,337,1116042439,8,1116042439
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90939,90939,101322,101322,101322,162476,556,3.5,1102097140,Benevolent,Su,...,0,0,0,0,0,0,556,1102097140,28,1102097140
90940,90940,197421,197421,197421,162476,8966,4.0,1102139061,Benevolent,Su,...,0,0,0,0,0,0,8966,1102139061,28,1102139061
90941,90941,450914,450914,450914,162476,6772,4.0,1102097155,Benevolent,Su,...,0,0,0,0,0,0,6772,1102097155,28,1102097155
90942,90942,167963,167963,167963,162488,1285,5.0,1202822575,Benevolent,Su,...,0,0,0,0,0,0,1285,1202822575,45,1202822575


In [17]:
train

Unnamed: 0,userID,itemID,rating,timestamp
0,107,1189,3.0,1116042395
1,107,3897,4.0,1112450347
2,107,1225,4.0,1116042211
3,107,1394,4.5,1112450520
4,107,337,3.5,1116042439
...,...,...,...,...
90939,162476,556,3.5,1102097140
90940,162476,8966,4.0,1102139061
90941,162476,6772,4.0,1102097155
90942,162488,1285,5.0,1202822575


In [18]:
print("total users in main dataset:", len(list(set(df.userID.to_list()))))
print("total users in train dataset:", len(list(set(train.userID.to_list()))))
print("total users in test dataset:", len(list(set(test.userID.to_list()))))
# train = train.set_index('index')
# test = test.set_index('index')

total users in main dataset: 5522
total users in train dataset: 5522
total users in test dataset: 4962


In [19]:
test

Unnamed: 0,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,userID,itemID,rating,timestamp,user_cat,rating_group,item_cat,...,1&2&3,2&3&4,1&2&4,1&3&4,1&2&3&4 = 0,1&2&3&4 = 1,isNoisy,FinalOutput,OptOut,layer3_result
0,111872,111872,111872,107,1172,3.5,1116042312,Benevolent,Su,Strongly-preferred,...,0,0,0,0,0,0,0,0,0,0
1,61075,61075,61075,107,3185,3.5,1113518028,Benevolent,Su,Strongly-preferred,...,0,0,0,0,1,0,0,0,0,0
2,79228,79228,79228,107,27432,3.0,1162221547,Benevolent,Su,Strongly-preferred,...,0,1,0,0,0,0,0,0,0,0
3,499483,499483,499483,120,3594,4.0,990195894,Benevolent,Su,Strongly-preferred,...,0,0,0,0,1,0,0,0,0,0
4,297936,297936,297936,120,1246,5.0,963884571,Benevolent,Su,Strongly-preferred,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16135,576681,576681,576681,162380,3917,3.5,1228163981,Benevolent,Su,Strongly-preferred,...,0,0,0,0,1,0,0,0,0,0
16136,266131,266131,266131,162380,480,3.5,1228161872,Benevolent,Su,Strongly-preferred,...,0,0,0,0,1,0,0,0,0,0
16137,262402,262402,262402,162380,1320,3.5,1228161129,Benevolent,Su,Strongly-preferred,...,0,0,0,0,1,0,0,0,0,0
16138,340869,340869,340869,162476,8014,4.5,1102096562,Benevolent,Su,Strongly-preferred,...,0,0,0,0,0,0,0,0,0,0


In [20]:
train

Unnamed: 0,userID,itemID,rating,timestamp
0,107,1189,3.0,1116042395
1,107,3897,4.0,1112450347
2,107,1225,4.0,1116042211
3,107,1394,4.5,1112450520
4,107,337,3.5,1116042439
...,...,...,...,...
90939,162476,556,3.5,1102097140
90940,162476,8966,4.0,1102139061
90941,162476,6772,4.0,1102097155
90942,162488,1285,5.0,1202822575


In [21]:
set(train_clusters[train_clusters['cluster'] == target_group].cluster.to_list())

{4}

### 2.2 Process data

`ImplicitCF` is a class that intializes and loads data for the training process. During the initialization of this class, user IDs and item IDs are reindexed, ratings greater than zero are converted into implicit positive interaction, and adjacency matrix $R$ of user-item graph is created. Some important methods of `ImplicitCF` are:

`get_norm_adj_mat`, load normalized adjacency matrix of user-item graph if it already exists in `adj_dir`, otherwise call `create_norm_adj_mat` to create the matrix and save the matrix if `adj_dir` is not `None`. This method will be called during the initialization process of LightGCN model.

`create_norm_adj_mat`, create normalized adjacency matrix of user-item graph by calculating $D^{-\frac{1}{2}} A D^{-\frac{1}{2}}$, where $\mathbf{A}=\left(\begin{array}{cc}\mathbf{0} & \mathbf{R} \\ \mathbf{R}^{T} & \mathbf{0}\end{array}\right)$.

`train_loader`, generate a batch of training data — sample a batch of users and then sample one positive item and one negative item for each user. This method will be called before each epoch of the training process.


In [22]:
data = ImplicitCF(train=train, test=test, seed=SEED)

### 2.3 Prepare hyper-parameters

Important parameters of `LightGCN` model are:

`data`, initialized LightGCNDataset object.

`epochs`, number of epochs for training.

`n_layers`, number of layers of the model.

`eval_epoch`, if it is not None, evaluation metrics will be calculated on test set every "eval_epoch" epochs. In this way, we can observe the effect of the model during the training process.

`top_k`, the number of items to be recommended for each user when calculating ranking metrics.

A complete list of parameters can be found in `yaml_file`. We use `prepare_hparams` to read the yaml file and prepare a full set of parameters for the model. Parameters passed as the function's parameters will overwrite yaml settings.

In [23]:
hparams = prepare_hparams(
    yaml_file,
    n_layers=3,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    learning_rate=0.015,
    eval_epoch=5,
    top_k=TOP_K,
)

### 2.4 Create and train model

With data and parameters prepared, we can create the LightGCN model.

To train the model, we simply need to call the `fit()` method.

In [24]:
model = LightGCN(hparams, data, seed=SEED)

Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.


In [25]:
with Timer() as train_time:
    model.fit()

print("Took {} seconds for training.".format(train_time.interval))

Epoch 1 (train)9.3s: train loss = 0.31999 = (mf)0.31950 + (embed)0.00049
Epoch 2 (train)8.9s: train loss = 0.19416 = (mf)0.19332 + (embed)0.00085
Epoch 3 (train)9.6s: train loss = 0.15471 = (mf)0.15346 + (embed)0.00124
Epoch 4 (train)9.3s: train loss = 0.12979 = (mf)0.12820 + (embed)0.00158


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


Epoch 5 (train)9.9s + (eval)2.3s: train loss = 0.10843 = (mf)0.10647 + (embed)0.00196, recall = 0.08656, ndcg = 0.05414, precision = 0.02015, map = 0.03129
Epoch 6 (train)5.5s: train loss = 0.09424 = (mf)0.09189 + (embed)0.00235
Epoch 7 (train)9.2s: train loss = 0.07633 = (mf)0.07356 + (embed)0.00277
Epoch 8 (train)8.7s: train loss = 0.06717 = (mf)0.06401 + (embed)0.00316
Epoch 9 (train)10.2s: train loss = 0.05649 = (mf)0.05295 + (embed)0.00354


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


Epoch 10 (train)9.4s + (eval)1.9s: train loss = 0.05098 = (mf)0.04707 + (embed)0.00391, recall = 0.08650, ndcg = 0.05369, precision = 0.01971, map = 0.03122
Epoch 11 (train)9.3s: train loss = 0.04470 = (mf)0.04045 + (embed)0.00425
Epoch 12 (train)10.2s: train loss = 0.03959 = (mf)0.03501 + (embed)0.00458
Epoch 13 (train)9.3s: train loss = 0.03715 = (mf)0.03227 + (embed)0.00487
Epoch 14 (train)9.0s: train loss = 0.03292 = (mf)0.02775 + (embed)0.00516


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


Epoch 15 (train)9.5s + (eval)2.5s: train loss = 0.03087 = (mf)0.02544 + (embed)0.00543, recall = 0.08077, ndcg = 0.04961, precision = 0.01850, map = 0.02823
Took 144.1328117999983 seconds for training.


### 2.5 Recommendation and Evaluation

Recommendation and evaluation have been performed on the specified test set during training. After training, we can also use the model to perform recommendation and evalution on other data. Here we still use `test` as test data, but `test` can be replaced by other data with similar data structure.

#### 2.5.1 Recommendation

We can call `recommend_k_items` to recommend k items for each user passed in this function. We set `remove_seen=True` to remove the items already seen by the user. The function returns a dataframe, containing each user and top k items recommended to them and the corresponding ranking scores.

In [26]:
topk_scores = model.recommend_k_items(train, top_k=TOP_K, remove_seen=True)
topk_scores.head()

Unnamed: 0,userID,itemID,prediction
0,107,1293,8.401673
1,107,3160,8.091802
2,107,3967,7.871881
3,107,2908,7.707891
4,107,1673,7.54796


#### 2.5.2 Evaluation

With `topk_scores` predicted by the model, we can evaluate how LightGCN performs on this test set.

In [27]:
columns_to_keep = ['userID', 'itemID', 'rating', 'timestamp']

# Create a new DataFrame with only the selected columns
#test1 = test[columns_to_keep].astype(float)


In [28]:
'''
import json

eval_map = map_at_k(test1, topk_scores, k=TOP_K)
eval_ndcg = ndcg_at_k(test1, topk_scores, k=TOP_K)
eval_precision = precision_at_k(test1, topk_scores, k=TOP_K)
eval_recall = recall_at_k(test1, topk_scores, k=TOP_K)
#eval_serendipity = serendipity(train, topk_scores)
#eval_coverage = catalog_coverage(train, topk_scores)
eval_serendipity = serendipity(train, topk_scores)
eval_coverage = catalog_coverage(train, topk_scores)

metric_results = {
    'MAP': eval_map,
    'NDCG': eval_ndcg,
    'Precision': eval_precision,
    'Recall': eval_recall,
    'User Serendipity': eval_serendipity,
    'Coverage': eval_coverage
}

print(json.dumps(metric_results, indent=4))
with open("../output/exp-4/"+dataset_name+"/metric_results.txt", "w") as fp:
    json.dump(metric_results, fp, indent=4)
'''

'\nimport json\n\neval_map = map_at_k(test1, topk_scores, k=TOP_K)\neval_ndcg = ndcg_at_k(test1, topk_scores, k=TOP_K)\neval_precision = precision_at_k(test1, topk_scores, k=TOP_K)\neval_recall = recall_at_k(test1, topk_scores, k=TOP_K)\n#eval_serendipity = serendipity(train, topk_scores)\n#eval_coverage = catalog_coverage(train, topk_scores)\neval_serendipity = serendipity(train, topk_scores)\neval_coverage = catalog_coverage(train, topk_scores)\n\nmetric_results = {\n    \'MAP\': eval_map,\n    \'NDCG\': eval_ndcg,\n    \'Precision\': eval_precision,\n    \'Recall\': eval_recall,\n    \'User Serendipity\': eval_serendipity,\n    \'Coverage\': eval_coverage\n}\n\nprint(json.dumps(metric_results, indent=4))\nwith open("../output/exp-4/"+dataset_name+"/metric_results.txt", "w") as fp:\n    json.dump(metric_results, fp, indent=4)\n'

In [29]:
train.head()

Unnamed: 0,userID,itemID,rating,timestamp
0,107,1189,3.0,1116042395
1,107,3897,4.0,1112450347
2,107,1225,4.0,1116042211
3,107,1394,4.5,1112450520
4,107,337,3.5,1116042439


In [30]:
#test2.head()

In [31]:
# load clusters if not previously loaded
# clusters = pd.read_csv('./output/exp-2/group_clusters.csv', usecols=['user_id', 'group'])

# get per-user serendipity score
#eval_serendipity = user_serendipity(train, topk_scores)
eval_serendipity = user_serendipity(train, topk_scores)
eval_ndcg = ndcg_at_k(test, topk_scores, k=TOP_K)
eval_precision = precision_at_k(test, topk_scores, k=TOP_K)
eval_recall = recall_at_k(test, topk_scores, k=TOP_K)

# calculate per-cluster serendipity score
eval_serendipity_clulsters = clusters.merge(eval_serendipity, left_on=['userId'], right_on=['userID']).drop(columns=['userID'])
cluster_serendipity = eval_serendipity_clulsters.groupby('cluster')
cluster_serendipity_df = cluster_serendipity['user_serendipity'].agg('mean').reset_index()
cluster_serendipity_df.to_csv('../output/exp-4/'+dataset_name+'/cluster_serendipity2.csv', index=False)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  reco_train_user_item_sim[col_sim].fillna(0, inplace=True)
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


In [32]:
len(train['userID'].unique())

5522

In [33]:
len(eval_serendipity['userID'].unique())

5522

In [34]:
len(cluster_serendipity['userId'].unique())

100

In [35]:
eval_serendipity.to_csv('../output/exp-4/'+dataset_name+'/user_serendipity3.csv', index=False)

### 2.6 Infer embeddings

With `infer_embedding` method of LightGCN model, we can export the embeddings of users and items in the training set to CSV files for future use.

In [36]:
model.infer_embedding(user_file, item_file)

## 3. Compare LightGCN with SAR and NCF

Here there are the performances of LightGCN compared to [SAR](../00_quick_start/sar_movielens.ipynb) and [NCF](../00_quick_start/ncf_movielens.ipynb) on MovieLens dataset of 100k and 1m. The method of data loading and splitting is the same as that described above and the GPU used was a GeForce GTX 1080Ti.

Settings common to the three models: `epochs=15, seed=42`.

Settings for LightGCN: `embed_size=64, n_layers=3, batch_size=1024, decay=0.0001, learning_rate=0.015 `.

Settings for SAR: `similarity_type="jaccard", time_decay_coefficient=30, time_now=None, timedecay_formula=True`.

Settings for NCF: `n_factors=4, layer_sizes=[16, 8, 4], batch_size=1024, learning_rate=0.001`.

| Data Size | Model    | Training time | Recommending time | MAP@10   | nDCG@10  | Precision@10 | Recall@10 |
| --------- | -------- | ------------- | ----------------- | -------- | -------- | ------------ | --------- |
| 100k      | LightGCN | 27.8865       | 0.6445            | 0.129236 | 0.436297 | 0.381866     | 0.205816  |
| 100k      | SAR      | 0.4895        | 0.1144            | 0.110591 | 0.382461 | 0.330753     | 0.176385  |
| 100k      | NCF      | 116.3174      | 7.7660            | 0.105725 | 0.387603 | 0.342100     | 0.174580  |
| 1m        | LightGCN | 396.7298      | 1.4343            | 0.075012 | 0.377501 | 0.345679     | 0.128096  |
| 1m        | SAR      | 4.5593        | 2.8357            | 0.060579 | 0.299245 | 0.270116     | 0.104350  |
| 1m        | NCF      | 1601.5846     | 85.4567           | 0.062821 | 0.348770 | 0.320613     | 0.108121  |

From the above results, we can see that LightGCN performs better than the other two models.

### References: 
1. Xiangnan He, Kuan Deng, Xiang Wang, Yan Li, Yongdong Zhang & Meng Wang, LightGCN: Simplifying and Powering Graph Convolution Network for Recommendation, 2020, https://arxiv.org/abs/2002.02126
2. LightGCN implementation [TensorFlow]: https://github.com/kuandeng/lightgcn
3. Thomas N. Kipf and Max Welling, Semi-Supervised Classification with Graph Convolutional Networks, ICLR, 2017, https://arxiv.org/abs/1609.02907
4. Xiang Wang, Xiangnan He, Meng Wang, Fuli Feng, and Tat-Seng Chua, Neural Graph Collaborative Filtering, SIGIR, 2019, https://arxiv.org/abs/1905.08108
5. Y. Koren, R. Bell and C. Volinsky, "Matrix Factorization Techniques for Recommender Systems", in Computer, vol. 42, no. 8, pp. 30-37, Aug. 2009, doi: 10.1109/MC.2009.263.  url: https://datajobs.com/data-science-repo/Recommender-Systems-%5BNetflix%5D.pdf

### Group Validation on System Metrics
Step 1: Clustering is already done
Step 2: All predictions file
Step 3: generating the values not done as well

In [37]:
# Let's generate the all_predictions file using lightGCN

import sys
import os
import surprise
import papermill as pm
import scrapbook as sb
import pandas as pd
from surprise import Dataset, Reader
from joblib import Parallel, delayed
from timeit import default_timer as timer
#import dask.dataframe as dd
#from dask.distributed import Client
from contextlib import contextmanager

from recommenders.utils.timer import Timer
# from recommenders.datasets import movielens -- I commented this line because it gave errors on the library; and its not used // error: module pandera has no attribute 'SchemasModel'
from recommenders.datasets.python_splitters import python_random_split
from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, 
                                                     recall_at_k, get_top_k_items)
from recommenders.models.surprise.surprise_utils import predict, compute_ranking_predictions

In [38]:
# model.predict(test_data, usercol='userId', itemcol='movieId')
#predictions = model.predict(test, usercol='userId', itemcol='movieId')
#predictions.head()

In [39]:
df.head()

Unnamed: 0,Unnamed: 0.2,Unnamed: 0.1,userID,itemID,rating,timestamp,user_cat,rating_group,item_cat,nf1,...,2&3&4,1&2&4,1&3&4,1&2&3&4 = 0,1&2&3&4 = 1,isNoisy,FinalOutput,OptOut,layer3_result,random
15,15,15,131923,30812,3.0,1526512276,Benevolent,Su,Strongly-preferred,0,...,0,0,0,0,0,0,0,0,0,0.37454
34,34,34,147152,2353,3.0,1001236089,Benevolent,Su,Strongly-preferred,0,...,0,0,0,1,0,0,0,0,0,0.950714
37,37,37,78544,1645,2.5,1347684642,Variable,Au,Strongly-preferred,0,...,0,0,0,1,0,0,0,0,0,0.731994
38,38,38,112064,36517,3.5,1160240881,Benevolent,Su,Strongly-preferred,0,...,0,0,0,1,0,0,0,0,0,0.598658
42,42,42,50115,2181,4.0,1373732206,Benevolent,Su,Strongly-preferred,0,...,0,0,0,1,0,0,0,0,0,0.156019


In [40]:
train.head()

Unnamed: 0,userID,itemID,rating,timestamp
0,107,1189,3.0,1116042395
1,107,3897,4.0,1112450347
2,107,1225,4.0,1116042211
3,107,1394,4.5,1112450520
4,107,337,3.5,1116042439


In [41]:
clustered_df = clusters
print(clustered_df)
# group clusters into another dataframe with different representation
grouped_clusters = clustered_df.groupby('cluster')['userId'].apply(list).reset_index(name='users_list')
grouped_clusters['users_per_cluster'] = grouped_clusters.apply(lambda x: list(set(x.users_list)), axis=1)
grouped_clusters = grouped_clusters[['cluster', 'users_per_cluster']]

        userId  movieId   timestamp  cluster
0          107     2599  1112450353       80
1          107     7096  1116042392        8
2          107      337  1116042439        8
3          107     1258  1112450322       80
4          107    45028  1162221173       30
...        ...      ...         ...      ...
107079  162476      457  1102095665       28
107080  162476      269  1102097883       28
107081  162476     4847  1102097826       28
107082  162488     1285  1202822575       45
107083  162488     2232  1202824481       45

[107084 rows x 4 columns]


In [42]:
grouped_clusters

Unnamed: 0,cluster,users_per_cluster
0,0,"[52736, 89091, 57860, 87556, 25095, 119816, 11..."
1,1,"[34819, 89091, 27141, 48646, 90628, 152068, 95..."
2,2,"[117250, 61956, 91146, 76302, 94735, 68114, 14..."
3,3,"[98818, 76292, 87556, 123913, 133648, 64017, 7..."
4,4,"[114177, 127490, 90115, 108549, 48646, 10760, ..."
...,...,...
95,95,"[57356, 94735, 44048, 111123, 46101, 78870, 70..."
96,96,"[126477, 22030, 27150, 68114, 72210, 46101, 79..."
97,97,"[55808, 128523, 78351, 27664, 44048, 86031, 14..."
98,98,"[133122, 48646, 33291, 154125, 7185, 87570, 46..."


In [43]:
#print(grouped_clusters)
group_metric = {}
all_clusters_list = grouped_clusters.users_per_cluster.to_list()
#grouped_clusters.users_per_cluster.to_list()
all_users = len(set(df.userID.to_list()))
#print(all_clusters_list)
for index, row in grouped_clusters.iterrows():
    cluster_id = row['cluster']
    users_list = row['users_per_cluster']
    # users in the cluster vs. users in the equiv group
    #print('clust'+users_list)
    n_cluster_users = len(users_list)
    print('n_cluster_users' + str(n_cluster_users))
    n_cluster_users_equiv = all_users - n_cluster_users
    cluster_scores = topk_scores[topk_scores['userID'].isin(users_list)]
    #df_ndcg_cluster_equiv = df_ndcg.loc[~df_ndcg['userId'].isin(cluster)]
   
    cluster_df = df[df['userID'].isin(users_list)]
    
    #df_ndcg_cluster = ndcg_at_k(cluster_df, cluster_scores, k=TOP_K)
    #df_ndcg_cluster_equiv = df_ndcg_cluster.loc[~df_ndcg_cluster['userID'].isin(cluster)]

    #print(df_ndcg_cluster)

    
    # group metrics
    cluster_ndcg = ndcg_at_k(cluster_df, topk_scores, k=TOP_K)
    cluster_precision = precision_at_k(cluster_df, topk_scores, k=TOP_K)
    cluster_recall = recall_at_k(cluster_df, topk_scores, k=TOP_K)

    # group equiv. metrics
    cluster_ndcg_equiv = (cluster_ndcg*n_cluster_users) / n_cluster_users_equiv
    cluster_precision_equiv = (cluster_precision*n_cluster_users) / n_cluster_users_equiv
    cluster_recall_equiv = (cluster_recall*n_cluster_users) / n_cluster_users_equiv
    
    group_metric[cluster_id] = [
        cluster_ndcg,
        cluster_ndcg_equiv,
        cluster_precision,
        cluster_precision_equiv,
        cluster_recall,
        cluster_recall_equiv
    ]

n_cluster_users277


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users184


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users171


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users223


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users211


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users203


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users100


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users192


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users241


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users228


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users244


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users155


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users109
n_cluster_users261


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users149


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users206


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users257


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users205


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users190


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users189


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users122


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users173


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users164


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users224


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users159


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users163


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users205


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=

n_cluster_users39
n_cluster_users207


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users67
n_cluster_users209


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users180


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users155


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users167


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users204


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users190


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users179


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users207


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users94
n_cluster_users222


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users151


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users184


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users161


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users157


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users125


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users201


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users204


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users211


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users246


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users214


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users188


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users236


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users203


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users179


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users55
n_cluster_users225


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users193


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users187


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users57
n_cluster_users112


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users159


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users179


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users146


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users172


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users215


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users81


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users232


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users248


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users178


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users179


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users20
n_cluster_users192


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users83
n_cluster_users129


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users228


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users200


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users181


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users144


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users144
n_cluster_users159


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users260


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users223


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users154


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users198


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users196


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users196
n_cluster_users215


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users251


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users180


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users216


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users207


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users118


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users195


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users178


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users166


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users160


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users153


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users208


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users211


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


n_cluster_users207


  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


In [44]:
group_metric_df = pd.DataFrame.from_dict(group_metric, orient='index')\
    .reset_index()\
    .rename({
        'index': 'cluster',
        0: 'cluster-nDCG',
        1: 'cluster-nDCG-eq',
        2: 'cluster-precision',
        3: 'cluster-precision-eq',
        4: 'cluster-recall',
        5: 'cluster-recall-eq'
        }, axis=1)
group_metric_df['ndcg'] = eval_ndcg
group_metric_df['precision'] = eval_precision
group_metric_df['recall'] = eval_recall

# save results in csv
group_metric_df.to_csv('../output/exp-4/'+dataset_name+'/gv5.csv', index=False)