## Evaluation of the Best Parameter Settings

- take the best parameter settings for each algorithm out of the parameter search
- run each algorithm on a given number of evaluation users

## 0. Import Modules

In [1]:
import os
import sys
from pathlib import Path

# The notebook is started from a sub-folder of the repository: take the parent of
# the current working directory as project root and make it importable, so the
# `src` package used in the next cell can be found.
PROJECT_DIRECTORY = Path.cwd().resolve().parents[0]
sys.path.append(str(PROJECT_DIRECTORY))

print('Python {} on {}'.format(sys.version, sys.platform))
print('Project directory: ', PROJECT_DIRECTORY)

Python 3.9.19 | packaged by conda-forge | (main, Mar 20 2024, 12:38:46) [MSC v.1929 64 bit (AMD64)] on win32
Project directory:  C:\Users\s8347434\Documents\RecSys2024


In [2]:
import numpy as np
import pandas as pd
from implicit.evaluation import leave_k_out_split
from src.utilities.Helper import load_data, create_sparse_matrix, train_test_split
from src.utilities.MfAlgorithms import MFAlgorithms, MatrixFactorizationRecommender
from src.utilities.NeighborAlgorithms import NeighborhoodAlgorithms, NeighborhoodRecommender
from src.utilities.NonPersonalizedAlgorithms import NonPersonalizedAlgorithms, NonPersonalizedRecommender
from src.utilities.Metrics import Evaluation, Metrics

  from .autonotebook import tqdm as notebook_tqdm


## 1. Read Data

In [3]:
# --- Run configuration -------------------------------------------------------
# DATASET is handed to load_data() and create_sparse_matrix().
DATASET = "real"
# ROWS is forwarded to load_data(rows=...); any non-None value additionally
# enables the low-activity user filter in the loading cell.
ROWS = None
# Interaction file, relative to the project root.
FILENAME = PROJECT_DIRECTORY / "data" / "processed" / "user_item_interaction_FILTERED_ANONYMIZED.txt"

# NOTE(review): this value is compared against the string "cross-fold" when the
# evaluation users are drawn; with the integer 42 that branch is never taken.
# 42 looks like a random seed rather than a strategy name - confirm.
TRAIN_TEST_SPLIT_STRATEGY = 42
# Number of folds iterated over in the "cross-fold" branch.
FOLDS = 5
# Scratch list; the test-run cells re-create it before use.
temp_list = []

### 1.1 Read Dataset

In [4]:
# Load the raw (userID, itemID) interactions.
db_interaction = load_data(FILENAME, rows=ROWS, dataset=DATASET)
print(db_interaction.shape)
print(db_interaction.keys())

if ROWS is not None:
    # ONLY FOR SUBSETS: drop users with too few interactions.
    interactions_per_user = db_interaction['userID'].value_counts()
    threshold = np.median(interactions_per_user)
    print("Median user interactions: ", threshold)
    # The median above is printed for information only; the cut-off that is
    # actually applied is this manual value.
    threshold = 20
    filter_users = interactions_per_user[interactions_per_user >= threshold].index.tolist()

    keep_rows = db_interaction['userID'].isin(filter_users)
    db_interaction = db_interaction[keep_rows].reset_index(drop=True)
    print("The new size is: ", db_interaction.shape)

# Build the sparse user x item matrix together with the user / item index objects.
sparse_user_item_interaction, user_index, item_index = create_sparse_matrix(db_interaction, dataset=DATASET)

print("Number of (users, items): ", sparse_user_item_interaction.shape)
print(sparse_user_item_interaction.getnnz())

# NOTE(review): the value computed below is the share of *filled* cells
# (stored entries / all cells), i.e. the matrix density, although the variable
# and the printed label call it sparsity.
n_users_in_matrix, n_items_in_matrix = sparse_user_item_interaction.shape
n_total = n_users_in_matrix * n_items_in_matrix
n_ratings = sparse_user_item_interaction.nnz
sparsity = n_ratings / n_total
print(f"Matrix sparsity: {round(sparsity*100,2)}%")

(17665904, 2)
Index(['userID', 'itemID'], dtype='object')
Number of (users, items):  (58747, 37370)
17665904
Matrix sparsity: 0.8%


### 1.2 Split Dataset

In [5]:
# Leave-k-out split with K=10 and a fixed random state (see
# implicit.evaluation.leave_k_out_split); it returns the train and test matrices.
# Alternative kept for reference - the project's own splitter:
#   train_set, test_set = train_test_split(sparse_user_item_interaction, user_index, item_index,
#                                          train_percentage=0.8, k=FOLDS, split_strategy=TRAIN_TEST_SPLIT_STRATEGY)
train_set, test_set = leave_k_out_split(
    sparse_user_item_interaction,
    K=10,
    random_state=42,
)

In [6]:
# Draw several independent groups of evaluation users. Each group is scored
# separately later on, so every algorithm run yields one result row per group.
NUMB_EVAL_USERS = 100      # users per group
N_EVAL_USER_GROUPS = 5     # number of independently sampled groups
TOP_N = 10                 # length of each recommendation list
EVAL_USERS_LIST = []

# Seeded generator: without a seed every "Restart & Run All" samples different
# users, which makes the reported metrics irreproducible.
EVAL_USERS_RNG = np.random.default_rng(2)

all_users = user_index.cat.categories
for n_eval_users in [NUMB_EVAL_USERS] * N_EVAL_USER_GROUPS:
    if n_eval_users == sparse_user_item_interaction.shape[0]:
        # The group covers the whole population - no sampling needed.
        EVAL_USERS = all_users
    else:
        EVAL_USERS = EVAL_USERS_RNG.choice(all_users.to_numpy(), n_eval_users, replace=False)

    # The position of a user inside the categories is its category code; one
    # vectorised lookup replaces a full scan of the interaction column per user.
    # Computed in both branches so the name is always defined and never stale.
    # (As before, the codes are used as row numbers of the interaction matrix.)
    EVAL_USERS_IDX = all_users.get_indexer(EVAL_USERS)

    if TRAIN_TEST_SPLIT_STRATEGY == "cross-fold":
        print(f'Total downloads per customer: {sparse_user_item_interaction[EVAL_USERS_IDX].getnnz(axis=1)}')
        for fold in range(FOLDS):
            print(f'Total downloads per customer in train: {train_set[fold][EVAL_USERS_IDX].getnnz(axis=1)}')
            print(f'Total downloads per customer in test: {test_set[fold][EVAL_USERS_IDX].getnnz(axis=1)}')

    EVAL_USERS_LIST.append(EVAL_USERS)

    print(len(EVAL_USERS))

100
100
100
100
100


### 1.3 Read Parameter Settings

In [8]:
# Table with the best parameter settings per algorithm (tab separated, UTF-16).
filename = PROJECT_DIRECTORY / "data/evaluation/best3_parameter_settings.txt"

# Nullable integer dtype for the count-like hyper-parameters.
integer_columns = ("factors", "neighborhood_size", "iterations")
best3_parameter_settings = pd.read_csv(
    filename,
    sep="\t",
    encoding="utf-16",
    dtype={column: "Int64" for column in integer_columns},
)

print(best3_parameter_settings.head(2))
print(best3_parameter_settings.shape)

                Evaluator  MatchCount  Precision    MR       MRR       MAP  \
0  ALSMatrixFactorization          95      0.095  0.55  0.278607  0.221468   
1  ALSMatrixFactorization          98      0.098  0.56  0.288440  0.221122   

       NDCG  Coverage   APLT       ARP    a  regularization  factors  \
0  0.340508  0.010749  0.002  3609.005  1.0             1.0       65   
1  0.339124  0.021212  0.012  1630.747  1.2             1.0      300   

   iterations  learning_rate  alpha    q  neighborhood_size  beta  
0          10            0.0    0.0  0.0                  0   0.0  
1          10            0.0    0.0  0.0                  0   0.0  
(15, 19)


In [9]:
# One list of keyword dictionaries per algorithm, filled from the settings table.
als_top3_parameter_settings = []
bpr_top3_parameter_settings = []
user_knn_top3_parameter_settings = []
item_asym_knn_top3_parameter_settings = []
user_asym_knn_top3_parameter_settings = []

# Evaluator name -> (target list, columns copied into each settings dict).
settings_layout = {
    "ALSMatrixFactorization": (als_top3_parameter_settings, ("a", "factors", "regularization", "iterations")),
    "BPRMatrixFactorization": (bpr_top3_parameter_settings, ("factors", "regularization", "iterations", "learning_rate")),
    "UserKNN": (user_knn_top3_parameter_settings, ("alpha", "q", "neighborhood_size")),
    "ItemIterativeAsymKNN": (item_asym_knn_top3_parameter_settings, ("alpha", "beta", "q", "neighborhood_size")),
    "UserIterativeAsymKNN": (user_asym_knn_top3_parameter_settings, ("alpha", "beta", "q", "neighborhood_size")),
}

for _, settings_row in best3_parameter_settings.iterrows():
    layout = settings_layout.get(settings_row["Evaluator"])
    if layout is None:
        # Rows of evaluators that are not listed above are ignored.
        continue
    target_list, columns = layout
    target_list.append({column: settings_row[column] for column in columns})

# Number of (parameter setting, user group) combinations, used as the
# denominator of the progress output in the test-run cells.
step = len(EVAL_USERS_LIST) * len(user_knn_top3_parameter_settings)

## 2. Start Test Runs

In [None]:
# Names of the metrics that are computed for every test run.
metrics_list = [
    'MatchCount', 'Precision', 'MR', 'MRR', 'MAP', 'NDCG',
    'Coverage', 'APLT', 'ARP',
]

# The Evaluation object is built from the Metrics class and the full interaction matrix.
evaluator = Evaluation(Metrics, sparse_user_item_interaction)

# Register each listed metric with the evaluator.
for metric_name in metrics_list:
    evaluator.add_metric(metric_name)

def evaluate(evaluators_predictions):
    """Compute all metrics for each evaluator's recommendations.

    Parameters
    ----------
    evaluators_predictions : dict
        Maps an evaluator name to its recommendations. The recommendations
        object must offer ``.shape`` and an ``'itemID'`` column whose entry
        with label 0 is a sized collection (its length is reported as top_N).

    Returns
    -------
    pandas.DataFrame
        One row per evaluator with the columns ``Evaluator``, one column per
        metric, ``n_users`` and ``top_N``.

    Notes
    -----
    Uses the notebook-level ``evaluator``, ``test_set``, ``user_index`` and
    ``item_index``.
    """
    # Metrics in the column order of the result table.
    metric_names = ('MatchCount', 'Precision', 'MR', 'MRR', 'MAP', 'NDCG', 'Coverage', 'APLT', 'ARP')
    # Only Coverage is called with an additional keyword argument.
    extra_arguments = {'Coverage': {'threshold': "Median"}}

    results = []
    for evaluator_name, recommendations in evaluators_predictions.items():
        result = {'Evaluator': evaluator_name}
        for metric_name in metric_names:
            result[metric_name] = evaluator.evaluate(
                metric_name, recommendations, test_set, user_index, item_index,
                **extra_arguments.get(metric_name, {}),
            )
        result['n_users'] = recommendations.shape[0]
        result['top_N'] = len(recommendations['itemID'][0])
        results.append(result)

    # Create a DataFrame from the results
    return pd.DataFrame(results)

In [None]:
# Baseline: most-popular recommender, fitted once on the training matrix and
# evaluated on every group of evaluation users.
model = NonPersonalizedRecommender(NonPersonalizedAlgorithms)
model.add_algorithm('most_popular')
model.fit(user_item_matrix=train_set)

temp_list = []
total_runs = len(EVAL_USERS_LIST)
# Count completed runs from 1 so that the last line reads exactly 100%
# (the counter used to start at 1 AND be incremented before printing -> 120%).
for i, eval_users in enumerate(EVAL_USERS_LIST, start=1):
    Recoms = model.recommend(eval_users, user_index, item_index, TOP_N, already_interacted=[])
    evaluators_predictions = {'MostPop': Recoms}
    temp_df = evaluate(evaluators_predictions)
    temp_list.append(temp_df)
    print(f"Progress: {i/total_runs *100:.2f}%")

# One result row per user group.
most_pop_results_df = pd.concat(temp_list)

In [13]:
# most_pop_results_df.to_csv(f"../data/evaluation/test_runs/real_life_MostPop_evaluation_{len(EVAL_USERS_LIST[0])}EVAL-USERS_TOP-{TOP_N}_{sparse_user_item_interaction.getnnz()}ROWS.txt", sep="\t", encoding='utf-16', index=False)

In [None]:
# ALS matrix factorisation: fit once per parameter setting, then evaluate the
# fitted model on every group of evaluation users.
model = MatrixFactorizationRecommender(MFAlgorithms)
model.add_algorithm('als_algorithm')

temp_list = []
# Denominator taken from the ALS settings themselves (not from the UserKNN list).
total_runs = len(als_top3_parameter_settings) * len(EVAL_USERS_LIST)
i = 0  # number of completed (setting, user group) runs
for parameter_setting in als_top3_parameter_settings:
    model.fit(
        user_item_matrix=train_set,
        factors=parameter_setting["factors"],
        regularization=parameter_setting["regularization"],
        alpha=parameter_setting["a"],  # the settings file stores the ALS alpha in column "a"
        iterations=parameter_setting["iterations"],
        random_state=42,
    )
    for eval_users in EVAL_USERS_LIST:
        Recoms = model.recommend(eval_users, train_set, user_index, item_index, TOP_N)
        evaluators_predictions = {'ALSMatrixFactorization': Recoms}
        temp_df = evaluate(evaluators_predictions)
        temp_list.append(temp_df)
        i += 1
        # Single progress line per finished run; reaches exactly 100% at the end.
        print(f"Progress: {i/total_runs *100:.2f}%")

# One result row per (parameter setting, user group).
als_results_df = pd.concat(temp_list)

In [None]:
# als_results_df.to_csv(f"../data/evaluation/test_runs/real_life_ALS_evaluation_{len(EVAL_USERS_LIST[0])}EVAL-USERS_TOP-{TOP_N}_{sparse_user_item_interaction.getnnz()}ROWS.txt", sep="\t", encoding='utf-16', index=False)

In [None]:
# User-based KNN: fit once per parameter setting (alpha, q); the neighbourhood
# size is applied at recommendation time.
model = NeighborhoodRecommender(NeighborhoodAlgorithms)
model.add_algorithm('user_based_neighborhood')

temp_list = []
total_runs = len(user_knn_top3_parameter_settings) * len(EVAL_USERS_LIST)
i = 0  # number of completed (setting, user group) runs
for parameter_setting in user_knn_top3_parameter_settings:
    model.fit(user_item_matrix=train_set, alpha=parameter_setting["alpha"], q=parameter_setting["q"])
    for eval_users in EVAL_USERS_LIST:
        Recoms = model.recommend(
            eval_users, train_set, user_index, item_index, TOP_N,
            neighborhood_size=parameter_setting["neighborhood_size"],
            already_interacted=[],
        )
        evaluators_predictions = {'UserKNN': Recoms}
        temp_df = evaluate(evaluators_predictions)
        temp_list.append(temp_df)
        i += 1
        # Single progress line per finished run; reaches exactly 100% at the end
        # (the old counter started at 1 and was printed twice per run).
        print(f"Progress: {i/total_runs *100:.2f}%")

# One result row per (parameter setting, user group).
user_knn_results_df = pd.concat(temp_list)

In [None]:
# user_knn_results_df.to_csv(f"../data/evaluation/test_runs/real_life_UserKNN_evaluation_{len(EVAL_USERS_LIST[0])}EVAL-USERS_TOP-{TOP_N}_{sparse_user_item_interaction.getnnz()}ROWS.txt", sep="\t", encoding='utf-16', index=False)

In [None]:
# User-based iterative asymmetric KNN: fit once per parameter setting (alpha, q);
# neighbourhood size and beta are applied at recommendation time.
model = NeighborhoodRecommender(NeighborhoodAlgorithms)
model.add_algorithm('user_based_iterative_asym_neighborhood')

temp_list = []
# Denominator taken from this algorithm's own settings (not from the UserKNN list).
total_runs = len(user_asym_knn_top3_parameter_settings) * len(EVAL_USERS_LIST)
i = 0  # number of completed (setting, user group) runs
for parameter_setting in user_asym_knn_top3_parameter_settings:
    model.fit(user_item_matrix=train_set, alpha=parameter_setting["alpha"], q=parameter_setting["q"])
    for eval_users in EVAL_USERS_LIST:
        Recoms = model.recommend(
            eval_users, train_set, user_index, item_index, TOP_N,
            neighborhood_size=parameter_setting["neighborhood_size"],
            beta=parameter_setting["beta"],
            already_interacted=[],
        )
        evaluators_predictions = {'UserAsymKNN': Recoms}
        temp_df = evaluate(evaluators_predictions)
        temp_list.append(temp_df)
        i += 1
        # Single progress line per finished run; reaches exactly 100% at the end.
        print(f"Progress: {i/total_runs *100:.2f}%")

# One result row per (parameter setting, user group).
user_asym_knn_results_df = pd.concat(temp_list)

In [None]:
# user_asym_knn_results_df.to_csv(f"../data/evaluation/test_runs/real_life_UserAsymKNN_evaluation_{len(EVAL_USERS_LIST[0])}EVAL-USERS_TOP-{TOP_N}_{sparse_user_item_interaction.getnnz()}ROWS.txt", sep="\t", encoding='utf-16', index=False)

In [None]:
# Item-based iterative asymmetric KNN: fit once per parameter setting (alpha, q);
# neighbourhood size and beta are applied at recommendation time.
model = NeighborhoodRecommender(NeighborhoodAlgorithms)
model.add_algorithm('item_based_iterative_asym_neighborhood')

temp_list = []
# Denominator taken from this algorithm's own settings (not from the UserKNN list).
total_runs = len(item_asym_knn_top3_parameter_settings) * len(EVAL_USERS_LIST)
i = 0  # number of completed (setting, user group) runs
for parameter_setting in item_asym_knn_top3_parameter_settings:
    model.fit(user_item_matrix=train_set, alpha=parameter_setting["alpha"], q=parameter_setting["q"])
    for eval_users in EVAL_USERS_LIST:
        Recoms = model.recommend(
            eval_users, train_set, user_index, item_index, TOP_N,
            neighborhood_size=parameter_setting["neighborhood_size"],
            beta=parameter_setting["beta"],
            already_interacted=[],
        )
        evaluators_predictions = {'ItemAsymKNN': Recoms}
        temp_df = evaluate(evaluators_predictions)
        temp_list.append(temp_df)
        i += 1
        # Single progress line per finished run; reaches exactly 100% at the end.
        print(f"Progress: {i/total_runs *100:.2f}%")

# One result row per (parameter setting, user group).
item_asym_knn_results_df = pd.concat(temp_list)

In [12]:
# item_asym_knn_results_df.to_csv(f"../data/evaluation/test_runs/real_life_ItemAsymKNN_evaluation_{len(EVAL_USERS_LIST[0])}EVAL-USERS_TOP-{TOP_N}_{sparse_user_item_interaction.getnnz()}ROWS.txt", sep="\t", encoding='utf-16', index=False)

## 3. Evaluate the Test Run Metrics

Read the test run files for each algorithm

In [16]:
# Identify the stored test-run files: they are named after the number of
# evaluation users, the list length and the number of interaction rows.
n_users = 100
top_n = 10
rows = 17665904
eval_algorithms = ["MostPop", "ALS", "UserKNN", "UserAsymKNN", "ItemAsymKNN"]

# Read one result table per algorithm (tab separated, UTF-16).
temp_list = [
    pd.read_csv(
        PROJECT_DIRECTORY / f"data/evaluation/test_runs/real_life_{algorithm}_evaluation_{n_users}EVAL-USERS_TOP-{top_n}_{rows}ROWS.txt",
        sep="\t",
        encoding="utf-16",
    )
    for algorithm in eval_algorithms
]

# Stack the per-algorithm tables into one frame.
overall_eval_df = pd.concat(temp_list)
print(overall_eval_df.head(2))
print(overall_eval_df.shape)

  Evaluator  MatchCount  Precision    MR       MRR       MAP      NDCG  \
0   MostPop           8      0.008  0.08  0.023512  0.023512  0.036422   
1   MostPop          15      0.015  0.14  0.074500  0.073548  0.090396   

   Coverage  APLT        ARP  n_users  top_N  
0  0.001124   0.0  12450.011      100     10  
1  0.001204   0.0  12427.732      100     10  
(65, 12)


Print the best $N$ values for each algorithm

In [31]:
N = 1

# Best N runs per algorithm, ranked by NDCG.
# Sorting followed by groupby().head(N) yields the same rows in the same order
# as the former groupby().apply(nlargest), but avoids the deprecated behaviour
# of apply() operating on the grouping column: once pandas excludes grouping
# columns there, the 'Evaluator' column would silently disappear.
(
    overall_eval_df
    .sort_values(['Evaluator', 'NDCG'], ascending=[True, False])
    .groupby('Evaluator')
    .head(N)
    .reset_index(drop=True)
)

  overall_eval_df.groupby('Evaluator').apply(lambda x: x.nlargest(N, 'NDCG')).reset_index(drop=True)


Unnamed: 0,Evaluator,MatchCount,Precision,MR,MRR,MAP,NDCG,Coverage,APLT,ARP,n_users,top_N
0,ALSMatrixFactorization,130,0.13,0.63,0.394706,0.284739,0.437889,0.017982,0.0,4669.812,100,10
1,ItemAsymKNN,71,0.071,0.49,0.247579,0.213313,0.300595,0.006369,0.01,9651.946,100,10
2,MostPop,15,0.015,0.14,0.0745,0.073548,0.090396,0.001204,0.0,12427.732,100,10
3,UserAsymKNN,104,0.104,0.55,0.312413,0.233236,0.357557,0.015039,0.0,5684.484,100,10
4,UserKNN,112,0.112,0.65,0.329591,0.258861,0.400839,0.014075,0.004,6119.892,100,10


Aggregate the best $N$ values of each algorithm by their mean.

> These values correspond to the ones in the seminar paper.

In [32]:
N = 5

# Best N runs per algorithm, ranked by NDCG.
# sort + groupby().head(N) replaces groupby().apply(nlargest): same rows and
# order, but the 'Evaluator' column is guaranteed to survive, which the
# aggregation below relies on (apply() on grouping columns is deprecated).
best_eval_df = (
    overall_eval_df
    .sort_values(['Evaluator', 'NDCG'], ascending=[True, False])
    .groupby('Evaluator')
    .head(N)
    .reset_index(drop=True)
)

# Mean of every metric over the selected runs of each algorithm.
metric_columns = ['MatchCount', 'Precision', 'MR', 'MRR', 'MAP', 'NDCG', 'Coverage', 'APLT', 'ARP']
best_eval_df.groupby('Evaluator')[metric_columns].mean().reset_index()

  best_eval_df = overall_eval_df.groupby('Evaluator').apply(lambda x: x.nlargest(N, 'NDCG')).reset_index(drop=True)


Unnamed: 0,Evaluator,MatchCount,Precision,MR,MRR,MAP,NDCG,Coverage,APLT,ARP
0,ALSMatrixFactorization,115.8,0.1158,0.608,0.358633,0.264218,0.403916,0.016634,0.0,5179.8074
1,ItemAsymKNN,64.2,0.0642,0.46,0.237421,0.203494,0.283813,0.005823,0.0104,9836.3442
2,MostPop,11.8,0.0118,0.108,0.043048,0.042225,0.057969,0.001054,0.0,12477.9728
3,UserAsymKNN,98.0,0.098,0.514,0.284855,0.210499,0.32947,0.016157,0.0164,5299.9964
4,UserKNN,111.8,0.1118,0.636,0.328724,0.254424,0.393336,0.015403,0.002,5830.9746
