In [1]:
# All imports
import pandas as pd
from scipy.sparse import csr_matrix
import implicit
from implicit import evaluation
from sklearn.model_selection import train_test_split


# Self-written classes
# Each project-local helper is imported and instantiated once here; the
# resulting objects are used as notebook-level singletons by the cells below.
from Threshold_Filtering import Threshold_filter
threshold_filter = Threshold_filter()

from Scoring_Approaches import Scoring_Approaches
preference_scoring = Scoring_Approaches()

# Baseline recommenders, fitted and evaluated in the "Baseline recommenders" section.
from Baseline_recommenders import Popularity_recommender, Random_recommender
pop_recommender = Popularity_recommender()
rand_recommender = Random_recommender()

from Evaluation_Metrics import Evaluation_Metrics
# NOTE(review): this name shadows Python's builtin `eval` for the whole notebook.
# The metric functions further down call `eval.precision_recall(...)` etc., so a
# rename has to be applied to every one of those call sites at the same time.
eval =  Evaluation_Metrics()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the raw interaction log from disk.
df_raw = pd.read_csv('Interaction_Data.csv')

# Keep only the columns needed for the offline evaluation. The result is an
# independent copy, so later changes to `df` never touch `df_raw`.
df = df_raw.filter(
    items=["user_id", "item_id", "click", "Impressions"],
).copy()

In [3]:
# Returns a dataframe in which all users have clicked at least 3 different ads, and each ad has been clicked by at least 10 different users
# (the 3 and 10 are passed positionally; their exact meaning is defined by
# Threshold_filter.both_total_clicks, which is not shown in this notebook).
# NOTE(review): `df` is rebound here, so re-running this cell filters the
# already-filtered frame again.
df = threshold_filter.both_total_clicks(df.copy(),3,10)

In [4]:
# Adding the inferences from all scoring approaches
# `threshold` is forwarded as the first argument of all_approaches; what it
# controls is defined in Scoring_Approaches (not visible here).
# Later cells read the columns "binary", "CTR", "N", "SqrtN" and "IPN" from
# the resulting frame - presumably they are added by this call; confirm
# against Scoring_Approaches.
threshold = 4
df = preference_scoring.all_approaches(threshold, df)

### Splitting data into train+test sets

In [5]:
# Rows with at least one click are the positives; rows with zero clicks are
# the negatives. The two groups are split independently.
df_pos = df.loc[df['click'] > 0]
df_neg = df.loc[df['click'] == 0]

# Positives: 70 % train / 30 % test, stratified on user_id so every user's
# rows are distributed over both parts; fixed seed for reproducibility.
X = df_pos.copy()
y = df_pos["user_id"]
pos_train_df, test_df = train_test_split(
    X,
    test_size=0.30,
    stratify=y,
    random_state=42,
)

# Negatives: same proportions, stratification and seed.
# Only `neg_train` is used below; `neg_test` is not referenced by the cells
# shown here, so the test set consists of positive interactions only.
X_neg = df_neg.copy()
y_neg = df_neg["user_id"]
neg_train, neg_test = train_test_split(
    X_neg,
    test_size=0.30,
    stratify=y_neg,
    random_state=42,
)

# Merge the negative interactions into the positive training set.
train_df = pd.concat([pos_train_df, neg_train], ignore_index=True)

In [6]:
def sparse_traintest_matrix(train_df, test_df, col_name):
    """
    Create sparse user-item matrices from the given train and test dataframes,
    with the given column as the stored values.

    Both matrices are built with the SAME shape, derived from the largest
    user_id / item_id found in either dataframe. Without an explicit shape
    scipy sizes each matrix from its own largest index, so the two matrices
    could end up with different dimensions whenever the highest id occurs in
    only one of the splits.

    Parameters:
    train_df (pandas.DataFrame): The training dataframe containing 'user_id', 'item_id', and 'col_name' columns.
    test_df (pandas.DataFrame): The test dataframe containing 'user_id', 'item_id', and 'col_name' columns.
    col_name (str): The name of the column to be used for creating the sparse matrices.

    Returns:
    train (scipy.sparse.csr_matrix): rows are indexed by user_id, columns by item_id.
    test (scipy.sparse.csr_matrix): same shape as `train`.

    Raises:
    KeyError: if 'user_id', 'item_id' or `col_name` is missing from a dataframe.
    """
    # ids are used directly as row / column indices, so the matrix needs
    # (largest id + 1) rows and columns to hold every id from both splits.
    n_users = int(pd.concat([train_df["user_id"], test_df["user_id"]]).max()) + 1
    n_items = int(pd.concat([train_df["item_id"], test_df["item_id"]]).max()) + 1
    shape = (n_users, n_items)

    def _to_csr(frame):
        # One stored value per dataframe row, placed at (user_id, item_id).
        values = frame[col_name].to_numpy()
        rows = frame["user_id"].to_numpy()
        cols = frame["item_id"].to_numpy()
        return csr_matrix((values, (rows, cols)), shape=shape)

    train = _to_csr(train_df)
    test = _to_csr(test_df)
    return train, test

In [7]:
# One (train, test) pair of sparse matrices per scoring approach, so the
# approaches can be compared against each other at the end.
train_binary, test_binary = sparse_traintest_matrix(train_df, test_df, col_name="binary")
train_CTR, test_CTR = sparse_traintest_matrix(train_df, test_df, col_name="CTR")
train_N, test_N = sparse_traintest_matrix(train_df, test_df, col_name="N")
train_SqrtN, test_SqrtN = sparse_traintest_matrix(train_df, test_df, col_name="SqrtN")
train_IPN, test_IPN = sparse_traintest_matrix(train_df, test_df, col_name="IPN")
# Raw click counts, used alongside the derived scores.
train_click, test_click = sparse_traintest_matrix(train_df, test_df, col_name="click")


In [8]:
# Lookup table: scoring-approach name -> (train matrix, test matrix).
# The insertion order below is the order in which models are trained and
# in which rows are printed in the evaluation tables.
traintest_matrices = dict([
    ('click', (train_click, test_click)),
    ('binary', (train_binary, test_binary)),
    ('CTR', (train_CTR, test_CTR)),
    ('N', (train_N, test_N)),
    ('SqrtN', (train_SqrtN, test_SqrtN)),
    ('IPN', (train_IPN, test_IPN)),
])

# Live view of the dictionary's keys (the scoring-approach names).
scoring_keys = traintest_matrices.keys()

## Initializing and training the matrix factorization models

In [9]:
# The hyperparameters were found through gridsearch
# (the grid search itself is not part of this notebook).
# Both tables map a scoring-approach name to the keyword arguments used when
# the corresponding model is constructed in the training cell; their keys must
# match the keys of `traintest_matrices`.

# Alternating Least Squares: one entry per scoring approach.
ALS_params = {'click': {'factors': 5, 'regularization': 0.001, 'iterations': 5}, 
              'binary': {'factors': 5, 'regularization': 0.001, 'iterations': 5},
              'CTR': {'factors': 5, 'regularization': 0.001, 'iterations': 5}, 
              'N': {'factors': 5, 'regularization': 0.01, 'iterations': 5},
              'SqrtN': {'factors': 5, 'regularization': 0.1, 'iterations': 5}, 
              'IPN': {'factors': 5, 'regularization': 0.1, 'iterations': 10}}

# Logistic Matrix Factorization: same layout, plus a learning rate.
LMF_params = {'click': {'factors': 60, 'regularization': 6.0, 'iterations': 60, 'learning_rate': 1.0}, 
              'binary': {'factors': 40, 'regularization': 6.0, 'iterations': 50, 'learning_rate': 1.0},
              'CTR': {'factors': 60, 'regularization': 0.6, 'iterations': 100, 'learning_rate': 1.0}, 
              'N': {'factors': 5, 'regularization': 0.6, 'iterations': 80, 'learning_rate': 1.0}, 
              'SqrtN': {'factors': 5, 'regularization': 0.6, 'iterations': 80, 'learning_rate': 1.0}, 
              'IPN': {'factors': 5, 'regularization': 6.0, 'iterations': 80, 'learning_rate': 1.0}}

# Best hyperparams for BPR were the default params of the model
# (hence there is no BPR table; the BPR model is constructed without tuned arguments).

In [10]:
# One trained model per scoring approach, keyed by the approach name.
lmf_models = {}
als_models = {}
bpr_models = {}

for i in scoring_keys:
    # Training matrix for this scoring approach (first element of the pair).
    # ALS: the parameter table holds exactly the constructor keywords
    # (factors, regularization, iterations), so it is unpacked directly.
    als_models[i] = implicit.als.AlternatingLeastSquares(**ALS_params[i], random_state=42)
    als_models[i].fit(traintest_matrices[i][0])

    # LMF: same idea; its table additionally carries learning_rate.
    lmf_models[i] = implicit.lmf.LogisticMatrixFactorization(**LMF_params[i], random_state=42)
    lmf_models[i].fit(traintest_matrices[i][0])

# BPR is trained on the binary scores only, with the library's default
# hyperparameters and a fixed seed.
bpr_models['binary'] = implicit.bpr.BayesianPersonalizedRanking(random_state=42)
bpr_models['binary'].fit(traintest_matrices['binary'][0])

100%|██████████| 5/5 [00:00<00:00, 24.42it/s]
100%|██████████| 60/60 [00:08<00:00,  6.95it/s]
100%|██████████| 5/5 [00:00<00:00, 25.12it/s]
100%|██████████| 50/50 [00:03<00:00, 12.62it/s]
100%|██████████| 5/5 [00:00<00:00, 26.51it/s]
100%|██████████| 100/100 [00:14<00:00,  7.03it/s]
100%|██████████| 5/5 [00:00<00:00, 26.42it/s]
100%|██████████| 80/80 [00:01<00:00, 44.59it/s]
100%|██████████| 5/5 [00:00<00:00, 25.96it/s]
100%|██████████| 80/80 [00:01<00:00, 44.27it/s]
100%|██████████| 10/10 [00:00<00:00, 26.34it/s]
100%|██████████| 80/80 [00:01<00:00, 49.14it/s]
100%|██████████| 100/100 [00:07<00:00, 14.15it/s, train_auc=88.52%, skipped=34.30%]


## Evaluation

In [11]:
# Build the list of unique users in the data set and, aligned with it, the
# list of items each user clicked in the test set.

pos_train = train_df[train_df['click'] > 0]
pos_test = test_df.copy()

unique_users = df['user_id'].unique()


def _clicked_items_per_user(interactions, users):
    """
    Return, for every entry of `users` (in order), the list of item_ids that
    user has in `interactions`; users without any row get an empty list.

    The interactions are grouped in a single pass instead of re-scanning the
    whole dataframe once per user, while keeping the original row order of
    the items inside each user's list.
    """
    items_by_user = {}
    for uid, iid in zip(interactions["user_id"].tolist(), interactions["item_id"].tolist()):
        items_by_user.setdefault(uid, []).append(iid)
    return [items_by_user.get(user, []) for user in users]


# user_test_clicks[n] holds the clicked test items of unique_users[n].
user_test_clicks = _clicked_items_per_user(pos_test, unique_users)

In [12]:
def model_recommendations(model, train, N=10, filter=True):
    """
    Collect the top-N recommendations of `model` for every user in the
    notebook-level `unique_users`.

    Parameters:
    - model: object exposing a `recommend` method that returns a pair whose
      first element is the recommended items.
    - train: when this is exactly a scipy `csr_matrix`, the user's row
      `train[user]` is passed to `recommend` together with
      `filter_already_liked_items=filter`. For any other type (e.g. the
      dataframe given to the baseline recommenders) `recommend` is called with
      the user and N only, and `filter` is not used.
    - N (int): number of recommendations requested per user.
    - filter (bool): forwarded as `filter_already_liked_items` (matrix case only).

    Returns:
    list: one entry per user, in the order of `unique_users`, each holding
    that user's recommended items as returned by the model.
    """
    # The kind of `train` is the same for every user, so decide once up front.
    train_is_matrix = type(train) is csr_matrix

    user_recommendations = []
    for user in unique_users:
        if train_is_matrix:
            recs, _scores = model.recommend(user, train[user], N=N, filter_already_liked_items=filter)
        else:
            recs, _scores = model.recommend(user, N=N)
        # Only the item ids are kept; the scores are discarded.
        user_recommendations.append(recs)

    return user_recommendations


In [13]:
"""
This function calculates and prints the performance metrics (Precision, Recall,MAP, AUC, NDCG) from the given model trained on each scoring approach.

Parameters:
- model_name (str): The name of the model.
- model_dict (dict): A dictionary containing the different preference score names as keys and the model that is trained on these scores as values.

Returns:
None
"""

def model_performance_metrics(model_name,model_dict, k=10):
    r = 4 # Rounding to 4 decimal places
    print(model_name,f'\n{"-"*75}\n     {"".ljust(9)}| {"Precision".ljust(10)}| {"Recall".ljust(10)}| {"MAP".ljust(10)}| {"AUC".ljust(10)}| {"NDCG".ljust(10)} \n{"-"*75}')

    for key in model_dict:
        recs = model_recommendations(model_dict[key], traintest_matrices[key][0], N=k)
        traintest = traintest_matrices[key]
        resDict = implicit.evaluation.ranking_metrics_at_k(model_dict[key],traintest[0], traintest[1], show_progress=False, K=k)
        p_,r_ = eval.precision_recall(user_test_clicks, recs)
        mean_avg_p = eval.mean_average_precision(user_test_clicks, recs)
        ndcg = eval.ndcg(user_test_clicks, recs)

        print(f'    {key.ljust(10)}| {str(round(p_,r)).ljust(10)}| {str(round(r_,r)).ljust(10)}| {str(round(mean_avg_p,r)).ljust(10)}| {str(round(resDict["auc"],r)).ljust(10)}| {str(round(ndcg,r)).ljust(10)}')
    print()

def baseline_performance_metrics(model_dict, k=10):
    """
    Print a table with Precision, Recall, MAP and NDCG for the baseline
    recommenders, one row per baseline.

    Only the notebook's own `Evaluation_Metrics` functions are used here, so
    the AUC column is printed as "-".

    Parameters:
    - model_dict (dict): Baseline names as keys and fitted baseline recommenders as values.
    - k (int): Number of recommendations per user that are evaluated.

    Returns:
    None
    """
    r = 4
    print('Baselines',f'\n{"-"*75}\n     {"".ljust(9)}| {"Precision".ljust(10)}| {"Recall".ljust(10)}| {"MAP".ljust(10)}| {"AUC".ljust(10)}| {"NDCG".ljust(10)} \n{"-"*75}')

    for key, baseline in model_dict.items():
        # The training dataframe (not a sparse matrix) is handed over, which
        # makes model_recommendations call recommend(user, N=...) directly.
        recs = model_recommendations(baseline, train_df, N=k)

        p_,r_ = eval.precision_recall(user_test_clicks, recs)
        mean_avg_p = eval.mean_average_precision(user_test_clicks, recs)
        ndcg = eval.ndcg(user_test_clicks, recs)

        print(f'    {key.ljust(10)}| {str(round(p_,r)).ljust(10)}| {str(round(r_,r)).ljust(10)}| {str(round(mean_avg_p,r)).ljust(10)}| {"-".ljust(10)}| {str(round(ndcg,r)).ljust(10)}')
    print()

In [14]:
# Metric table for the ALS models (one row per scoring approach), evaluated on the top-5 recommendations.
model_performance_metrics('ALS Evaluation', als_models, k=5)

ALS Evaluation 
---------------------------------------------------------------------------
              | Precision | Recall    | MAP       | AUC       | NDCG       
---------------------------------------------------------------------------
    click     | 0.0742    | 0.2625    | 0.1573    | 0.6256    | 0.1911    
    binary    | 0.074     | 0.2635    | 0.155     | 0.6261    | 0.189     
    CTR       | 0.0548    | 0.1915    | 0.1129    | 0.5899    | 0.1411    
    N         | 0.0693    | 0.2415    | 0.1445    | 0.6151    | 0.1793    
    SqrtN     | 0.0695    | 0.2437    | 0.1457    | 0.6161    | 0.179     
    IPN       | 0.0771    | 0.2688    | 0.171     | 0.6289    | 0.2065    



In [15]:
# Metric table for the LMF models (one row per scoring approach), evaluated on the top-5 recommendations.
model_performance_metrics('LMF Evaluation', lmf_models,k=5)

LMF Evaluation 
---------------------------------------------------------------------------
              | Precision | Recall    | MAP       | AUC       | NDCG       
---------------------------------------------------------------------------
    click     | 0.0722    | 0.2316    | 0.1372    | 0.6101    | 0.1868    
    binary    | 0.0679    | 0.2156    | 0.1235    | 0.602     | 0.1734    
    CTR       | 0.0634    | 0.2058    | 0.1129    | 0.597     | 0.1573    
    N         | 0.0738    | 0.2411    | 0.1484    | 0.6149    | 0.1956    
    SqrtN     | 0.0728    | 0.2423    | 0.1549    | 0.6155    | 0.1964    
    IPN       | 0.0527    | 0.1763    | 0.0985    | 0.5821    | 0.1341    



In [16]:
# Metric table for BPR, evaluated on the top-5 recommendations; only a 'binary' model was trained, so the table has a single row.
model_performance_metrics('BPR Evaluation', bpr_models, k=5)

BPR Evaluation 
---------------------------------------------------------------------------
              | Precision | Recall    | MAP       | AUC       | NDCG       
---------------------------------------------------------------------------
    binary    | 0.0524    | 0.1997    | 0.1033    | 0.5938    | 0.1267    



#### Baseline recommenders

In [17]:
# Fit both baseline recommenders on the full training dataframe
# (positive and negative interactions merged).
pop_recommender.fit(train_df)
rand_recommender.fit(train_df)
# Display name -> fitted baseline; the names become the row labels of the baseline table.
baseline_dict = {'Popularity':pop_recommender,'Random':rand_recommender}

In [18]:
# Metric table for the baselines, evaluated on the top-5 recommendations (same k as the matrix factorization models above).
baseline_performance_metrics(baseline_dict, k=5)

Baselines 
---------------------------------------------------------------------------
              | Precision | Recall    | MAP       | AUC       | NDCG       
---------------------------------------------------------------------------
    Popularity| 0.0703    | 0.226     | 0.1421    | -         | 0.1887    
    Random    | 0.0065    | 0.0202    | 0.0093    | -         | 0.0147    

