In [3]:
import numpy as np
import pandas as pd
from surprise import Reader, Dataset, accuracy
from tqdm import tqdm
from surprise.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
import pickle
from surprise import NMF
from collections import defaultdict
from math import log2
tqdm.pandas()
interactions = pd.read_pickle('Pickle/interactions.pkl')
interactions = interactions[['user_id', 'book_id', 'rating', 'is_read']]  
train_df, test_df = train_test_split(interactions, test_size=0.2, random_state=42)

# Calculate the global mean rating
global_mean = train_df['rating'].mean()

# Calculate user bias with regularization
lambda_reg = 10
user_sum_ratings = train_df.groupby('user_id')['rating'].sum()
user_count_ratings = train_df.groupby('user_id')['rating'].count()
user_bias = (user_sum_ratings - user_count_ratings * global_mean) / (user_count_ratings + lambda_reg)

# Map user bias back to the original dataframe
train_df['user_bias'] = train_df['user_id'].map(user_bias)

# Calculate item bias with regularization
item_sum_ratings = train_df.groupby('book_id')['rating'].sum()
item_count_ratings = train_df.groupby('book_id')['rating'].count()
item_bias = (item_sum_ratings - item_count_ratings * global_mean) / (item_count_ratings + lambda_reg)

# Map item bias back to the original dataframe
train_df['item_bias'] = train_df['book_id'].map(item_bias)

# Normalize ratings
train_df['normalised_rating'] = train_df['rating'] - train_df['user_bias'] - train_df['item_bias']

# Convert standardized train_df to surprise dataset
reader = Reader(rating_scale=(train_df['rating'].min(), train_df['rating'].max()))
train_data = Dataset.load_from_df(train_df[['user_id', 'book_id', 'normalised_rating']], reader)

# Convert test_df to surprise dataset without normalization
test_data = Dataset.load_from_df(test_df[['user_id', 'book_id', 'rating']], reader)

# Build full trainset and testset
trainset = train_data.build_full_trainset()
testset = test_data.construct_testset([(uid, iid, r, {}) for uid, iid, r in test_df[['user_id', 'book_id', 'rating']].values])

# Define a parameter grid
param_grid = { 
    'n_factors': [60], 
    'n_epochs': [50], 
    'random_state': [42]
}

gs = GridSearchCV(NMF, param_grid, measures=['rmse'], cv=2)
gs.fit(train_data)

best_params = gs.best_params['rmse']
best_params
best_nmf = NMF(**best_params)
best_nmf.fit(train_data.build_full_trainset())
predictions = best_nmf.test(testset)

# Reverse bias terms
def reverse_bias_terms(uid, iid, est, user_bias, item_bias, global_mean):
    user_b = user_bias.get(uid, 0)  # Default to 0 if the user/item is not in the training data
    item_b = item_bias.get(iid, 0)
    unbiased_prediction = est - user_b - item_b + global_mean
    return unbiased_prediction

# Rescale predictions by reversing bias terms
def unbiased_predictions(predictions, user_bias, item_bias, global_mean):
    adjusted_predictions = []
    for uid, iid, true_r, est, _ in predictions:
        # Calculate the unbiased prediction
        unbiased_prediction = reverse_bias_terms(uid, iid, est, user_bias, item_bias, global_mean)
        # Clip the rating to the original scale (e.g., 1 to 5)
        unbiased_prediction = min(5, max(1, unbiased_prediction))
        adjusted_predictions.append((uid, iid, true_r, unbiased_prediction, _))
    return adjusted_predictions

# Rescale predictions
adjusted_predictions = unbiased_predictions(predictions, user_bias.to_dict(), item_bias.to_dict(), train_df['rating'].mean())

def precision_recall_ndcg_at_k(predictions, k, threshold):
    """Return precision, recall, and nDCG at k metrics for each user."""
    
    # Helper function to calculate DCG and nDCG
    def dcg_at_k(scores, k):
        return sum([rel / log2(idx + 2) for idx, rel in enumerate(scores[:k])])

    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))
    
    precisions = dict()
    recalls = dict()
    ndcgs = dict()
    
    for uid, user_ratings in user_est_true.items():
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)
        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])
        n_rel_and_rec_k = sum(((true_r >= threshold) and (est >= threshold)) for (est, true_r) in user_ratings[:k])
        
        # Precision@K
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 1
        
        # Recall@K
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 1
        
        # nDCG@K
        actual = [true_r for (_, true_r) in user_ratings]
        ideal = sorted(actual, reverse=True)
        
        idcg = dcg_at_k(ideal, k)
        dcg = dcg_at_k([rel for (est, rel) in user_ratings], k)
        
        ndcgs[uid] = dcg / idcg if idcg > 0 else 0
    
    precision = sum(prec for prec in precisions.values()) / len(precisions)
    recall = sum(rec for rec in recalls.values()) / len(recalls)
    ndcg = sum(ndcg for ndcg in ndcgs.values()) / len(ndcgs)
    
    return precision, recall, ndcg

precision, recall, ndcg = precision_recall_ndcg_at_k(adjusted_predictions, k=10, threshold=2)
print(f'Adjusted Precision: {precision}, Adjusted Recall: {recall}, Adjusted nDCG: {ndcg}')


Adjusted Precision: 0.8964500105172951, Adjusted Recall: 0.38449322037201994, Adjusted nDCG: 0.7555264203937998
