# 1. Setup

In [None]:
%%capture
%pip install pandas
%pip install numpy
%pip install tqdm
%pip install scikit-learn
%pip install scipy
%pip install warnings

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from __future__ import print_function
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse 
from sklearn.linear_model import Ridge
from sklearn import linear_model
import random

import multiprocessing as mp
from functools import partial

import warnings
warnings.filterwarnings('ignore')

In [None]:
def split_no_cold_start(ratings_df, test_size=0.2, min_ratings_per_user=5, min_ratings_per_item=5):
    """Split ratings per user into train/test without cold-start test items.

    Rows are first filtered to users and items that have at least the given
    number of ratings in ``ratings_df`` (counts are taken once, before
    filtering). Each remaining user with at least two ratings is then split
    individually, so every user in the test set also appears in the train set.
    Finally, any test row whose item never occurs in the train set is moved
    to the train set, so every test item is known at training time.

    Args:
        ratings_df: DataFrame with at least ``user_id`` and ``item_id`` columns.
        test_size: Passed to ``train_test_split`` for every user.
        min_ratings_per_user: Minimum rating count for a user to be kept.
        min_ratings_per_item: Minimum rating count for an item to be kept.

    Returns:
        ``(train_df, test_df)`` with fresh integer indices. Both are empty
        (same columns as the input) when no user qualifies.

    Note:
        Moving cold-start item rows to train can, in rare cases, leave a user
        with no test rows at all.
    """
    user_counts = ratings_df['user_id'].value_counts()
    item_counts = ratings_df['item_id'].value_counts()

    valid_users = user_counts[user_counts >= min_ratings_per_user].index
    valid_items = item_counts[item_counts >= min_ratings_per_item].index

    keep = ratings_df['user_id'].isin(valid_users) & ratings_df['item_id'].isin(valid_items)
    df_filtered = ratings_df[keep]

    train_parts = []
    test_parts = []

    # One pass over the frame: sort=False keeps users in order of first
    # appearance and rows in their original order, so each user's split is
    # the same as selecting that user's rows with a boolean mask.
    grouped = df_filtered.groupby('user_id', sort=False)
    for _, user_ratings in tqdm(grouped, total=grouped.ngroups, desc="Processing users"):
        # A user needs at least two ratings to appear on both sides.
        if len(user_ratings) < 2:
            continue
        user_train, user_test = train_test_split(
            user_ratings,
            test_size=test_size,
            random_state=42
        )
        train_parts.append(user_train)
        test_parts.append(user_test)

    if not train_parts:
        # pd.concat refuses an empty list; hand back empty frames instead.
        empty = df_filtered.iloc[0:0].reset_index(drop=True)
        return empty, empty.copy()

    train_df = pd.concat(train_parts, ignore_index=True)
    test_df = pd.concat(test_parts, ignore_index=True)

    # The per-user split knows nothing about items, so an item can end up
    # only in the test set. Move those rows to train.
    cold_item_rows = ~test_df['item_id'].isin(train_df['item_id'])
    if cold_item_rows.any():
        train_df = pd.concat([train_df, test_df[cold_item_rows]], ignore_index=True)
        test_df = test_df[~cold_item_rows].reset_index(drop=True)

    return train_df, test_df

def calculate_rmse(model, test_data):
    """Return the root-mean-square error of ``model`` over ``test_data``.

    Each row of ``test_data`` is read as ``(user, item, rating)`` through
    columns 0, 1 and 2. Predictions come from
    ``model.pred(user, item, normalized=0)``, so the model's ``pred`` must
    accept a ``normalized`` keyword.
    """
    n_tests = test_data.shape[0]
    squared_error_sum = 0

    for row in tqdm(range(n_tests), desc="Calculating RMSE"):
        predicted = model.pred(test_data[row, 0], test_data[row, 1], normalized=0)
        actual = test_data[row, 2]
        squared_error_sum += (predicted - actual) ** 2

    mean_squared_error = squared_error_sum / n_tests
    return np.sqrt(mean_squared_error)

In [None]:
# Load the ratings table. The rest of the notebook reads the `user_id`,
# `item_id` and `rating` columns from it.
# Alternative input file (disabled):
# ratings = pd.read_csv("/kaggle/input/movie-lens-1m/ratings.csv", encoding='latin-1')
ratings = pd.read_csv("/kaggle/input/movie-lens-1m/amazon_ratings.csv", encoding="latin-1")

# Quick summary of what was loaded: row/column count and distinct ids.
print(f"Dataset shape: {ratings.shape}")
print(f"Unique users: {ratings['user_id'].nunique()}")
print(f"Unique movies: {ratings['item_id'].nunique()}")

In [None]:
# Per-user split, holding out 10% of each user's ratings for testing.
rate_train, rate_test = split_no_cold_start(ratings, test_size=0.1)

# Size and coverage of both splits.
print(f"Train set shape: {rate_train.shape}")
print(f"Test set shape: {rate_test.shape}")
print(f"Train users: {rate_train['user_id'].nunique()}")
print(f"Test users: {rate_test['user_id'].nunique()}")
print(f"Train movies: {rate_train['item_id'].nunique()}")
print(f"Test movies: {rate_test['item_id'].nunique()}")

# Checking data integrity
# Both splits must contain the same number of distinct users ...
assert rate_train['user_id'].nunique() == rate_test['user_id'].nunique(), "Different number of users in train/test"
# ... and no user may appear in train without also appearing in test.
assert len(set(rate_train['user_id'].unique()) - set(rate_test['user_id'].unique())) == 0, "Cold start users detected"
# Every item in the test split must also occur in the train split.
assert len(set(rate_test['item_id'].unique()) - set(rate_train['item_id'].unique())) == 0, "Cold start movies detected"
# The two splits together may not hold more rows than the source table.
assert rate_train.shape[0] + rate_test.shape[0] <= ratings.shape[0], "Data leakage detected"
# No missing rating values on either side.
assert rate_train['rating'].isnull().sum() == 0, "Null ratings in train set"
assert rate_test['rating'].isnull().sum() == 0, "Null ratings in test set"

print("All integrity checks passed!")

In [None]:
# Convert both splits to plain arrays with columns (user_id, item_id, rating);
# the models below index these columns by position (0, 1, 2).
rate_train = rate_train[['user_id', 'item_id', 'rating']].values
rate_test = rate_test[['user_id', 'item_id', 'rating']].values

# Disabled: shift ids down by one (for datasets whose ids start at 1).
# rate_train[:, :2] -= 1
# rate_test[:, :2] -= 1

# 3. User-User Collaborative Filtering

In [None]:
# class CF(object):
#     def __init__(self, Y_data, k, dist_func=cosine_similarity, uuCF=1):
#         self.uuCF = uuCF
#         self.Y_data = Y_data if uuCF else Y_data[:, [1, 0, 2]]
#         self.k = k
#         self.dist_func = dist_func
#         self.Ybar_data = None
#         self.n_users = int(np.max(self.Y_data[:, 0])) + 1 
#         self.n_items = int(np.max(self.Y_data[:, 1])) + 1
    
#     def add(self, new_data):
#         self.Y_data = np.concatenate((self.Y_data, new_data), axis=0)

#     def normalize_Y(self):
#         users = self.Y_data[:, 0]
#         self.Ybar_data = self.Y_data.copy()
#         self.mu = np.zeros((self.n_users,))
        
#         for n in tqdm(range(self.n_users), desc="Normalizing users"):
#             ids = np.where(users == n)[0].astype(np.int32)
#             ratings = self.Y_data[ids, 2]
#             m = np.mean(ratings) 
#             if np.isnan(m):
#                 m = 0
#             self.mu[n] = m
#             self.Ybar_data[ids, 2] = ratings - self.mu[n]

#         self.Ybar = sparse.coo_matrix((self.Ybar_data[:, 2],
#             (self.Ybar_data[:, 1], self.Ybar_data[:, 0])), (self.n_items, self.n_users))
#         self.Ybar = self.Ybar.tocsr()

#     def similarity(self):
#         eps = 1e-6
#         print("Computing similarity matrix...")
#         self.S = self.dist_func(self.Ybar.T, self.Ybar.T) + eps
        
#     def refresh(self):
#         self.normalize_Y()
#         self.similarity() 
        
#     def fit(self):
#         self.refresh()

#     def __pred(self, u, i, normalized=1):
#         ids = np.where(self.Y_data[:, 1] == i)[0].astype(np.int32)
#         users_rated_i = (self.Y_data[ids, 0]).astype(np.int32)
#         sim = self.S[u, users_rated_i]
#         a = np.argsort(sim)[-self.k:] 
#         nearest_s = sim[a]
#         r = self.Ybar[i, users_rated_i[a]]
        
#         if normalized:
#             return (r*nearest_s)[0]/(np.abs(nearest_s).sum() + 1e-8)
#         return (r*nearest_s)[0]/(np.abs(nearest_s).sum() + 1e-8) + self.mu[u]
    
#     def pred(self, u, i, normalized=1):
#         if self.uuCF: 
#             return self.__pred(u, i, normalized)
#         return self.__pred(i, u, normalized)
    
#     def recommend(self, u):
#         ids = np.where(self.Y_data[:, 0] == u)[0]
#         items_rated_by_u = self.Y_data[ids, 1].tolist()              
#         recommended_items = []
        
#         for i in tqdm(range(self.n_items), desc=f"Recommending for user {u}", leave=False):
#             if i not in items_rated_by_u:
#                 rating = self.__pred(u, i)
#                 if rating > 0: 
#                     recommended_items.append(i)
        
#         return recommended_items

In [None]:
# rs = CF(rate_train, k = 30, uuCF = 1)
# rs.fit()

In [None]:
# train_rmse = calculate_rmse(rs, rate_train)
# print(f'User-user CF, Train RMSE = {train_rmse:.4f}')

# test_rmse = calculate_rmse(rs, rate_test)
# print(f'User-user CF, Test RMSE = {test_rmse:.4f}')

# 2. Item-Item Collaborative Filtering

In [None]:
# rs = CF(rate_train, k = 30, uuCF = 0)
# rs.fit()

In [None]:
# train_rmse = calculate_rmse(rs, rate_train)
# print(f'Item-item CF, Train RMSE = {train_rmse:.4f}')

# test_rmse = calculate_rmse(rs, rate_test)
# print(f'Item-item CF, Test RMSE = {test_rmse:.4f}')

# 3. Matrix Factorization

In [None]:
class MatrixFactorization(object):
    """Biased matrix-factorization recommender fitted by alternating gradient descent.

    The rating of item ``m`` by user ``n`` is modelled as
    ``X[m].dot(W[:, n]) + b[m] + d[n]``.

    Args:
        Y: 2-D array with one row per rating, columns
            ``(user_id, item_id, rating)``. Ids are used as non-negative
            integer indices. The id columns are indexed once at construction.
        K: Number of latent factors.
        lam: L2 regularisation weight on ``X`` and ``W`` (biases are not
            regularised).
        Xinit: Optional initial item factors, shape ``(n_items, K)``. Copied,
            so the caller's array is never modified by training.
        Winit: Optional initial user factors, shape ``(K, n_users)``. Copied.
        learning_rate: Gradient-descent step size.
        max_iter: Number of alternating passes performed by ``fit``.
        print_every: Print loss and training RMSE every this many passes;
            ``0`` or ``None`` disables printing.
        n_inner_iter: Gradient steps taken per item / per user in each pass.
    """

    # Number of ratings scored at once in the vectorised helpers; bounds the
    # size of the temporary (chunk, K) factor gathers.
    _CHUNK = 65536

    def __init__(self, Y, K, lam=0.1, Xinit=None, Winit=None,
                 learning_rate=0.5, max_iter=1000, print_every=100,
                 n_inner_iter=30):
        self.Y = Y  # rating triples: (user_id, item_id, rating) per row
        self.K = K
        self.lam = lam
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.print_every = print_every
        self.n_inner_iter = n_inner_iter
        self.n_users = int(np.max(Y[:, 0])) + 1
        self.n_items = int(np.max(Y[:, 1])) + 1
        self.n_ratings = Y.shape[0]
        # Random draws happen in the order X, W, b, d so seeded runs are
        # reproducible. Supplied initial values are copied as floats because
        # training updates the factors in place.
        self.X = (np.random.randn(self.n_items, K) if Xinit is None
                  else np.array(Xinit, dtype=float))
        self.W = (np.random.randn(K, self.n_users) if Winit is None
                  else np.array(Winit, dtype=float))
        self.b = np.random.randn(self.n_items)  # item biases
        self.d = np.random.randn(self.n_users)  # user biases

        # Integer ids plus a rows-by-id index, built once so the update
        # loops do not rescan every rating for every item and user.
        self._user_idx = Y[:, 0].astype(np.int64)
        self._item_idx = Y[:, 1].astype(np.int64)
        self._rows_by_user = self._group_rows(self._user_idx, self.n_users)
        self._rows_by_item = self._group_rows(self._item_idx, self.n_items)

    @staticmethod
    def _group_rows(ids, n_groups):
        """Return ``(order, bounds)`` so rows of id ``g`` are ``order[bounds[g]:bounds[g + 1]]``."""
        order = np.argsort(ids, kind="stable")  # stable: rows stay ascending per id
        bounds = np.searchsorted(ids[order], np.arange(n_groups + 1))
        return order, bounds

    @staticmethod
    def _rows_of(index, group):
        """Return the rating-row indices stored in ``index`` for one id."""
        order, bounds = index
        return order[bounds[group]:bounds[group + 1]]

    def _raw_scores(self, user_ids, item_ids):
        """Return unclipped model scores for paired integer id arrays."""
        scores = np.empty(len(user_ids))
        for start in range(0, len(user_ids), self._CHUNK):
            u = user_ids[start:start + self._CHUNK]
            i = item_ids[start:start + self._CHUNK]
            # Row-wise dot product of X[i] (n, K) with W[:, u] (K, n).
            scores[start:start + self._CHUNK] = (
                np.einsum("ij,ji->i", self.X[i], self.W[:, u]) + self.b[i] + self.d[u]
            )
        return scores

    def loss(self):
        """Return mean half squared error on ``Y`` plus the L2 penalty on ``X`` and ``W``."""
        error = self._raw_scores(self._user_idx, self._item_idx) - self.Y[:, 2]
        L = 0.5 * np.sum(error ** 2) / self.n_ratings
        return L + 0.5 * self.lam * (np.sum(self.X ** 2) + np.sum(self.W ** 2))

    def updateXb(self):
        """Run gradient steps on every item's factors and bias, holding users fixed."""
        for m in range(self.n_items):
            # all users who rated item m and the corresponding ratings
            ids = self._rows_of(self._rows_by_item, m)
            user_ids, ratings = self._user_idx[ids], self.Y[ids, 2]
            Wm, dm = self.W[:, user_ids], self.d[user_ids]
            for _ in range(self.n_inner_iter):
                xm = self.X[m]
                error = xm.dot(Wm) + self.b[m] + dm - ratings
                # Gradients are scaled by the total rating count, matching loss().
                grad_xm = error.dot(Wm.T) / self.n_ratings + self.lam * xm
                grad_bm = np.sum(error) / self.n_ratings
                self.X[m] -= self.learning_rate * grad_xm.reshape(-1)
                self.b[m] -= self.learning_rate * grad_bm

    def updateWd(self):
        """Run gradient steps on every user's factors and bias, holding items fixed."""
        for n in range(self.n_users):
            # all items rated by user n and the corresponding ratings
            ids = self._rows_of(self._rows_by_user, n)
            item_ids, ratings = self._item_idx[ids], self.Y[ids, 2]
            Xn, bn = self.X[item_ids], self.b[item_ids]
            for _ in range(self.n_inner_iter):
                wn = self.W[:, n]
                error = Xn.dot(wn) + bn + self.d[n] - ratings
                grad_wn = error.dot(Xn) / self.n_ratings + self.lam * wn
                grad_dn = np.sum(error) / self.n_ratings
                self.W[:, n] -= self.learning_rate * grad_wn.reshape(-1)
                self.d[n] -= self.learning_rate * grad_dn

    def fit(self):
        """Alternate user and item updates for ``max_iter`` passes."""
        for it in range(self.max_iter):
            self.updateWd()
            self.updateXb()
            # A falsy print_every disables reporting (and avoids modulo by zero).
            if self.print_every and (it + 1) % self.print_every == 0:
                rmse_train = self.evaluate_RMSE(self.Y)
                print('iter = %d, loss = %.4f, RMSE = %.4f' % (it + 1, self.loss(), rmse_train))

    def pred(self, user_id, item_id):
        """Return the predicted rating for one (user, item) pair, clipped to [0, 5]."""
        user_id, item_id = int(user_id), int(item_id)
        pred = self.X[item_id, :].dot(self.W[:, user_id]) + self.b[item_id] + self.d[user_id]
        return max(0, min(pred, 5))

    def recommend(self, user_id, top_n=None):
        """Return item ids the user has not rated in ``Y``, best predicted score first.

        Args:
            user_id: User index in ``[0, n_users)``.
            top_n: Optional cap on the number of items returned.

        Returns:
            A list of ``int`` item ids. Items without any rating in ``Y`` are
            candidates too; their scores come from factors that no rating
            has informed.

        Raises:
            IndexError: If ``user_id`` is outside ``[0, n_users)``.
        """
        user_id = int(user_id)
        if not 0 <= user_id < self.n_users:
            raise IndexError("user_id %d is out of range" % user_id)
        scores = self.X.dot(self.W[:, user_id]) + self.b + self.d[user_id]
        unseen = np.ones(self.n_items, dtype=bool)
        unseen[self._item_idx[self._rows_of(self._rows_by_user, user_id)]] = False
        candidates = np.flatnonzero(unseen)
        ranked = candidates[np.argsort(-scores[candidates], kind="stable")]
        if top_n is not None:
            ranked = ranked[:top_n]
        return ranked.tolist()

    def evaluate_RMSE(self, rate_test):
        """Return the RMSE of clipped predictions over ``(user, item, rating)`` rows.

        Returns ``nan`` when ``rate_test`` has no rows.
        """
        n_tests = rate_test.shape[0]
        if n_tests == 0:
            return float("nan")
        users = rate_test[:, 0].astype(np.int64)
        items = rate_test[:, 1].astype(np.int64)
        # Same [0, 5] clipping as pred().
        preds = np.clip(self._raw_scores(users, items), 0, 5)
        actual = np.asarray(rate_test[:, 2], dtype=float)
        return np.sqrt(np.sum((preds - actual) ** 2) / n_tests)

In [None]:
# Fit a 30-factor model on the training triples for 10 passes, printing loss
# and training RMSE after every pass.
# NOTE(review): learning_rate=50 looks large, but the update steps divide each
# gradient by the total number of ratings — presumably chosen to compensate;
# confirm before reusing on a different dataset size.
rs = MatrixFactorization(rate_train, K = 30, lam = .01, print_every=1, learning_rate=50,
max_iter = 10)
rs.fit()

# evaluate on test data
RMSE = rs.evaluate_RMSE(rate_test)
print("\nMatrix Factorization CF, RMSE = %.4f" %RMSE)

# 4. Metrics Comparison

In [None]:
def calculate_metrics_sample_parallel(model, test_data, n_users=100, k_values=[10, 100], random_seed=42, n_processes=None):
    """Collect recommendations and relevant test items for a random sample of users.

    Args:
        model: Object passed to ``get_user_recommendations``, which calls
            ``model.recommend(int(user_id))`` on it in a worker process.
        test_data: Array whose columns 0, 1, 2 are user id, item id, rating.
        n_users: Maximum number of distinct test users to sample.
        k_values: Cut-offs of interest; recommendations are truncated to the
            largest one. (The list default is never mutated.)
        random_seed: Seed applied to both ``random`` and ``np.random``
            (global state) before sampling.
        n_processes: Worker count; defaults to ``min(cpu_count, 8)``.

    Returns:
        ``(user_recommendations, user_test_items, sample_users)`` where the
        first two are dicts keyed by user id.
    """
    if n_processes is None:
        n_processes = min(mp.cpu_count(), 8)

    random.seed(random_seed)
    np.random.seed(random_seed)

    unique_users = np.unique(test_data[:, 0])

    if len(unique_users) <= n_users:
        sample_users = unique_users
    else:
        sample_users = np.random.choice(unique_users, size=n_users, replace=False)

    # Each task carries only its own user's test rows. Sending the whole
    # test matrix with every task would pickle it once per sampled user.
    args_list = [
        (user_id, model, test_data[test_data[:, 0] == user_id])
        for user_id in sample_users
    ]

    with mp.Pool(processes=n_processes) as pool:
        results = list(tqdm(
            pool.imap(get_user_recommendations, args_list),
            total=len(args_list),
            # Report the number actually sampled, which can be below n_users.
            desc=f"Processing {len(sample_users)} random users"
        ))

    user_recommendations = {}
    user_test_items = {}

    if results:
        max_k = max(k_values)
        for user_id, recommended, relevant in results:
            user_recommendations[user_id] = recommended[:max_k]
            user_test_items[user_id] = relevant

    return user_recommendations, user_test_items, sample_users


def get_user_recommendations(args):
    """Return one user's recommendations and relevant test items.

    Args:
        args: Tuple ``(user_id, model, test_data)``. ``test_data`` columns
            0, 1, 2 are read as user id, item id and rating.

    Returns:
        ``(user_id, recommended_items, relevant_items)``. ``relevant_items``
        holds the user's test item ids rated 4.0 or higher, as ints.
        ``recommended_items`` is whatever ``model.recommend(int(user_id))``
        returns, or ``[]`` if that call raises an ordinary exception.
    """
    import sys  # local import: used only to report failures

    user_id, model, test_data = args
    user_test_ratings = test_data[test_data[:, 0] == user_id]
    relevant_items = user_test_ratings[user_test_ratings[:, 2] >= 4.0][:, 1].astype(int)

    try:
        recommended_items = model.recommend(int(user_id))
    except Exception as exc:
        # Stay best-effort, but say why the list is empty. Written to stderr
        # directly because the warnings module may be silenced by the caller.
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        print(f"Recommendation failed for user {user_id}: {exc!r}", file=sys.stderr)
        return user_id, [], relevant_items
    return user_id, recommended_items, relevant_items

def calculate_user_metrics(args):
    """Score one user's recommendation list at cut-off ``k``.

    ``args`` is ``(user_id, recommended, relevant, k)``. Returns ``None``
    when the user has no relevant items, otherwise the tuple
    ``(ndcg, hit_rate, precision)`` computed by ``ndcg_at_k``,
    ``hit_rate_at_k`` and ``precision_at_k``.
    """
    user_id, recommended, relevant, k = args

    # Nothing to hit: this user is skipped by returning None.
    if len(relevant) == 0:
        return None

    # 1/0 relevance flag for each of the first k recommendations, in order.
    hit_flags = [int(item in relevant) for item in recommended[:k]]

    return (
        ndcg_at_k(hit_flags, k),
        hit_rate_at_k(recommended, relevant, k),
        precision_at_k(recommended, relevant, k),
    )

def dcg_at_k(r, k):
    """Return the discounted cumulative gain of the first ``k`` relevance scores.

    Position ``p`` (1-based) contributes ``r[p - 1] / log2(p + 1)``.
    Returns ``0.`` when there are no scores.
    """
    # np.asfarray was removed in NumPy 2.0; asarray with a float dtype is the
    # direct replacement.
    gains = np.asarray(r, dtype=float)[:k]
    if gains.size:
        return np.sum(gains / np.log2(np.arange(2, gains.size + 2)))
    return 0.

def ndcg_at_k(r, k):
    """Return ``dcg_at_k(r, k)`` divided by the DCG of ``r`` sorted descending.

    Returns ``0.`` when that ideal DCG is zero.
    """
    ideal_dcg = dcg_at_k(sorted(r, reverse=True), k)
    if ideal_dcg:
        return dcg_at_k(r, k) / ideal_dcg
    return 0.

def hit_rate_at_k(recommended, relevant, k):
    """Return True if any of the first ``k`` recommended items is relevant."""
    top_k = set(recommended[:k])
    shared = top_k.intersection(set(relevant))
    return bool(shared)

def precision_at_k(recommended, relevant, k):
    """Return the share of the first ``k`` recommendations that are relevant.

    The denominator is ``min(k, number of items actually in the top-k
    slice)``. Returns ``0.0`` when that slice is empty.
    """
    top_k = recommended[:k]
    hit_count = len(set(top_k).intersection(set(relevant)))
    if top_k:
        return hit_count / min(k, len(top_k))
    return 0.0

def personalization_at_k(all_recommendations, k):
    """Return the fraction of user pairs whose top-``k`` lists share no item.

    Args:
        all_recommendations: Sequence of per-user recommendation sequences.
        k: Cut-off applied to every list before comparing.

    Returns:
        ``1 - overlapping_pairs / total_pairs``, or ``0.0`` when there are
        fewer than two lists.
    """
    from itertools import combinations

    # Build each user's top-k set once instead of once per pair.
    top_k_sets = [set(rec[:k]) for rec in all_recommendations]

    n_lists = len(top_k_sets)
    total_pairs = n_lists * (n_lists - 1) // 2
    similar_pairs = sum(
        1 for left, right in combinations(top_k_sets, 2)
        if not left.isdisjoint(right)
    )

    return 1 - (similar_pairs / total_pairs) if total_pairs > 0 else 0.0

def evaluate_recommendations_parallel(user_recommendations, user_test_items, k_values=[10, 100], n_processes=None):
    """Average per-user ranking metrics at each cut-off in ``k_values``.

    Args:
        user_recommendations: Dict mapping user id to a recommendation list.
        user_test_items: Dict mapping the same user ids to relevant items.
        k_values: Cut-offs to evaluate. (The list default is never mutated.)
        n_processes: Worker count; defaults to ``min(cpu_count, 24)``.

    Returns:
        Dict with ``NDCG@k``, ``HR@k``, ``Precision@k`` and ``PSP@k`` for
        every ``k``. The first three are means over users for whom
        ``calculate_user_metrics`` returned a result, or ``0.0`` if none did.
    """
    if n_processes is None:
        n_processes = min(mp.cpu_count(), 24)

    metrics = {}
    all_recommendations = list(user_recommendations.values())

    # One pool serves every cut-off; starting worker processes is the
    # expensive part and does not depend on k.
    with mp.Pool(processes=n_processes) as pool:
        for k in k_values:
            args_list = [
                (user_id, recommended, user_test_items[user_id], k)
                for user_id, recommended in user_recommendations.items()
            ]

            results = list(tqdm(
                pool.imap(calculate_user_metrics, args_list),
                total=len(args_list),
                desc=f"Calculating metrics @{k}"
            ))

            # None marks a user that had no relevant items.
            valid_results = [r for r in results if r is not None]

            if valid_results:
                ndcg_scores, hr_scores, precision_scores = zip(*valid_results)
                metrics[f'NDCG@{k}'] = np.mean(ndcg_scores)
                metrics[f'HR@{k}'] = np.mean(hr_scores)
                metrics[f'Precision@{k}'] = np.mean(precision_scores)
            else:
                metrics[f'NDCG@{k}'] = 0.0
                metrics[f'HR@{k}'] = 0.0
                metrics[f'Precision@{k}'] = 0.0

            metrics[f'PSP@{k}'] = personalization_at_k(all_recommendations, k)

    return metrics

In [None]:
def print_metrics_parallel(model, train_data, test_data, n_users=200):
    """Compute ranking metrics on a random sample of test users and print them.

    Args:
        model: Passed through to ``calculate_metrics_sample_parallel``.
        train_data: Accepted for interface compatibility; not used here.
        test_data: Passed through to ``calculate_metrics_sample_parallel``.
        n_users: Maximum number of users to sample.

    Returns:
        ``(metrics, sample_users)``.

    The printed keys (``@10`` and ``@100``) rely on the default ``k_values``
    of the two helper functions.
    """
    user_recommendations, user_test_items, sample_users = calculate_metrics_sample_parallel(
        model, test_data, n_users=n_users
    )

    metrics = evaluate_recommendations_parallel(user_recommendations, user_test_items)

    # Report how many users were really sampled; it is lower than n_users
    # when the test set has fewer distinct users.
    print(f"\nMetrics computed on {len(sample_users)} random users (parallel):")
    print("="*50)
    print(f"NDCG@10: {metrics['NDCG@10']:.4f}")
    print(f"HR@10: {metrics['HR@10']:.4f}")
    print(f"Precision@10: {metrics['Precision@10']:.4f}")
    print(f"PSP@10: {metrics['PSP@10']:.4f}")
    print(f"NDCG@100: {metrics['NDCG@100']:.4f}")
    print(f"HR@100: {metrics['HR@100']:.4f}")
    print(f"Precision@100: {metrics['Precision@100']:.4f}")
    print("="*50)

    return metrics, sample_users

# Evaluate the most recently fitted model `rs` on the held-out ratings.
# The metrics pipeline calls `rs.recommend(user_id)` for every sampled user.
print_metrics_parallel(rs, rate_train, rate_test)

In [None]:
# Smoke checks: the parallel helpers are defined in this session and at least
# one CPU core is reported. These do not exercise the functions themselves.
assert callable(calculate_metrics_sample_parallel), "Parallel sample function not callable"
assert callable(evaluate_recommendations_parallel), "Parallel evaluation function not callable"
# assert callable(calculate_rmse_parallel), "Parallel RMSE function not callable"
assert callable(print_metrics_parallel), "Parallel print function not callable"
assert mp.cpu_count() > 0, "No CPU cores detected"
print("All parallel functions defined successfully!")

# MovieLens1M
## User-User Collaborative Filtering
+ Train RMSE = 0.7215
+ Test RMSE = 0.9234934723335452 (Noted)
+ NDCG@10: 0.1533 (Noted)
+ HR@10: 0.2800 (Noted)
+ Precision@10: 0.0149 (Noted)
+ Recall@10: 0.0213

+ PSP@10: 0.0036 (Noted)
+ NDCG@100: 0.1968 (Noted)
+ HR@100: 0.6000 (Noted)
+ Precision@100: 0.0340 (Exclude)

## Item-Item Collaborative Filtering
+ Train RMSE = 0.7097
+ Test RMSE = 0.9010
+ NDCG@10: 0.0661
+ HR@10: 0.1200
+ Precision@10: 0.0140
+ PSP@10: 0.5455
+ NDCG@100: 0.0919
+ HR@100: 0.2600
+ Precision@100: 0.0049

## Matrix Factorization
+ Train RMSE = 0.9069 (30 epochs)
+ Test RMSE = 0.9200
+ NDCG@10: 0.2108
+ HR@10: 0.2901
+ Precision@10: 0.0204
+ Recall@10: 0.0231

+ PSP@10: 0.3012
+ NDCG@100: 0.2324
+ HR@100: 0.4500
+ Precision@100: 0.0405

==============================

# AmazonElectronics 500K

## User-User Collaborative Filtering
+ Train RMSE = 0.5052
+ Test RMSE = 1.3143
+ NDCG@10: 0.0008
+ HR@10: 0.0014
+ Precision@10: 0.0007
+ PSP@10: 0.8934
+ NDCG@100: 0.0009
+ HR@100: 0.0074
+ Precision@100: 0.0009

## Item-Item Collaborative Filtering
+ Train RMSE = 0.5379
+ Test RMSE = 1.3366
+ NDCG@10: 0.0008
+ HR@10: 0.0010
+ Precision@10: 0.0009
+ PSP@10: 0.9873
+ NDCG@100: 0.0013
+ HR@100: 0.0085
+ Precision@100: 0.0011


## Matrix Factorization
+ Train RMSE = 1.2748
+ Test RMSE = 1.3987
+ NDCG@10: 0.0010
+ HR@10: 0.0019
+ Precision@10: 0.0010
+ PSP@10: 0.7494
+ NDCG@100: 0.0018
+ HR@100: 0.0090
+ Precision@100: 0.0018