In [9]:
#!pip install cornac --upgrade
import pandas as pd
import cornac
import os
from cornac.eval_methods import RatioSplit
from cornac.data import Reader, Dataset
from cornac.models import MostPop, MF, PMF, BPR, NeuMF, WMF, HPF, VAECF, NMF, UserKNN,LightGCN
from cornac.models import NMF as CornacNMF 
from cornac.metrics import MAE, MSE, RMSE, Precision, Recall, NDCG, AUC, MAP, FMeasure, MRR

import pickle
from collections import defaultdict
from scipy import stats
from numpy.linalg import norm
import time
# !pip install matplotlib==3.7.3








In [10]:
# Loading dataset
df = pd.read_csv('ThemeAnalysedDataset.csv')[['User-ID', 'ISBN', 'Book-Rating']]

# Rename columns to match Cornac expectation
df.columns = ['user', 'item', 'rating']

# Convert to list of tuples
data = list(zip(df['user'], df['item'], df['rating']))
dataset = cornac.data.Dataset.from_uir(data)
rs = RatioSplit(data=data, test_size=0.2, rating_threshold=1, seed=123)

In [11]:
models = [
    MostPop(),
    UserKNN(k=20, similarity='cosine', mean_centered=False, weighting=None, amplify=1.0, num_threads=0, trainable=True, verbose=True, seed=123),
    BPR(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.01, seed=123),
    MF(k=30, max_iter=100, learning_rate=0.01, lambda_reg=0.001, seed=123),
    PMF(k=10, max_iter=100, learning_rate=0.001, lambda_reg=0.001, seed=123),
    NMF(k=15, max_iter=50, learning_rate=0.005, lambda_u=0.06, lambda_v=0.06, lambda_bu=0.02, lambda_bi=0.02, use_bias=False, verbose=True, seed=123),
    WMF(k=50, max_iter=50, learning_rate=0.001, lambda_u=0.01, lambda_v=0.01, verbose=True, seed=123),
    HPF(k=50, seed=123, hierarchical=False, name="PF")
  ]


metrics = [
    MAE(), MSE(), RMSE(), AUC(), MAP(), MRR(),
    Precision(k=5), Precision(k=10), Precision(k=20), Precision(k=50),
    Recall(k=5), Recall(k=10), Recall(k=20), Recall(k=50),
    NDCG(k=5), NDCG(k=10), NDCG(k=20), NDCG(k=50),
    FMeasure(k=5), FMeasure(k=10), FMeasure(k=20), FMeasure(k=50)
]

In [12]:
exp = cornac.Experiment(eval_method=rs, models=models, metrics=metrics, user_based=True)
exp.run()

  0%|          | 0/6238 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

Optimization finished!


E0000 00:00:1744828502.492485  266995 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744828502.498280  266995 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744828502.512485  266995 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1744828502.512506  266995 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1744828502.512508  266995 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1744828502.512510  266995 computation_placer.cc:177] computation placer already registered. Please check linka

  0%|          | 0/50 [00:00<?, ?it/s]

Learning completed!
Learning...
Learning completed!

TEST:
...
        |    MAE |     MSE |   RMSE |    AUC |  F1@10 |  F1@20 |   F1@5 |  F1@50 |    MAP |    MRR | NDCG@10 | NDCG@20 | NDCG@5 | NDCG@50 | Precision@10 | Precision@20 | Precision@5 | Precision@50 | Recall@10 | Recall@20 | Recall@5 | Recall@50 | Train (s) | Test (s)
------- + ------ + ------- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------ + ------- + ------------ + ------------ + ----------- + ------------ + --------- + --------- + -------- + --------- + --------- + --------
MostPop | 2.4986 |  9.9494 | 2.7529 | 0.6657 | 0.0106 | 0.0108 | 0.0091 | 0.0094 | 0.0157 | 0.0334 |  0.0177 |  0.0241 | 0.0131 |  0.0352 |       0.0075 |       0.0067 |      0.0079 |       0.0052 |    0.0275 |    0.0471 |   0.0152 |    0.0898 |    0.0027 |  10.9621
UserKNN | 1.3788 |  3.3186 | 1.5223 | 0.4026 | 0.0010 | 0.0011 | 0.0008 | 0.0011 | 0.0023 | 0.0047 |  0.0013 |  0.0021 | 0.0009 |  0.003

In [13]:
import pandas as pd
from collections import OrderedDict
from IPython.display import display

# Initialize list to store all model results
all_results = []

# Process each model's results
for model_result in exp.result:
    # Create a dictionary for the current model
    model_data = OrderedDict()
    model_data['Model'] = model_result.model_name
    
    # Add all metrics from metric_avg_results
    for metric_name, metric_value in model_result.metric_avg_results.items():
        model_data[metric_name] = metric_value
    
    all_results.append(model_data)

# Create DataFrame with consistent column order
metrics_order = [
    'MAE', 'MSE', 'RMSE', 'AUC',
    'F1@5', 'F1@10', 'F1@20', 'F1@50',
    'Precision@5', 'Precision@10', 'Precision@20', 'Precision@50',
    'Recall@5', 'Recall@10', 'Recall@20', 'Recall@50',
    'MAP', 'MRR', 
    'NDCG@5', 'NDCG@10', 'NDCG@20', 'NDCG@50',
    'Train (s)', 'Test (s)'
]

# Convert to DataFrame and reorder columns
results_df = pd.DataFrame(all_results)
results_df = results_df[['Model'] + metrics_order]

# Custom formatting for display
def format_metric(x):
    if isinstance(x, (int, float)):
        return f"{x:.4f}" if abs(x) < 100 else f"{x:.1f}"
    return x

styled_df = (results_df.style
             .format(format_metric)
             .background_gradient(subset=metrics_order, cmap='YlGnBu')
             .set_properties(**{'text-align': 'center'})
             .set_table_styles([{
                 'selector': 'th',
                 'props': [('background-color', '#40466e'), 
                           ('color', 'white'),
                           ('font-weight', 'bold')]
             }]))

display(styled_df)

Unnamed: 0,Model,MAE,MSE,RMSE,AUC,F1@5,F1@10,F1@20,F1@50,Precision@5,Precision@10,Precision@20,Precision@50,Recall@5,Recall@10,Recall@20,Recall@50,MAP,MRR,NDCG@5,NDCG@10,NDCG@20,NDCG@50,Train (s),Test (s)
0,MostPop,2.4986,9.9494,2.7529,0.6657,0.0091,0.0106,0.0108,0.0094,0.0079,0.0075,0.0067,0.0052,0.0152,0.0275,0.0471,0.0898,0.0157,0.0334,0.0131,0.0177,0.0241,0.0352,0.0027,10.9621
1,UserKNN,1.3788,3.3186,1.5223,0.4026,0.0008,0.001,0.0011,0.0011,0.0007,0.0007,0.0007,0.0006,0.0011,0.0023,0.0049,0.0115,0.0023,0.0047,0.0009,0.0013,0.0021,0.0037,1.5019,57.7723
2,BPR,6.5702,46.5363,6.6539,0.6875,0.0093,0.0103,0.0107,0.0094,0.0081,0.0073,0.0066,0.0052,0.0157,0.0269,0.0467,0.0898,0.0159,0.0334,0.0134,0.0175,0.0239,0.0352,1.3634,93.3799
3,MF,1.3155,2.9459,1.4499,0.5074,0.001,0.0011,0.0014,0.0015,0.0009,0.0009,0.0009,0.0008,0.0014,0.0022,0.0052,0.0124,0.0024,0.0056,0.0012,0.0016,0.0026,0.0045,0.2536,93.4412
4,PMF,1.4502,3.326,1.576,0.6044,0.0027,0.0031,0.0033,0.0037,0.0024,0.0022,0.0021,0.002,0.0044,0.0079,0.0141,0.0341,0.0057,0.0113,0.0035,0.0048,0.0069,0.0121,1.4414,16.6697
5,NMF,2.2385,6.7967,2.3741,0.5187,0.0007,0.0007,0.0008,0.0009,0.0007,0.0005,0.0005,0.0005,0.0008,0.0013,0.0031,0.0082,0.002,0.0044,0.0009,0.001,0.0016,0.0029,0.2765,108.8
6,WMF,5.758,37.3594,5.8985,0.7234,0.0205,0.0181,0.0154,0.012,0.0177,0.0127,0.0095,0.0066,0.0331,0.0461,0.0665,0.1121,0.0299,0.0593,0.0313,0.0356,0.0423,0.0543,24.1848,15.3983
7,PF,6.7098,48.3486,6.7918,0.6627,0.011,0.0107,0.0097,0.0075,0.0094,0.0075,0.0059,0.0041,0.0181,0.0279,0.042,0.0703,0.0152,0.0317,0.0151,0.0186,0.0233,0.0308,69.683,17.9158


In [14]:
results_df.to_csv('basic_rec_metrics.csv', index=False)

In [15]:
# Create directory for saved models
os.makedirs("model", exist_ok=True)

# Save each model
for model in exp.models:
    model_name = model.name if hasattr(model, 'name') else model.__class__.__name__
    
    # Method 1: Using Cornac's native save (if available)
    if hasattr(model, 'save'):
        model.save(f"model/{model_name}")
        print(f"Saved {model_name} using Cornac's native save()")
    
    

Saved MostPop using Cornac's native save()
UserKNN model is saved to model/UserKNN/UserKNN/2025-04-16_20-37-09-544247.pkl
Saved UserKNN using Cornac's native save()
Saved BPR using Cornac's native save()
Saved MF using Cornac's native save()
Saved PMF using Cornac's native save()
NMF model is saved to model/NMF/NMF/2025-04-16_20-37-09-642383.pkl
Saved NMF using Cornac's native save()
WMF model is saved to model/WMF/WMF/2025-04-16_20-37-09-658560.pkl
Saved WMF using Cornac's native save()
Saved PF using Cornac's native save()


In [16]:
# import numpy as np

# # Get all unique user IDs
# user_ids = model.user_ids  

# # Recommend top-N items for each user
# top_n = 10 
# recommendations = {}

# for user_id in user_ids:
   
#     recommended = model.recommend(
#         user_id, 
#         k=top_n,
        
#     )
#     recommendations[user_id] = recommended

# # Example: Print top-3 for the first user
# print("Top recommendations for user", user_ids[0], ":", recommendations[user_ids[0]][:10])

In [17]:
#okay so buil-in method works just fine!

In [18]:
#NEXT: GET TOP N, SAVE THEM, THEN WE WILL ANALYSE THEM!!!!1 MANEL'S DIVERSITY ACCURACY TRADEOFF PAPER

In [19]:
def get_top_n(algo_name, n=10):
    top_n = defaultdict(list)
    
    for model in exp.models:
        if model.name == algo_name:
            print(f"{model.name} model is selected:")
            for uid in model.train_set.uid_map.values():
                user_id = list(model.train_set.user_ids)[uid]
                try:
                    item_rank = model.rank(user_idx=uid)[0]  # model.rank: item rank, item_score
                except:
                    item_rank = model.rank(user_idx=int(uid))[0]
                
                # Collect top N items
                item_rank_top = item_rank[:n]
                for iid in item_rank_top:
                    item_id = list(model.train_set.item_ids)[iid]
                    
                    # Check if item_id is an integer (it might be an ISBN as string)
                    try:
                        item_id = str(item_id)  # Treat ISBNs or other IDs as strings
                        top_n[int(user_id)].append((item_id, model.score(uid, iid)))  # Use item_id as string
                    except ValueError:
                        # Handle case where item_id cannot be converted to int
                        top_n[int(user_id)].append((item_id, model.score(uid, iid)))
    
    return top_n

In [20]:
# Generate and save recommendations for each model
model_names = ["MostPop", "UserKNN", "BPR", "MF", "PMF","NMF", "WMF", "HPF"]     #Change HPF TO PF

for name in model_names:
    recommendations = get_top_n(name, n=10)
    print('Recommendations generated for:', name)
    # Save to CSV
    data = []
    for user_id, items in recommendations.items():
        for item_id, score in items:
            data.append([user_id, item_id, float(score)])
    
    df = pd.DataFrame(data, columns=["user_id", "item_id", "score"])
    df.to_csv(f"recommendations_{name}.csv", index=False)
    print(f"Saved recommendations for {name} to recommendations_{name}.csv")

MostPop model is selected:
Recommendations generated for: MostPop
Saved recommendations for MostPop to recommendations_MostPop.csv
UserKNN model is selected:
Recommendations generated for: UserKNN
Saved recommendations for UserKNN to recommendations_UserKNN.csv
BPR model is selected:
Recommendations generated for: BPR
Saved recommendations for BPR to recommendations_BPR.csv
MF model is selected:
Recommendations generated for: MF
Saved recommendations for MF to recommendations_MF.csv
PMF model is selected:
Recommendations generated for: PMF
Saved recommendations for PMF to recommendations_PMF.csv
NMF model is selected:
Recommendations generated for: NMF
Saved recommendations for NMF to recommendations_NMF.csv
WMF model is selected:
Recommendations generated for: WMF
Saved recommendations for WMF to recommendations_WMF.csv
Recommendations generated for: HPF
Saved recommendations for HPF to recommendations_HPF.csv


In [30]:
pd.read_csv('recommendations_HPF.csv')

Unnamed: 0,user_id,item_id,score


In [42]:
#hpf did not generate so we will try again because it is actually PF in model.name instead of HPF
n=get_top_n('PF', n=10)

PF model is selected:


In [48]:
data=[]
for user_id, items in n.items():
    for item_id, score in items:
        data.append([user_id, item_id, float(score)])
    
df = pd.DataFrame(data, columns=["user_id", "item_id", "score"])
# df.to_csv("recommendations_HPF.csv", index=False)
print(f"Saved recommendations to recommendations_HPF.csv")

Saved recommendations to recommendations_HPF.csv


In [50]:
pd.read_csv('recommendations_HPF.csv')

Unnamed: 0,user_id,item_id,score
0,193412,0440222656,0.842066
1,193412,0671021001,0.720964
2,193412,0156027321,0.720323
3,193412,0345370775,0.641132
4,193412,067976402X,0.576193
...,...,...,...
62375,60424,0552998001,0.177778
62376,60424,0684862719,0.175020
62377,60424,0590112899,0.132864
62378,60424,0140620222,0.128308


In [89]:
import random as rd

def get_top_n_random(n=10):
    print("Random model is selected:")
    top_n = defaultdict(list)

    # Get list of internal item IDs
    all_iids = list(rs.train_set.iid_map.values())

    for uid in rs.train_set.uid_map.values():
        user_id = list(rs.train_set.user_ids)[uid]

        # Get n unique random items
        random_items = rd.sample(all_iids, n)

        for iid in random_items:
            item_id = list(rs.train_set.item_ids)[iid]
            top_n[user_id].append((item_id, 1.0))

    return top_n
n=get_top_n_random(n=10)

Random model is selected:


In [91]:
data=[]
for user_id, items in n.items():
    for item_id, score in items:
        data.append([user_id, item_id, float(score)])
    
df = pd.DataFrame(data, columns=["user_id", "item_id", "score"])
# df.to_csv("recommendations_random.csv", index=False)
print(f"Saved recommendations to recommendations_random.csv")

Saved recommendations to recommendations_random.csv
