In [2]:
#!pip install cornac --upgrade
#!pip install tensorflow
#pip install --upgrade torch torchvision torchaudio
import pandas as pd
import cornac
from collections import OrderedDict
from IPython.display import display

# !pip install torch

import torch 

from cornac.eval_methods import RatioSplit
from cornac.data import Reader, Dataset
from cornac.models import MostPop, MF, PMF, BPR, NeuMF, WMF, HPF, VAECF, NMF, UserKNN,LightGCN
from cornac.models import NMF as CornacNMF 
from cornac.metrics import MAE, MSE, RMSE, Precision, Recall, NDCG, AUC, MAP, FMeasure, MRR


from collections import defaultdict
from scipy import stats
from numpy.linalg import norm

# !pip install matplotlib==3.7.3


In [3]:
# Load the ratings table, keeping only the three columns used below.
# ISBNs are identifiers (leading zeros, trailing 'X' check digits), so they are
# read explicitly as text instead of relying on pandas' dtype inference, which
# can turn all-digit ISBNs into integers and drop their leading zeros.
df = pd.read_csv('ThemeAnalysedDataset.csv', dtype={'ISBN': str})[['User-ID', 'ISBN', 'Book-Rating']]

# Rename columns to the (user, item, rating) naming used in the rest of the notebook
df.columns = ['user', 'item', 'rating']

# Build the list of (user, item, rating) tuples that Cornac consumes
data = list(zip(df['user'], df['item'], df['rating']))
dataset = cornac.data.Dataset.from_uir(data)

# Hold out 20% of the ratings for testing; the seed keeps the split reproducible.
# rating_threshold is the cutoff Cornac uses to binarize ratings for ranking metrics.
rs = RatioSplit(data=data, test_size=0.2, rating_threshold=1, seed=123)

In [4]:
# Rating-error and cutoff-free ranking metrics first, then every cutoff-based
# metric family evaluated at k = 5, 10, 20 and 50 (family by family).
metrics = [MAE(), MSE(), RMSE(), AUC(), MAP(), MRR()] + [
    metric_cls(k=cutoff)
    for metric_cls in (Precision, Recall, NDCG, FMeasure)
    for cutoff in (5, 10, 20, 50)
]



In [18]:
# Models evaluated by the next experiment run; only NeuMF is active here.
more_models = [
    NeuMF(
        num_factors=8,
        layers=[32, 16, 8],
        act_fn="tanh",
        num_epochs=1,
        num_neg=3,
        batch_size=256,
        lr=0.001,
        seed=42,
        verbose=True,
        backend='pytorch',
    ),
    # Disabled alternatives, kept for reference:
    # VAECF(k=10, autoencoder_structure=[20], act_fn='tanh', likelihood='mult',n_epochs=100, batch_size=100, learning_rate=0.001, beta=1.0, trainable=True, verbose=False, seed=123, use_gpu=True),
    # LightGCN(emb_size=64, num_epochs=1000, learning_rate=0.001, batch_size=1024, num_layers=3, early_stopping=None, lambda_reg=0.0001, trainable=True, verbose=False, seed=123)
]

      

In [19]:
# Train and evaluate every model in `more_models` on the shared split `rs`
exp = cornac.Experiment(
    eval_method=rs,
    models=more_models,
    metrics=metrics,
    user_based=True,
)
exp.run()


  0%|          | 0/1 [00:00<?, ?it/s]


TEST:
...
      |    MAE |     MSE |   RMSE |    AUC |  F1@10 |  F1@20 |   F1@5 |  F1@50 |    MAP |    MRR | NDCG@10 | NDCG@20 | NDCG@5 | NDCG@50 | Precision@10 | Precision@20 | Precision@5 | Precision@50 | Recall@10 | Recall@20 | Recall@5 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------ + ------- + ------------ + ------------ + ----------- + ------------ + --------- + --------- + -------- + --------- + --------- + --------
NeuMF | 6.7216 | 48.5020 | 6.8026 | 0.6863 | 0.0102 | 0.0107 | 0.0094 | 0.0093 | 0.0159 | 0.0330 |  0.0174 |  0.0242 | 0.0135 |  0.0352 |       0.0073 |       0.0066 |      0.0082 |       0.0051 |    0.0261 |    0.0476 |   0.0156 |    0.0900 |    3.1943 |  63.1469



In [20]:
# One row per evaluated model: the model name first, followed by every
# averaged metric reported in that model's result.
all_results = [
    OrderedDict([
        ('Model', model_result.model_name),
        *model_result.metric_avg_results.items(),
    ])
    for model_result in exp.result
]

# Fixed column order so tables from different runs line up
metrics_order = [
    'MAE', 'MSE', 'RMSE', 'AUC',
    'F1@5', 'F1@10', 'F1@20', 'F1@50',
    'Precision@5', 'Precision@10', 'Precision@20', 'Precision@50',
    'Recall@5', 'Recall@10', 'Recall@20', 'Recall@50',
    'MAP', 'MRR',
    'NDCG@5', 'NDCG@10', 'NDCG@20', 'NDCG@50',
    'Train (s)', 'Test (s)',
]

# Build the table and put the columns into the order defined above
results_df = pd.DataFrame(all_results)[['Model'] + metrics_order]

# Custom formatting for display
def format_metric(x):
    """Format a single table cell for display.

    Real numbers are rendered with four decimals when their magnitude is
    below 100 and with one decimal otherwise. Any other value (for example
    the model name) is returned unchanged.
    """
    import numbers  # local import: only needed for this type check

    # numbers.Real covers built-in int/float and also NumPy scalars such as
    # np.float32 / np.int64, which are not subclasses of the built-in types
    # and would otherwise be left unformatted.
    if isinstance(x, numbers.Real):
        return f"{x:.4f}" if abs(x) < 100 else f"{x:.1f}"
    return x

# Header cells: bold white text on a dark blue background
header_style = {
    'selector': 'th',
    'props': [
        ('background-color', '#40466e'),
        ('color', 'white'),
        ('font-weight', 'bold'),
    ],
}

# Apply number formatting, a per-column colour gradient on the metric columns,
# centred text and the header style, one step at a time.
styled_df = results_df.style.format(format_metric)
styled_df = styled_df.background_gradient(subset=metrics_order, cmap='YlGnBu')
styled_df = styled_df.set_properties(**{'text-align': 'center'})
styled_df = styled_df.set_table_styles([header_style])

display(styled_df)

Unnamed: 0,Model,MAE,MSE,RMSE,AUC,F1@5,F1@10,F1@20,F1@50,Precision@5,Precision@10,Precision@20,Precision@50,Recall@5,Recall@10,Recall@20,Recall@50,MAP,MRR,NDCG@5,NDCG@10,NDCG@20,NDCG@50,Train (s),Test (s)
0,NeuMF,6.7216,48.502,6.8026,0.6863,0.0094,0.0102,0.0107,0.0093,0.0082,0.0073,0.0066,0.0051,0.0156,0.0261,0.0476,0.09,0.0159,0.033,0.0135,0.0174,0.0242,0.0352,3.1943,63.1469


In [9]:
# results_df.to_csv('neumf.csv', index=False)

In [29]:
def get_top_n(algo_name, n=10, models=None):
    """Collect the top-``n`` ranked items per training user for one model.

    Parameters
    ----------
    algo_name : str
        Value of ``model.name`` identifying which model to use.
    n : int, default 10
        Number of highest-ranked items to keep per user.
    models : iterable, optional
        Models to search. Defaults to ``exp.models`` (the notebook's current
        experiment), so existing two-argument calls behave as before.

    Returns
    -------
    defaultdict(list)
        Maps ``int(raw_user_id)`` to a list of ``(str(raw_item_id), score)``
        tuples in ranked order, where ``score`` is whatever
        ``model.score(user_idx, item_idx)`` returns. Empty when no model name
        matches ``algo_name``.

    Raises
    ------
    ValueError
        If a raw user id cannot be converted with ``int()``.
    """
    top_n = defaultdict(list)
    candidates = exp.models if models is None else models

    for model in candidates:
        if model.name != algo_name:
            continue
        print(f"{model.name} model is selected:")

        # Build the index -> raw-id lookups once per model instead of
        # re-creating both lists for every user and every recommended item.
        user_ids = list(model.train_set.user_ids)
        item_ids = list(model.train_set.item_ids)

        for uid in model.train_set.uid_map.values():
            # Normalise the index to a plain int up front; this replaces the
            # former bare `except:` that retried the call with int(uid).
            uid = int(uid)
            user_key = int(user_ids[uid])

            # model.rank returns (item_rank, item_scores); only the ranking is used
            item_rank = model.rank(user_idx=uid)[0]

            for iid in item_rank[:n]:
                # Item ids (ISBNs) are always stored as strings
                top_n[user_key].append((str(item_ids[iid]), model.score(uid, iid)))

    return top_n


In [33]:
# Top-10 recommendations per training user from the NeuMF model
n = get_top_n(algo_name='NeuMF', n=10)

NeuMF model is selected:


In [34]:
# One row per (user, recommended item). Each score is indexed with [0]
# before being converted to a plain float.
data = [
    [user_id, item_id, float(score[0])]
    for user_id, items in n.items()
    for item_id, score in items
]

df = pd.DataFrame(data, columns=["user_id", "item_id", "score"])
df.to_csv("recommendations_NeuMF.csv", index=False)

In [35]:
# pd.read_csv('recommendations_NeuMF.csv')

Unnamed: 0,user_id,item_id,score
0,193412,0385504209,0.815096
1,193412,0971880107,0.806887
2,193412,0312195516,0.805068
3,193412,0060928336,0.804704
4,193412,0452282152,0.800919
...,...,...,...
62375,60424,059035342X,0.806350
62376,60424,0142001740,0.802623
62377,60424,0786868716,0.801996
62378,60424,0446310786,0.801587


In [36]:
# Repeat the experiment with a different model: only VAECF is active here.
more_models = [
    # NeuMF(num_factors=8, layers=[32, 16, 8], act_fn="tanh", num_epochs=1, num_neg=3, batch_size=256, lr=0.001, seed=42, verbose=True, backend='pytorch'),
    VAECF(
        k=10,
        autoencoder_structure=[20],
        act_fn='tanh',
        likelihood='mult',
        n_epochs=100,
        batch_size=100,
        learning_rate=0.001,
        beta=1.0,
        trainable=True,
        verbose=False,
        seed=123,
        use_gpu=True,
    ),
    # LightGCN(emb_size=64, num_epochs=1000, learning_rate=0.001, batch_size=1024, num_layers=3, early_stopping=None, lambda_reg=0.0001, trainable=True, verbose=False, seed=123)
]

# Same split and metric list as the previous run, so results are comparable
exp = cornac.Experiment(
    eval_method=rs,
    models=more_models,
    metrics=metrics,
    user_based=True,
)
exp.run()



TEST:
...
      |    MAE |     MSE |   RMSE |    AUC |  F1@10 |  F1@20 |   F1@5 |  F1@50 |    MAP |    MRR | NDCG@10 | NDCG@20 | NDCG@5 | NDCG@50 | Precision@10 | Precision@20 | Precision@5 | Precision@50 | Recall@10 | Recall@20 | Recall@5 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------ + ------- + ------------ + ------------ + ----------- + ------------ + --------- + --------- + -------- + --------- + --------- + --------
VAECF | 6.7216 | 48.5020 | 6.8026 | 0.7760 | 0.0198 | 0.0184 | 0.0193 | 0.0148 | 0.0293 | 0.0595 |  0.0351 |  0.0449 | 0.0282 |  0.0610 |       0.0140 |       0.0113 |      0.0170 |       0.0081 |    0.0507 |    0.0820 |   0.0312 |    0.1437 |   55.2808 |  45.4733



In [37]:
all_results = []

# Turn each model's result into one ordered row: name first, then its averaged metrics
for model_result in exp.result:
    row = OrderedDict(Model=model_result.model_name)
    row.update(model_result.metric_avg_results)
    all_results.append(row)

# Fixed column order so tables from different runs line up
metrics_order = [
    'MAE', 'MSE', 'RMSE', 'AUC',
    'F1@5', 'F1@10', 'F1@20', 'F1@50',
    'Precision@5', 'Precision@10', 'Precision@20', 'Precision@50',
    'Recall@5', 'Recall@10', 'Recall@20', 'Recall@50',
    'MAP', 'MRR',
    'NDCG@5', 'NDCG@10', 'NDCG@20', 'NDCG@50',
    'Train (s)', 'Test (s)',
]

# Build the table, then select the columns in the order defined above
column_order = ['Model', *metrics_order]
results_df = pd.DataFrame(all_results)[column_order]

In [39]:
# Persist the VAECF metric table without the row index
results_df.to_csv(path_or_buf='VAECF.csv', index=False)

In [49]:
# Top-10 recommendations per training user from the VAECF model
n = get_top_n('VAECF', n=10)

# One row per (user, recommended item); each score is converted with float()
data = [
    [user_id, item_id, float(score)]
    for user_id, items in n.items()
    for item_id, score in items
]

df = pd.DataFrame(data, columns=["user_id", "item_id", "score"])

# Write the file before reporting success, and report the exact path written.
# Previously the message claimed a save that was commented out, and it named
# a differently-cased file.
output_path = "recommendations_VAECF.csv"
df.to_csv(output_path, index=False)
print(f"Saved recommendations to {output_path}")

VAECF model is selected:
Saved recommendations to recommendations_vaecf.csv


In [51]:
# Write the VAECF recommendations to disk, then show how many users received a list
df.to_csv(path_or_buf="recommendations_VAECF.csv", index=False)
len(n)

6238

In [52]:
# item_id holds ISBNs: read them as text so leading zeros can never be lost to dtype inference
pd.read_csv('recommendations_VAECF.csv', dtype={'item_id': str})

Unnamed: 0,user_id,item_id,score
0,193412,0312195516,0.009933
1,193412,0142001740,0.009491
2,193412,0452282152,0.006639
3,193412,0156027321,0.006397
4,193412,0060930535,0.006304
...,...,...,...
62375,60424,0061009059,0.004406
62376,60424,0312966970,0.004380
62377,60424,059035342X,0.004116
62378,60424,0142001740,0.003958


In [53]:
# item_id holds ISBNs: read them as text so leading zeros can never be lost to dtype inference
pd.read_csv('recommendations_NeuMF.csv', dtype={'item_id': str})

Unnamed: 0,user_id,item_id,score
0,193412,0385504209,0.815096
1,193412,0971880107,0.806887
2,193412,0312195516,0.805068
3,193412,0060928336,0.804704
4,193412,0452282152,0.800919
...,...,...,...
62375,60424,059035342X,0.806350
62376,60424,0142001740,0.802623
62377,60424,0786868716,0.801996
62378,60424,0446310786,0.801587
