## LightGCN

pure collaborative filtering LightGCN

In [1]:
import numpy as np
import pandas as pd
from libreco.data import random_split, DatasetPure
from libreco.algorithms import LightGCN  # pure data, algorithm LightGCN
from libreco.evaluation import evaluate

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
df = pd.read_csv("cf_initial.csv")
df= df.rename(columns={"userID":"user", 'itemID':"item", "rating":"label", "song_by": "song"})
df=df[["user","item","label","song"]]
df.nunique()

user       861
item     58320
label        1
song     56714
dtype: int64

In [3]:
# split whole data into three folds for training, evaluating and testing
train_data, eval_data, test_data = random_split(df, multi_ratios=[0.8, 0.1, 0.1])

train_data, data_info = DatasetPure.build_trainset(train_data)
eval_data = DatasetPure.build_evalset(eval_data)
test_data = DatasetPure.build_testset(test_data)
print(data_info)

n_users: 861, n_items: 48529, data density: 0.1596 %


In [11]:
lightgcn = LightGCN(
    task="ranking",
    data_info=data_info,
    loss_type="bpr",
    embed_size=16,
    n_epochs=2,
    lr=1e-3,
    batch_size=50,
    num_neg=1,
    device="cuda",
)

In [27]:
import optuna

def objective(trial):
    # Define hyperparameter search space
    lr = trial.suggest_float("lr", 1e-5, 1e-2)
    batch_size = trial.suggest_int("batch_size", 128, 256)
    embed_size = trial.suggest_int("embed_size", 8, 256)
    num_epochs = trial.suggest_int("num_epochs", 2, 8)

    # Create and train the model with the suggested hyperparameters
    lightgcn = LightGCN(
    task="ranking",
    data_info=data_info,
    loss_type="bpr",
    embed_size=embed_size,
    n_epochs=num_epochs,
    lr=lr,
    batch_size=batch_size,
    num_neg=1,
    device="cuda",
)
    lightgcn.fit(
    train_data,
    neg_sampling=True,
    verbose=2,
    eval_data=eval_data,
    metrics=["loss", "roc_auc", "precision", "recall", "ndcg"],
)

    error = evaluate(
        model=lightgcn,
        data=test_data,
        neg_sampling=True,
        metrics=["loss", "roc_auc", "precision", "recall", "ndcg"],
    )
    return error['precision']

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=10)

# Get the best hyperparameters from the study
best_params = study.best_params

print(best_params)


[I 2023-11-15 00:28:09,305] A new study created in memory with name: no-name-6baae4ce-8ffe-4dd2-a2ff-8d86f164d185


Training start time: [35m2023-11-15 00:28:09[0m


train: 100%|██████████| 282/282 [02:44<00:00,  1.71it/s]


Epoch 1 elapsed: 164.766s
	 [32mtrain_loss: 0.0779[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 116.83it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 510.99it/s]


	 eval log_loss: 0.4879
	 eval roc_auc: 0.9511
	 eval precision@10: 0.0355
	 eval recall@10: 0.0696
	 eval ndcg@10: 0.1400


train: 100%|██████████| 282/282 [02:46<00:00,  1.70it/s]


Epoch 2 elapsed: 166.320s
	 [32mtrain_loss: 0.0057[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 117.50it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 509.23it/s]


	 eval log_loss: 0.4991
	 eval roc_auc: 0.9499
	 eval precision@10: 0.0405
	 eval recall@10: 0.0736
	 eval ndcg@10: 0.1498


train: 100%|██████████| 282/282 [02:45<00:00,  1.71it/s]


Epoch 3 elapsed: 165.133s
	 [32mtrain_loss: 0.0039[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 83.97it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 479.09it/s]


	 eval log_loss: 0.5019
	 eval roc_auc: 0.9505
	 eval precision@10: 0.0378
	 eval recall@10: 0.0702
	 eval ndcg@10: 0.1471


train: 100%|██████████| 282/282 [02:42<00:00,  1.73it/s]


Epoch 4 elapsed: 162.776s
	 [32mtrain_loss: 0.0033[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 103.96it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 493.29it/s]


	 eval log_loss: 0.5096
	 eval roc_auc: 0.9489
	 eval precision@10: 0.0386
	 eval recall@10: 0.0668
	 eval ndcg@10: 0.1503


train: 100%|██████████| 282/282 [02:44<00:00,  1.71it/s]


Epoch 5 elapsed: 164.885s
	 [32mtrain_loss: 0.0031[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 99.62it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 506.98it/s]


	 eval log_loss: 0.5207
	 eval roc_auc: 0.9459
	 eval precision@10: 0.0361
	 eval recall@10: 0.0617
	 eval ndcg@10: 0.1434


train: 100%|██████████| 282/282 [02:38<00:00,  1.78it/s]


Epoch 6 elapsed: 158.051s
	 [32mtrain_loss: 0.0033[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 104.06it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 501.05it/s]


	 eval log_loss: 0.5295
	 eval roc_auc: 0.9442
	 eval precision@10: 0.0356
	 eval recall@10: 0.0635
	 eval ndcg@10: 0.1404


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 102.02it/s]
eval_listwise: 100%|██████████| 689/689 [00:01<00:00, 515.76it/s]
[I 2023-11-15 00:44:46,681] Trial 0 finished with value: 0.03323657474600871 and parameters: {'lr': 0.006925597346938086, 'batch_size': 237, 'embed_size': 140, 'num_epochs': 6}. Best is trial 0 with value: 0.03323657474600871.


Training start time: [35m2023-11-15 00:44:46[0m


train: 100%|██████████| 420/420 [04:02<00:00,  1.73it/s]


Epoch 1 elapsed: 242.263s
	 [32mtrain_loss: 0.3992[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 79.00it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 425.74it/s]


	 eval log_loss: 0.5068
	 eval roc_auc: 0.9354
	 eval precision@10: 0.0314
	 eval recall@10: 0.0593
	 eval ndcg@10: 0.1144


train: 100%|██████████| 420/420 [04:08<00:00,  1.69it/s]


Epoch 2 elapsed: 248.776s
	 [32mtrain_loss: 0.0716[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 99.70it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 461.52it/s]


	 eval log_loss: 0.4663
	 eval roc_auc: 0.9467
	 eval precision@10: 0.0333
	 eval recall@10: 0.0599
	 eval ndcg@10: 0.1227


train: 100%|██████████| 420/420 [03:56<00:00,  1.78it/s]


Epoch 3 elapsed: 236.475s
	 [32mtrain_loss: 0.0259[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 85.88it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 463.65it/s]


	 eval log_loss: 0.4638
	 eval roc_auc: 0.9515
	 eval precision@10: 0.0336
	 eval recall@10: 0.0609
	 eval ndcg@10: 0.1272


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 115.78it/s]
eval_listwise: 100%|██████████| 689/689 [00:01<00:00, 480.23it/s]
[I 2023-11-15 00:57:04,841] Trial 1 finished with value: 0.03149492017416546 and parameters: {'lr': 0.0011060164812060052, 'batch_size': 159, 'embed_size': 145, 'num_epochs': 3}. Best is trial 0 with value: 0.03323657474600871.


Training start time: [35m2023-11-15 00:57:04[0m


train: 100%|██████████| 284/284 [04:25<00:00,  1.07it/s]


Epoch 1 elapsed: 265.744s
	 [32mtrain_loss: 0.0392[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 52.46it/s]
eval_listwise: 100%|██████████| 679/679 [00:02<00:00, 302.61it/s]


	 eval log_loss: 0.4926
	 eval roc_auc: 0.9542
	 eval precision@10: 0.0380
	 eval recall@10: 0.0693
	 eval ndcg@10: 0.1578


train: 100%|██████████| 284/284 [04:25<00:00,  1.07it/s]


Epoch 2 elapsed: 265.436s
	 [32mtrain_loss: 0.0043[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 59.14it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 383.71it/s]


	 eval log_loss: 0.5128
	 eval roc_auc: 0.9452
	 eval precision@10: 0.0370
	 eval recall@10: 0.0676
	 eval ndcg@10: 0.1555


train: 100%|██████████| 284/284 [04:17<00:00,  1.10it/s]


Epoch 3 elapsed: 257.128s
	 [32mtrain_loss: 0.0044[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 63.57it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 368.34it/s]


	 eval log_loss: 0.5408
	 eval roc_auc: 0.9346
	 eval precision@10: 0.0339
	 eval recall@10: 0.0611
	 eval ndcg@10: 0.1355


train: 100%|██████████| 284/284 [04:20<00:00,  1.09it/s]


Epoch 4 elapsed: 260.445s
	 [32mtrain_loss: 0.0048[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 67.14it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 384.28it/s]


	 eval log_loss: 0.5733
	 eval roc_auc: 0.9282
	 eval precision@10: 0.0281
	 eval recall@10: 0.0531
	 eval ndcg@10: 0.1131


train: 100%|██████████| 284/284 [04:17<00:00,  1.10it/s]


Epoch 5 elapsed: 257.649s
	 [32mtrain_loss: 0.0039[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 75.37it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 384.43it/s]


	 eval log_loss: 0.6049
	 eval roc_auc: 0.9209
	 eval precision@10: 0.0292
	 eval recall@10: 0.0508
	 eval ndcg@10: 0.1124


train: 100%|██████████| 284/284 [04:18<00:00,  1.10it/s]


Epoch 6 elapsed: 258.223s
	 [32mtrain_loss: 0.0054[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 73.19it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 389.99it/s]


	 eval log_loss: 0.6902
	 eval roc_auc: 0.9104
	 eval precision@10: 0.0274
	 eval recall@10: 0.0483
	 eval ndcg@10: 0.1058


train: 100%|██████████| 284/284 [04:31<00:00,  1.05it/s]


Epoch 7 elapsed: 271.235s
	 [32mtrain_loss: 0.0066[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 76.13it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 373.50it/s]


	 eval log_loss: 0.7604
	 eval roc_auc: 0.9006
	 eval precision@10: 0.0253
	 eval recall@10: 0.0454
	 eval ndcg@10: 0.0965


train: 100%|██████████| 284/284 [04:18<00:00,  1.10it/s]


Epoch 8 elapsed: 258.104s
	 [32mtrain_loss: 0.0063[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 71.15it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 381.52it/s]


	 eval log_loss: 0.8203
	 eval roc_auc: 0.8958
	 eval precision@10: 0.0239
	 eval recall@10: 0.0411
	 eval ndcg@10: 0.0948


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 79.47it/s]
eval_listwise: 100%|██████████| 689/689 [00:01<00:00, 382.55it/s]
[I 2023-11-15 01:32:23,493] Trial 2 finished with value: 0.021625544267053706 and parameters: {'lr': 0.009396225837719928, 'batch_size': 235, 'embed_size': 248, 'num_epochs': 8}. Best is trial 0 with value: 0.03323657474600871.


Training start time: [35m2023-11-15 01:32:23[0m


train: 100%|██████████| 299/299 [02:26<00:00,  2.04it/s]


Epoch 1 elapsed: 146.364s
	 [32mtrain_loss: 0.113[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 199.99it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 455.48it/s]


	 eval log_loss: 0.4732
	 eval roc_auc: 0.9497
	 eval precision@10: 0.0351
	 eval recall@10: 0.0644
	 eval ndcg@10: 0.1473


train: 100%|██████████| 299/299 [02:22<00:00,  2.09it/s]


Epoch 2 elapsed: 142.725s
	 [32mtrain_loss: 0.0081[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 117.48it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 477.39it/s]


	 eval log_loss: 0.4796
	 eval roc_auc: 0.9531
	 eval precision@10: 0.0356
	 eval recall@10: 0.0648
	 eval ndcg@10: 0.1484


train: 100%|██████████| 299/299 [02:18<00:00,  2.16it/s]


Epoch 3 elapsed: 138.382s
	 [32mtrain_loss: 0.0047[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 158.56it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 444.43it/s]


	 eval log_loss: 0.4859
	 eval roc_auc: 0.9538
	 eval precision@10: 0.0374
	 eval recall@10: 0.0676
	 eval ndcg@10: 0.1511


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 150.24it/s]
eval_listwise: 100%|██████████| 689/689 [00:01<00:00, 505.19it/s]
[I 2023-11-15 01:39:41,537] Trial 3 finished with value: 0.03309143686502177 and parameters: {'lr': 0.005379527965413115, 'batch_size': 223, 'embed_size': 110, 'num_epochs': 3}. Best is trial 0 with value: 0.03323657474600871.


Training start time: [35m2023-11-15 01:39:41[0m


train: 100%|██████████| 359/359 [02:48<00:00,  2.13it/s]


Epoch 1 elapsed: 168.550s
	 [32mtrain_loss: 0.2299[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 132.20it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 519.39it/s]


	 eval log_loss: 0.4648
	 eval roc_auc: 0.9452
	 eval precision@10: 0.0323
	 eval recall@10: 0.0644
	 eval ndcg@10: 0.1301


train: 100%|██████████| 359/359 [03:14<00:00,  1.85it/s]


Epoch 2 elapsed: 194.095s
	 [32mtrain_loss: 0.0202[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 180.94it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 535.84it/s]


	 eval log_loss: 0.4692
	 eval roc_auc: 0.9512
	 eval precision@10: 0.0365
	 eval recall@10: 0.0711
	 eval ndcg@10: 0.1429


train: 100%|██████████| 359/359 [02:51<00:00,  2.09it/s]


Epoch 3 elapsed: 171.892s
	 [32mtrain_loss: 0.0106[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 198.31it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 548.21it/s]


	 eval log_loss: 0.4729
	 eval roc_auc: 0.9545
	 eval precision@10: 0.0359
	 eval recall@10: 0.0670
	 eval ndcg@10: 0.1406


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 194.30it/s]
eval_listwise: 100%|██████████| 689/689 [00:01<00:00, 570.34it/s]
[I 2023-11-15 01:48:45,523] Trial 4 finished with value: 0.03454281567489115 and parameters: {'lr': 0.002433004009239272, 'batch_size': 186, 'embed_size': 116, 'num_epochs': 3}. Best is trial 4 with value: 0.03454281567489115.


Training start time: [35m2023-11-15 01:48:45[0m


train: 100%|██████████| 436/436 [02:30<00:00,  2.89it/s]


Epoch 1 elapsed: 150.992s
	 [32mtrain_loss: 0.3153[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 152.95it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 567.61it/s]


	 eval log_loss: 0.4743
	 eval roc_auc: 0.9386
	 eval precision@10: 0.0281
	 eval recall@10: 0.0522
	 eval ndcg@10: 0.1121


train: 100%|██████████| 436/436 [02:21<00:00,  3.07it/s]


Epoch 2 elapsed: 141.913s
	 [32mtrain_loss: 0.04[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 576.59it/s]


	 eval log_loss: 0.4699
	 eval roc_auc: 0.9472
	 eval precision@10: 0.0308
	 eval recall@10: 0.0570
	 eval ndcg@10: 0.1193


train: 100%|██████████| 436/436 [02:24<00:00,  3.02it/s]


Epoch 3 elapsed: 144.353s
	 [32mtrain_loss: 0.0181[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 199.91it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 533.85it/s]


	 eval log_loss: 0.4749
	 eval roc_auc: 0.9509
	 eval precision@10: 0.0327
	 eval recall@10: 0.0601
	 eval ndcg@10: 0.1296


train: 100%|██████████| 436/436 [02:23<00:00,  3.04it/s]


Epoch 4 elapsed: 143.290s
	 [32mtrain_loss: 0.0119[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 213.03it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 573.23it/s]


	 eval log_loss: 0.4794
	 eval roc_auc: 0.9528
	 eval precision@10: 0.0339
	 eval recall@10: 0.0613
	 eval ndcg@10: 0.1305


train: 100%|██████████| 436/436 [02:22<00:00,  3.07it/s]


Epoch 5 elapsed: 142.086s
	 [32mtrain_loss: 0.0089[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 196.61it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 572.19it/s]


	 eval log_loss: 0.4823
	 eval roc_auc: 0.9542
	 eval precision@10: 0.0349
	 eval recall@10: 0.0641
	 eval ndcg@10: 0.1348


train: 100%|██████████| 436/436 [02:23<00:00,  3.03it/s]


Epoch 6 elapsed: 143.826s
	 [32mtrain_loss: 0.007[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 214.25it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 539.03it/s]


	 eval log_loss: 0.4861
	 eval roc_auc: 0.9549
	 eval precision@10: 0.0359
	 eval recall@10: 0.0648
	 eval ndcg@10: 0.1379


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 216.70it/s]
eval_listwise: 100%|██████████| 689/689 [00:01<00:00, 583.39it/s]
[I 2023-11-15 02:03:25,282] Trial 5 finished with value: 0.03570391872278665 and parameters: {'lr': 0.0019269443023648477, 'batch_size': 153, 'embed_size': 70, 'num_epochs': 6}. Best is trial 5 with value: 0.03570391872278665.


Training start time: [35m2023-11-15 02:03:25[0m


train: 100%|██████████| 269/269 [02:31<00:00,  1.77it/s]


Epoch 1 elapsed: 151.696s
	 [32mtrain_loss: 0.0554[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 102.66it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 447.53it/s]


	 eval log_loss: 0.5023
	 eval roc_auc: 0.9504
	 eval precision@10: 0.0378
	 eval recall@10: 0.0698
	 eval ndcg@10: 0.1488


train: 100%|██████████| 269/269 [02:31<00:00,  1.78it/s]


Epoch 2 elapsed: 151.314s
	 [32mtrain_loss: 0.0044[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 117.09it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 448.30it/s]


	 eval log_loss: 0.5039
	 eval roc_auc: 0.9487
	 eval precision@10: 0.0384
	 eval recall@10: 0.0691
	 eval ndcg@10: 0.1442


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 151.10it/s]
eval_listwise: 100%|██████████| 689/689 [00:01<00:00, 469.96it/s]
[I 2023-11-15 02:08:37,132] Trial 6 finished with value: 0.029898403483309147 and parameters: {'lr': 0.009501705350920672, 'batch_size': 248, 'embed_size': 139, 'num_epochs': 2}. Best is trial 5 with value: 0.03570391872278665.


Training start time: [35m2023-11-15 02:08:37[0m


train: 100%|██████████| 420/420 [04:08<00:00,  1.69it/s]


Epoch 1 elapsed: 248.543s
	 [32mtrain_loss: 0.0591[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 132.71it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 460.05it/s]


	 eval log_loss: 0.4936
	 eval roc_auc: 0.9566
	 eval precision@10: 0.0364
	 eval recall@10: 0.0672
	 eval ndcg@10: 0.1451


train: 100%|██████████| 420/420 [04:03<00:00,  1.73it/s]


Epoch 2 elapsed: 243.121s
	 [32mtrain_loss: 0.0055[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 122.82it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 478.41it/s]


	 eval log_loss: 0.4936
	 eval roc_auc: 0.9554
	 eval precision@10: 0.0352
	 eval recall@10: 0.0633
	 eval ndcg@10: 0.1389


train: 100%|██████████| 420/420 [04:05<00:00,  1.71it/s]


Epoch 3 elapsed: 245.022s
	 [32mtrain_loss: 0.004[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 95.10it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 480.51it/s]


	 eval log_loss: 0.5023
	 eval roc_auc: 0.9521
	 eval precision@10: 0.0359
	 eval recall@10: 0.0648
	 eval ndcg@10: 0.1393


train: 100%|██████████| 420/420 [04:03<00:00,  1.73it/s]


Epoch 4 elapsed: 243.251s
	 [32mtrain_loss: 0.0044[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 130.96it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 485.78it/s]


	 eval log_loss: 0.5195
	 eval roc_auc: 0.9470
	 eval precision@10: 0.0345
	 eval recall@10: 0.0603
	 eval ndcg@10: 0.1332


train: 100%|██████████| 420/420 [04:03<00:00,  1.73it/s]


Epoch 5 elapsed: 243.294s
	 [32mtrain_loss: 0.0032[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 85.27it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 476.38it/s]


	 eval log_loss: 0.5304
	 eval roc_auc: 0.9407
	 eval precision@10: 0.0327
	 eval recall@10: 0.0555
	 eval ndcg@10: 0.1254


train: 100%|██████████| 420/420 [04:01<00:00,  1.74it/s]


Epoch 6 elapsed: 241.375s
	 [32mtrain_loss: 0.0034[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 96.39it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 478.79it/s]


	 eval log_loss: 0.5552
	 eval roc_auc: 0.9359
	 eval precision@10: 0.0325
	 eval recall@10: 0.0528
	 eval ndcg@10: 0.1268


train: 100%|██████████| 420/420 [04:02<00:00,  1.73it/s]


Epoch 7 elapsed: 242.716s
	 [32mtrain_loss: 0.0034[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 124.88it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 480.19it/s]


	 eval log_loss: 0.5836
	 eval roc_auc: 0.9332
	 eval precision@10: 0.0324
	 eval recall@10: 0.0501
	 eval ndcg@10: 0.1283


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 129.31it/s]
eval_listwise: 100%|██████████| 689/689 [00:01<00:00, 471.53it/s]
[I 2023-11-15 02:37:22,103] Trial 7 finished with value: 0.028011611030478955 and parameters: {'lr': 0.006683463654139038, 'batch_size': 159, 'embed_size': 153, 'num_epochs': 7}. Best is trial 5 with value: 0.03570391872278665.


Training start time: [35m2023-11-15 02:37:22[0m


train: 100%|██████████| 379/379 [05:09<00:00,  1.22it/s]


Epoch 1 elapsed: 309.414s
	 [32mtrain_loss: 0.0649[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 80.50it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 429.78it/s]


	 eval log_loss: 0.4748
	 eval roc_auc: 0.9575
	 eval precision@10: 0.0352
	 eval recall@10: 0.0647
	 eval ndcg@10: 0.1415


train: 100%|██████████| 379/379 [05:13<00:00,  1.21it/s]


Epoch 2 elapsed: 313.389s
	 [32mtrain_loss: 0.0046[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 73.65it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 429.54it/s]


	 eval log_loss: 0.4778
	 eval roc_auc: 0.9572
	 eval precision@10: 0.0370
	 eval recall@10: 0.0706
	 eval ndcg@10: 0.1486


train: 100%|██████████| 379/379 [05:10<00:00,  1.22it/s]


Epoch 3 elapsed: 310.918s
	 [32mtrain_loss: 0.0038[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 60.54it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 429.11it/s]


	 eval log_loss: 0.4830
	 eval roc_auc: 0.9560
	 eval precision@10: 0.0395
	 eval recall@10: 0.0756
	 eval ndcg@10: 0.1502


train: 100%|██████████| 379/379 [05:06<00:00,  1.23it/s]


Epoch 4 elapsed: 306.926s
	 [32mtrain_loss: 0.0033[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 73.78it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 427.99it/s]


	 eval log_loss: 0.4951
	 eval roc_auc: 0.9531
	 eval precision@10: 0.0368
	 eval recall@10: 0.0703
	 eval ndcg@10: 0.1483


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 80.12it/s]
eval_listwise: 100%|██████████| 689/689 [00:01<00:00, 393.83it/s]
[I 2023-11-15 02:58:16,072] Trial 8 finished with value: 0.030914368650217707 and parameters: {'lr': 0.005393182751402691, 'batch_size': 176, 'embed_size': 232, 'num_epochs': 4}. Best is trial 5 with value: 0.03570391872278665.


Training start time: [35m2023-11-15 02:58:16[0m


train: 100%|██████████| 457/457 [01:53<00:00,  4.02it/s]


Epoch 1 elapsed: 113.713s
	 [32mtrain_loss: 0.0939[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 440.35it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 643.16it/s]


	 eval log_loss: 0.4993
	 eval roc_auc: 0.9500
	 eval precision@10: 0.0349
	 eval recall@10: 0.0634
	 eval ndcg@10: 0.1376


train: 100%|██████████| 457/457 [01:53<00:00,  4.03it/s]


Epoch 2 elapsed: 113.363s
	 [32mtrain_loss: 0.009[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 296.44it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 652.18it/s]


	 eval log_loss: 0.5126
	 eval roc_auc: 0.9519
	 eval precision@10: 0.0353
	 eval recall@10: 0.0626
	 eval ndcg@10: 0.1438


train: 100%|██████████| 457/457 [01:53<00:00,  4.04it/s]


Epoch 3 elapsed: 113.149s
	 [32mtrain_loss: 0.0062[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 213.97it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 623.07it/s]


	 eval log_loss: 0.5229
	 eval roc_auc: 0.9497
	 eval precision@10: 0.0367
	 eval recall@10: 0.0647
	 eval ndcg@10: 0.1485


train: 100%|██████████| 457/457 [01:54<00:00,  3.98it/s]


Epoch 4 elapsed: 114.847s
	 [32mtrain_loss: 0.0043[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 279.84it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 661.37it/s]


	 eval log_loss: 0.5402
	 eval roc_auc: 0.9477
	 eval precision@10: 0.0355
	 eval recall@10: 0.0615
	 eval ndcg@10: 0.1432


train: 100%|██████████| 457/457 [01:54<00:00,  4.00it/s]


Epoch 5 elapsed: 114.115s
	 [32mtrain_loss: 0.004[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 273.26it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 658.95it/s]


	 eval log_loss: 0.5496
	 eval roc_auc: 0.9462
	 eval precision@10: 0.0364
	 eval recall@10: 0.0621
	 eval ndcg@10: 0.1448


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 232.18it/s]
eval_listwise: 100%|██████████| 689/689 [00:01<00:00, 676.74it/s]
[I 2023-11-15 03:07:55,947] Trial 9 finished with value: 0.036284470246734396 and parameters: {'lr': 0.006874727667973322, 'batch_size': 146, 'embed_size': 53, 'num_epochs': 5}. Best is trial 9 with value: 0.036284470246734396.


{'lr': 0.006874727667973322, 'batch_size': 146, 'embed_size': 53, 'num_epochs': 5}


In [37]:
lightgcn = LightGCN(
    task="ranking",
    data_info=data_info,
    loss_type="bpr",
    embed_size=best_params['embed_size'],
    n_epochs=best_params['num_epochs'],
    lr=best_params['lr'],
    batch_size=best_params['batch_size'],
    num_neg=1,
    device="cuda",
)

In [38]:
# monitor metrics on eval data during training
lightgcn.fit(
    train_data,
    neg_sampling=True,
    verbose=2,
    eval_data=eval_data,
    metrics=["loss", "roc_auc", "precision", "recall", "ndcg"],
)

Training start time: [35m2023-11-15 12:13:00[0m


train: 100%|██████████| 457/457 [02:09<00:00,  3.53it/s]


Epoch 1 elapsed: 129.393s
	 [32mtrain_loss: 0.0939[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 304.27it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 625.38it/s]


	 eval log_loss: 0.4993
	 eval roc_auc: 0.9500
	 eval precision@10: 0.0349
	 eval recall@10: 0.0634
	 eval ndcg@10: 0.1376


train: 100%|██████████| 457/457 [02:05<00:00,  3.63it/s]


Epoch 2 elapsed: 125.733s
	 [32mtrain_loss: 0.009[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 313.57it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 615.34it/s]


	 eval log_loss: 0.5126
	 eval roc_auc: 0.9519
	 eval precision@10: 0.0353
	 eval recall@10: 0.0626
	 eval ndcg@10: 0.1438


train: 100%|██████████| 457/457 [01:56<00:00,  3.93it/s]


Epoch 3 elapsed: 116.250s
	 [32mtrain_loss: 0.0062[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 328.30it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 647.93it/s]


	 eval log_loss: 0.5229
	 eval roc_auc: 0.9497
	 eval precision@10: 0.0367
	 eval recall@10: 0.0647
	 eval ndcg@10: 0.1485


train: 100%|██████████| 457/457 [01:55<00:00,  3.97it/s]


Epoch 4 elapsed: 115.259s
	 [32mtrain_loss: 0.0043[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 333.44it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 606.21it/s]


	 eval log_loss: 0.5402
	 eval roc_auc: 0.9477
	 eval precision@10: 0.0355
	 eval recall@10: 0.0615
	 eval ndcg@10: 0.1432


train: 100%|██████████| 457/457 [01:54<00:00,  3.99it/s]


Epoch 5 elapsed: 114.561s
	 [32mtrain_loss: 0.004[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 228.05it/s]
eval_listwise: 100%|██████████| 679/679 [00:01<00:00, 657.07it/s]

	 eval log_loss: 0.5496
	 eval roc_auc: 0.9462
	 eval precision@10: 0.0364
	 eval recall@10: 0.0621
	 eval ndcg@10: 0.1448





In [39]:
# do final evaluation on test data
evaluate(
    model=lightgcn,
    data=test_data,
    neg_sampling=True,
    metrics=["loss", "roc_auc", "precision", "recall", "ndcg"],
)

eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 242.67it/s]
eval_listwise: 100%|██████████| 689/689 [00:01<00:00, 681.81it/s]


{'loss': 0.5726528606170258,
 'roc_auc': 0.9392017125025194,
 'precision': 0.036284470246734396,
 'recall': 0.06498846057989743,
 'ndcg': 0.1354837731009233}

### Prediction and Evaluation

In [71]:
# importing the evaluation metrics
from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k,
                                                     recall_at_k, get_top_k_items)

In [46]:
# top k items to recommend
TOP_K = 10

In [61]:
train, eval, test = random_split(df, multi_ratios=[0.8, 0.1, 0.1])

In [82]:
users, items, preds = [], [], []
item = list(train.item.unique())
for user in train.user.unique():
    user = [user] * len(item)
    users.extend(user)
    items.extend(item)
    preds.extend(list(lightgcn.predict(user, item)))

all_predictions = pd.DataFrame(data={"user": users, "item":items, "prediction":preds})

merged = pd.merge(train, all_predictions, on=["user", "item"], how="outer")
all_predictions = merged[merged.label.isnull()].drop('label', axis=1)

In [78]:
all_predictions = all_predictions.rename(columns={"user":"userID", 'item':"itemID", "label":"rating", "song_by": "song"})
test = test.rename(columns={"user":"userID", 'item':"itemID", "label":"rating", "song_by": "song"})

In [83]:
scores = evaluate(
    model=lightgcn,
    data=test_data,
    neg_sampling=True,
    metrics=["loss", "roc_auc", "precision", "recall", "ndcg"],
)
scores

eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
eval_listwise: 100%|██████████| 689/689 [00:01<00:00, 513.11it/s]


{'loss': 0.5726528606170258,
 'roc_auc': 0.9392017125025194,
 'precision': 0.036284470246734396,
 'recall': 0.06498846057989743,
 'ndcg': 0.1354837731009233}

#### MAP

It is the average precision for each user normalized over all users.

In [79]:
eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
print(f"MAP @ {TOP_K}: {eval_map}")

MAP @ 10: 0.023336243271233598


#### NDCG

Normalized Discounted Cumulative Gain (NDCG) - evaluates how well the predicted items for a user are ranked based on relevance


In [84]:
eval_ndcg = scores['ndcg']
print(f"NDCG: {eval_ndcg}")

NDCG: 0.1354837731009233


In [80]:
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
print(f"NDCG @ {TOP_K}: {eval_ndcg}")

NDCG @ 10: 0.05353667894262494


#### Precision Recall

Precision - this measures the proportion of recommended items that are relevant

Recall - this measures the proportion of relevant items that are recommended

In [85]:
eval_precision = scores['precision']
eval_recall = scores['recall']
print(f"Precision: {eval_precision} \n Recall: {eval_recall}")

Precision: 0.036284470246734396 
 Recall: 0.06498846057989743


In [81]:
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
print(f"Precision @ {TOP_K}: {eval_precision} \n Recall @ {TOP_K}: {eval_recall}")

Precision @ 10: 0.0362844702467344 
 Recall @ 10: 0.06498846057989743
