In [1]:
!pip install LibRecommender
import numpy as np
import pandas as pd
from libreco.data import random_split, DatasetPure
from libreco.algorithms import YouTubeRanking  # pure data
from libreco.evaluation import evaluate

import random
import warnings
import zipfile
from pathlib import Path

import pandas as pd
import tensorflow as tf
import tqdm
warnings.filterwarnings("ignore")



Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
def load_ml_1m():
    """Download MovieLens-1M and return the ratings joined with user and movie features.

    The archive is fetched with ``tf.keras.utils.get_file`` using the current
    directory as cache; the three ``.dat`` tables are then read from ``./ml-1m``.

    Returns
    -------
    pandas.DataFrame
        One row per rating with columns ``user``, ``item``, ``label`` (the
        rating), ``time``, ``sex``, ``age``, ``occupation``, ``genre1``,
        ``genre2`` and ``genre3``. Rows are shuffled with a fixed seed.
    """
    # download and extract zip file (HTTPS so the archive is not fetched in clear text)
    tf.keras.utils.get_file(
        "ml-1m.zip",
        "https://files.grouplens.org/datasets/movielens/ml-1m.zip",
        cache_dir=".",
        cache_subdir=".",
        extract=True,
    )
    # read and merge data into same table
    data_dir = Path(".").absolute() / "ml-1m"
    # "::" is a multi-character separator, which only the python parser supports;
    # naming the engine explicitly avoids the silent fallback and its ParserWarning.
    ratings = pd.read_csv(
        data_dir / "ratings.dat",
        sep="::",
        usecols=[0, 1, 2, 3],
        names=["user", "item", "rating", "time"],
        engine="python",
    )
    users = pd.read_csv(
        data_dir / "users.dat",
        sep="::",
        usecols=[0, 1, 2, 3],
        names=["user", "sex", "age", "occupation"],
        engine="python",
    )
    items = pd.read_csv(
        data_dir / "movies.dat",
        sep="::",
        usecols=[0, 2],
        names=["item", "genre"],
        encoding="iso-8859-1",
        engine="python",
    )
    # keep only the first three "|"-separated genres, padding with "missing"
    items[["genre1", "genre2", "genre3"]] = (
        items["genre"].str.split(r"|", expand=True).fillna("missing").iloc[:, :3]
    )
    items = items.drop(columns="genre")
    data = (
        ratings.merge(users, on="user")
        .merge(items, on="item")
        .rename(columns={"rating": "label"})
    )
    # random shuffle data
    return data.sample(frac=1, random_state=42).reset_index(drop=True)

In [3]:
# Load the merged ratings/users/movies table, then show its (rows, columns) shape.
data = load_ml_1m()
data.shape

(1000209, 10)

In [4]:
# Preview the merged table (the notebook display truncates it to the first and last rows).
data

Unnamed: 0,user,item,label,time,sex,age,occupation,genre1,genre2,genre3
0,5755,184,3,958280246,F,35,2,Drama,missing,missing
1,4585,519,3,964321944,M,35,7,Sci-Fi,Thriller,missing
2,1503,3114,4,974762175,M,25,12,Animation,Children's,Comedy
3,2166,648,4,974614593,M,1,10,Action,Adventure,Mystery
4,3201,2178,5,968626301,M,45,7,Thriller,missing,missing
...,...,...,...,...,...,...,...,...,...,...
1000204,1509,2716,4,974747653,M,1,0,Comedy,Horror,missing
1000205,3756,1411,5,966096744,M,18,12,Drama,missing,missing
1000206,3609,1552,3,966614629,F,25,17,Action,Adventure,Thriller
1000207,4169,3639,4,971579005,M,50,0,Action,missing,missing


In [5]:
# Split the interactions 80/10/10 into train/eval/test frames. The frames get
# their own names so that `train_data`/`eval_data`/`test_data` only ever refer
# to the datasets built from them. The seed is spelled out for reproducibility.
train_df, eval_df, test_df = random_split(
    data, multi_ratios=[0.8, 0.1, 0.1], seed=42
)

# build_trainset also returns `data_info`, which is passed to the model constructor
train_data, data_info = DatasetPure.build_trainset(train_df)
eval_data = DatasetPure.build_evalset(eval_df)
test_data = DatasetPure.build_testset(test_df)

In [6]:
# Hyper-parameters gathered in one mapping so they can be inspected or tweaked
# separately from the constructor call.
youtube_params = dict(
    task="ranking",
    data_info=data_info,
    loss_type="cross_entropy",
    embed_size=16,
    n_epochs=10,
    lr=1e-3,
    batch_size=2048,
    num_neg=1,
)
youtube = YouTubeRanking(**youtube_params)

In [7]:
# monitor metrics on eval data during training
youtube.fit(
    train_data,
    neg_sampling=True, #for rating, this param is false else True
    verbose=2,
    eval_data=eval_data,
    metrics=["loss"],
)

# do final evaluation on test data
evaluate(
    model=youtube,
    data=test_data,
    neg_sampling=True,
    metrics=["loss"],
)
#for implicit feedback, metrics like precision@k, recall@k, ndcg can be used

Instructions for updating:
Colocations handled automatically by placer.


Training start time: [35m2024-02-21 20:52:31[0m
total params: [33m172,673[0m | embedding params: [33m155,777[0m | network params: [33m16,896[0m


train: 100%|██████████| 782/782 [01:01<00:00, 12.71it/s]


Epoch 1 elapsed: 61.511s
	 [32mtrain_loss: 0.5372[0m


eval_pointwise: 100%|██████████| 25/25 [00:00<00:00, 50.41it/s]


	 eval log_loss: 0.5115


train: 100%|██████████| 782/782 [00:44<00:00, 17.74it/s]


Epoch 2 elapsed: 44.080s
	 [32mtrain_loss: 0.5074[0m


eval_pointwise: 100%|██████████| 25/25 [00:00<00:00, 52.81it/s]


	 eval log_loss: 0.4901


train: 100%|██████████| 782/782 [00:45<00:00, 17.12it/s]


Epoch 3 elapsed: 45.671s
	 [32mtrain_loss: 0.4726[0m


eval_pointwise: 100%|██████████| 25/25 [00:00<00:00, 56.27it/s]


	 eval log_loss: 0.4601


train: 100%|██████████| 782/782 [00:44<00:00, 17.64it/s]


Epoch 4 elapsed: 44.328s
	 [32mtrain_loss: 0.4535[0m


eval_pointwise: 100%|██████████| 25/25 [00:00<00:00, 56.92it/s]


	 eval log_loss: 0.4506


train: 100%|██████████| 782/782 [00:43<00:00, 17.92it/s]


Epoch 5 elapsed: 43.653s
	 [32mtrain_loss: 0.4422[0m


eval_pointwise: 100%|██████████| 25/25 [00:00<00:00, 58.20it/s]


	 eval log_loss: 0.4432


train: 100%|██████████| 782/782 [00:44<00:00, 17.66it/s]


Epoch 6 elapsed: 44.300s
	 [32mtrain_loss: 0.432[0m


eval_pointwise: 100%|██████████| 25/25 [00:00<00:00, 56.94it/s]


	 eval log_loss: 0.4365


train: 100%|██████████| 782/782 [00:44<00:00, 17.53it/s]


Epoch 7 elapsed: 44.632s
	 [32mtrain_loss: 0.4232[0m


eval_pointwise: 100%|██████████| 25/25 [00:00<00:00, 57.69it/s]


	 eval log_loss: 0.4325


train: 100%|██████████| 782/782 [00:42<00:00, 18.46it/s]


Epoch 8 elapsed: 42.358s
	 [32mtrain_loss: 0.4172[0m


eval_pointwise: 100%|██████████| 25/25 [00:00<00:00, 59.73it/s]


	 eval log_loss: 0.4309


train: 100%|██████████| 782/782 [00:43<00:00, 18.02it/s]


Epoch 9 elapsed: 43.391s
	 [32mtrain_loss: 0.4117[0m


eval_pointwise: 100%|██████████| 25/25 [00:00<00:00, 61.15it/s]


	 eval log_loss: 0.4301


train: 100%|██████████| 782/782 [00:41<00:00, 18.69it/s]


Epoch 10 elapsed: 41.841s
	 [32mtrain_loss: 0.4075[0m


eval_pointwise: 100%|██████████| 25/25 [00:00<00:00, 60.00it/s]


	 eval log_loss: 0.4285


eval_pointwise: 100%|██████████| 25/25 [00:00<00:00, 58.18it/s]


{'loss': 0.4278360451722402}

In [8]:
USER_ID = 5755
ITEM_ID = 110

# Predict the preference of USER_ID for ITEM_ID. The result is printed because
# this call is not the last expression of the cell, so it would otherwise be
# computed and silently discarded.
prediction = youtube.predict(user=USER_ID, item=ITEM_ID)
print(f"predicted preference of user {USER_ID} for item {ITEM_ID}: {prediction}")

# Recommend 10 items for the same user; `a` is read by the following cell.
a = youtube.recommend_user(user=USER_ID, n_rec=10)

In [9]:
# `a` maps each requested user id to its recommended item ids; these are
# recommendations, not evaluation metrics, so the header says so.
print("Recommended items per user:")
for user_id, item_ids in a.items():
    print(f"{user_id}: {item_ids}")

Evaluation results:
5755: [ 593 2858  608   25 1259 1265  296 1358 2248 1307]
