In [250]:
import pandas as pd
from sqlalchemy import create_engine
import random
import warnings
import zipfile
from pathlib import Path
import pandas as pd
import tensorflow as tf
import tqdm
from dotenv import load_dotenv
import os

# Load variables from a .env file into the process environment so that the
# os.getenv() lookups in later cells can see them.
load_dotenv()

# Suppress every warning for the rest of the session (keeps cell output short;
# note this also hides deprecation notices from the libraries used below).
warnings.simplefilter("ignore")

In [251]:
from urllib.parse import quote

# Connection settings are read from the environment (see load_dotenv() above).
POSTGRES_ADDRESS = os.getenv("POSTGRES_ADDRESS")
POSTGRES_PORT = os.getenv("POSTGRES_PORT")
POSTGRES_USERNAME = os.getenv("POSTGRES_USERNAME")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")
POSTGRES_DBNAME = os.getenv("POSTGRES_DBNAME")

# Fail fast with a readable message: an unset variable would otherwise be
# formatted into the URL as the literal text "None" and surface much later as
# a confusing connection error.
_missing_settings = [
    name
    for name, value in (
        ("POSTGRES_ADDRESS", POSTGRES_ADDRESS),
        ("POSTGRES_PORT", POSTGRES_PORT),
        ("POSTGRES_USERNAME", POSTGRES_USERNAME),
        ("POSTGRES_PASSWORD", POSTGRES_PASSWORD),
        ("POSTGRES_DBNAME", POSTGRES_DBNAME),
    )
    if value is None
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(_missing_settings)
    )

# Percent-encode the credentials: characters such as "@", ":", "/" or "#" in a
# username/password would otherwise be parsed as URL delimiters.
# safe="" also encodes "/" (which quote() leaves untouched by default).
postgres_str = ('postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}'
                .format(username=quote(POSTGRES_USERNAME, safe=""),
                        password=quote(POSTGRES_PASSWORD, safe=""),
                        ipaddress=POSTGRES_ADDRESS,
                        port=POSTGRES_PORT,
                        dbname=POSTGRES_DBNAME))

cnx = create_engine(postgres_str)

# Read the training relation in one query and replace every NULL/NaN with the
# sentinel string "missing" (same fill as before, without in-place mutation so
# the cell is safe to re-run).
data = pd.read_sql_query("SELECT * FROM get_training_data", cnx).fillna("missing")
data.shape

(1934, 6)

In [252]:
# Preview up to 10 distinct rows. DataFrame.sample draws without replacement
# (no duplicated rows in the preview), the fixed random_state keeps the output
# stable across "Restart & Run All", and min() avoids an error when the frame
# has fewer than 10 rows (including zero).
data.sample(n=min(10, len(data)), random_state=42)

Unnamed: 0,user,item,label,category_1,category_2,category_3
1905,72,73,0,Manga,Giả tưởng,missing
512,30,96,2,Trong nước,Tiểu thuyết,missing
1516,57,108,3,Giả tưởng,Manga,missing
879,41,87,3,Nước ngoài,Tiểu thuyết,missing
818,40,98,3,Trong nước,Tiểu thuyết,missing
1851,71,102,2,Manga,Giả tưởng,missing
1925,74,76,1,Giả tưởng,Manga,missing
1074,44,89,2,Kinh điển,Tiểu thuyết,Nước ngoài
1389,50,74,1,Manga,Giả tưởng,missing
299,24,96,0,Tiểu thuyết,Trong nước,missing


In [253]:
from libreco.data import random_split

# Split the loaded frame into three parts using the ratios 0.8 / 0.1 / 0.1
# (bound, in order, to train / eval / test) with a fixed seed.
train_data, eval_data, test_data = random_split(
    data,
    multi_ratios=[0.8, 0.1, 0.1],
    seed=42,
)

In [254]:
from libreco.data import DatasetFeat

# The three category columns are declared both as sparse features and as
# item-side features; no user-side or dense features are used.
sparse_col = ["category_1", "category_2", "category_3"]
item_col = list(sparse_col)

# NOTE: train_data / eval_data / test_data are rebound here from the split
# outputs to the objects returned by DatasetFeat, so re-run the split cell
# before re-running this one.
train_data, data_info = DatasetFeat.build_trainset(
    train_data=train_data,
    user_col=[],
    item_col=item_col,
    sparse_col=sparse_col,
    dense_col=[],
)
eval_data = DatasetFeat.build_evalset(eval_data)
test_data = DatasetFeat.build_testset(test_data)

# Display the dataset summary as the cell output.
data_info

n_users: 60, n_items: 52, data density: 49.5513 %

In [255]:
from libreco.algorithms import TwoTower

# Reset the default TF (v1-compat) graph before constructing the model.
tf.compat.v1.reset_default_graph()

# Model hyperparameters, gathered in one place.
two_tower_params = dict(
    task="ranking",
    data_info=data_info,
    embed_size=16,
    n_epochs=30,
    loss_type="cross_entropy",
    lr=0.001,
    batch_size=2048,
    use_bn=True,
    hidden_units=(128, 64, 32),
)
model = TwoTower(**two_tower_params)

# Training options; the eval set is scored with these metrics during training.
fit_options = dict(
    neg_sampling=True,  # perform negative sampling on training and eval data
    verbose=2,
    shuffle=True,
    eval_data=eval_data,
    metrics=["loss", "roc_auc", "precision", "recall", "ndcg"],
)
model.fit(train_data, **fit_options)

# Initialise the KNN lookup with exact (non-approximate) cosine similarity.
model.init_knn(approximate=False, sim_type="cosine")

Training start time: [35m2024-11-16 22:29:23[0m
total params: [33m34,368[0m | embedding params: [33m2,720[0m | network params: [33m31,648[0m


train: 100%|██████████| 2/2 [00:00<00:00,  2.97it/s]


Epoch 1 elapsed: 0.677s
	 [32mtrain_loss: 2.5348[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<?, ?it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 333.38it/s]


	 eval log_loss: 0.6945
	 eval roc_auc: 0.4565
	 eval precision@10: 0.0096
	 eval recall@10: 0.0272
	 eval ndcg@10: 0.0319


train: 100%|██████████| 2/2 [00:00<00:00, 54.06it/s]


Epoch 2 elapsed: 0.040s
	 [32mtrain_loss: 1.7641[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 994.38it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 250.27it/s]


	 eval log_loss: 0.6936
	 eval roc_auc: 0.4859
	 eval precision@10: 0.0115
	 eval recall@10: 0.0304
	 eval ndcg@10: 0.0375


train: 100%|██████████| 2/2 [00:00<00:00, 57.15it/s]

Epoch 3 elapsed: 0.039s
	 [32mtrain_loss: 1.413[0m



eval_pointwise: 100%|██████████| 1/1 [00:00<?, ?it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 249.84it/s]


	 eval log_loss: 0.6927
	 eval roc_auc: 0.5264
	 eval precision@10: 0.0115
	 eval recall@10: 0.0304
	 eval ndcg@10: 0.0384


train: 100%|██████████| 2/2 [00:00<00:00, 54.06it/s]


Epoch 4 elapsed: 0.040s
	 [32mtrain_loss: 1.2123[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 999.83it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 333.68it/s]


	 eval log_loss: 0.6920
	 eval roc_auc: 0.5631
	 eval precision@10: 0.0135
	 eval recall@10: 0.0343
	 eval ndcg@10: 0.0423


train: 100%|██████████| 2/2 [00:00<00:00, 55.80it/s]


Epoch 5 elapsed: 0.041s
	 [32mtrain_loss: 1.0885[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 995.56it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 333.54it/s]


	 eval log_loss: 0.6914
	 eval roc_auc: 0.5872
	 eval precision@10: 0.0154
	 eval recall@10: 0.0439
	 eval ndcg@10: 0.0450


train: 100%|██████████| 2/2 [00:00<00:00, 55.55it/s]


Epoch 6 elapsed: 0.041s
	 [32mtrain_loss: 0.9494[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 999.36it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 332.75it/s]


	 eval log_loss: 0.6909
	 eval roc_auc: 0.6038
	 eval precision@10: 0.0154
	 eval recall@10: 0.0462
	 eval ndcg@10: 0.0491


train: 100%|██████████| 2/2 [00:00<00:00, 58.82it/s]


Epoch 7 elapsed: 0.039s
	 [32mtrain_loss: 0.8917[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<?, ?it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 332.83it/s]


	 eval log_loss: 0.6905
	 eval roc_auc: 0.6138
	 eval precision@10: 0.0173
	 eval recall@10: 0.0654
	 eval ndcg@10: 0.0574


train: 100%|██████████| 2/2 [00:00<00:00, 54.63it/s]


Epoch 8 elapsed: 0.042s
	 [32mtrain_loss: 0.8246[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 1001.74it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 333.46it/s]


	 eval log_loss: 0.6903
	 eval roc_auc: 0.6153
	 eval precision@10: 0.0154
	 eval recall@10: 0.0622
	 eval ndcg@10: 0.0566


train: 100%|██████████| 2/2 [00:00<00:00, 57.39it/s]


Epoch 9 elapsed: 0.039s
	 [32mtrain_loss: 0.7942[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 1003.18it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 333.62it/s]


	 eval log_loss: 0.6900
	 eval roc_auc: 0.6181
	 eval precision@10: 0.0231
	 eval recall@10: 0.0885
	 eval ndcg@10: 0.0737


train: 100%|██████████| 2/2 [00:00<00:00, 57.14it/s]

Epoch 10 elapsed: 0.040s
	 [32mtrain_loss: 0.7666[0m



eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 998.64it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 500.33it/s]


	 eval log_loss: 0.6897
	 eval roc_auc: 0.6199
	 eval precision@10: 0.0269
	 eval recall@10: 0.1109
	 eval ndcg@10: 0.0923


train: 100%|██████████| 2/2 [00:00<00:00, 57.14it/s]


Epoch 11 elapsed: 0.039s
	 [32mtrain_loss: 0.704[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<?, ?it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 333.23it/s]


	 eval log_loss: 0.6895
	 eval roc_auc: 0.6214
	 eval precision@10: 0.0250
	 eval recall@10: 0.1077
	 eval ndcg@10: 0.0911


train: 100%|██████████| 2/2 [00:00<00:00, 58.81it/s]


Epoch 12 elapsed: 0.038s
	 [32mtrain_loss: 0.7187[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<?, ?it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 375.83it/s]


	 eval log_loss: 0.6891
	 eval roc_auc: 0.6235
	 eval precision@10: 0.0250
	 eval recall@10: 0.1077
	 eval ndcg@10: 0.0953


train: 100%|██████████| 2/2 [00:00<00:00, 57.14it/s]


Epoch 13 elapsed: 0.039s
	 [32mtrain_loss: 0.6872[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<?, ?it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 250.02it/s]


	 eval log_loss: 0.6888
	 eval roc_auc: 0.6257
	 eval precision@10: 0.0269
	 eval recall@10: 0.1173
	 eval ndcg@10: 0.1046


train: 100%|██████████| 2/2 [00:00<00:00, 54.05it/s]

Epoch 14 elapsed: 0.042s
	 [32mtrain_loss: 0.6251[0m



eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 993.44it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 334.47it/s]


	 eval log_loss: 0.6885
	 eval roc_auc: 0.6250
	 eval precision@10: 0.0250
	 eval recall@10: 0.1077
	 eval ndcg@10: 0.1022


train: 100%|██████████| 2/2 [00:00<00:00, 52.63it/s]


Epoch 15 elapsed: 0.042s
	 [32mtrain_loss: 0.6418[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 1001.74it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 248.76it/s]


	 eval log_loss: 0.6882
	 eval roc_auc: 0.6260
	 eval precision@10: 0.0288
	 eval recall@10: 0.1269
	 eval ndcg@10: 0.1146


train: 100%|██████████| 2/2 [00:00<00:00, 57.97it/s]


Epoch 16 elapsed: 0.039s
	 [32mtrain_loss: 0.6307[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 1000.55it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 199.91it/s]


	 eval log_loss: 0.6880
	 eval roc_auc: 0.6250
	 eval precision@10: 0.0288
	 eval recall@10: 0.1269
	 eval ndcg@10: 0.1128


train: 100%|██████████| 2/2 [00:00<00:00, 52.63it/s]


Epoch 17 elapsed: 0.043s
	 [32mtrain_loss: 0.6162[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 996.27it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 249.93it/s]


	 eval log_loss: 0.6877
	 eval roc_auc: 0.6255
	 eval precision@10: 0.0288
	 eval recall@10: 0.1269
	 eval ndcg@10: 0.1133


train: 100%|██████████| 2/2 [00:00<00:00, 43.01it/s]


Epoch 18 elapsed: 0.055s
	 [32mtrain_loss: 0.5895[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<?, ?it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 249.96it/s]


	 eval log_loss: 0.6875
	 eval roc_auc: 0.6259
	 eval precision@10: 0.0308
	 eval recall@10: 0.1301
	 eval ndcg@10: 0.1272


train: 100%|██████████| 2/2 [00:00<00:00, 54.38it/s]


Epoch 19 elapsed: 0.040s
	 [32mtrain_loss: 0.5889[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<?, ?it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 333.28it/s]


	 eval log_loss: 0.6872
	 eval roc_auc: 0.6250
	 eval precision@10: 0.0308
	 eval recall@10: 0.1301
	 eval ndcg@10: 0.1286


train: 100%|██████████| 2/2 [00:00<00:00, 55.55it/s]


Epoch 20 elapsed: 0.040s
	 [32mtrain_loss: 0.621[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<?, ?it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 333.86it/s]


	 eval log_loss: 0.6870
	 eval roc_auc: 0.6248
	 eval precision@10: 0.0308
	 eval recall@10: 0.1301
	 eval ndcg@10: 0.1245


train: 100%|██████████| 2/2 [00:00<00:00, 56.11it/s]


Epoch 21 elapsed: 0.041s
	 [32mtrain_loss: 0.5948[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 1000.55it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 249.81it/s]


	 eval log_loss: 0.6868
	 eval roc_auc: 0.6228
	 eval precision@10: 0.0308
	 eval recall@10: 0.1301
	 eval ndcg@10: 0.1219


train: 100%|██████████| 2/2 [00:00<00:00, 54.52it/s]


Epoch 22 elapsed: 0.041s
	 [32mtrain_loss: 0.5862[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 995.09it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 332.64it/s]


	 eval log_loss: 0.6865
	 eval roc_auc: 0.6219
	 eval precision@10: 0.0308
	 eval recall@10: 0.1301
	 eval ndcg@10: 0.1173


train: 100%|██████████| 2/2 [00:00<00:00, 55.56it/s]


Epoch 23 elapsed: 0.042s
	 [32mtrain_loss: 0.563[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 998.17it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 332.64it/s]


	 eval log_loss: 0.6860
	 eval roc_auc: 0.6236
	 eval precision@10: 0.0308
	 eval recall@10: 0.1301
	 eval ndcg@10: 0.1174


train: 100%|██████████| 2/2 [00:00<00:00, 58.84it/s]


Epoch 24 elapsed: 0.038s
	 [32mtrain_loss: 0.5543[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 999.60it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 332.93it/s]


	 eval log_loss: 0.6855
	 eval roc_auc: 0.6254
	 eval precision@10: 0.0288
	 eval recall@10: 0.1205
	 eval ndcg@10: 0.1127


train: 100%|██████████| 2/2 [00:00<00:00, 56.43it/s]


Epoch 25 elapsed: 0.040s
	 [32mtrain_loss: 0.5581[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<?, ?it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 333.46it/s]


	 eval log_loss: 0.6851
	 eval roc_auc: 0.6258
	 eval precision@10: 0.0269
	 eval recall@10: 0.1141
	 eval ndcg@10: 0.1120


train: 100%|██████████| 2/2 [00:00<00:00, 55.55it/s]


Epoch 26 elapsed: 0.040s
	 [32mtrain_loss: 0.5667[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 497.43it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 250.48it/s]


	 eval log_loss: 0.6849
	 eval roc_auc: 0.6233
	 eval precision@10: 0.0288
	 eval recall@10: 0.1173
	 eval ndcg@10: 0.1145


train: 100%|██████████| 2/2 [00:00<00:00, 56.01it/s]


Epoch 27 elapsed: 0.040s
	 [32mtrain_loss: 0.5587[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 997.46it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 250.02it/s]


	 eval log_loss: 0.6847
	 eval roc_auc: 0.6226
	 eval precision@10: 0.0288
	 eval recall@10: 0.1173
	 eval ndcg@10: 0.1139


train: 100%|██████████| 2/2 [00:00<00:00, 57.15it/s]


Epoch 28 elapsed: 0.040s
	 [32mtrain_loss: 0.5439[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<?, ?it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 250.06it/s]


	 eval log_loss: 0.6845
	 eval roc_auc: 0.6213
	 eval precision@10: 0.0269
	 eval recall@10: 0.1141
	 eval ndcg@10: 0.1132


train: 100%|██████████| 2/2 [00:00<00:00, 52.44it/s]

Epoch 29 elapsed: 0.043s
	 [32mtrain_loss: 0.5483[0m



eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 911.21it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 249.99it/s]


	 eval log_loss: 0.6841
	 eval roc_auc: 0.6210
	 eval precision@10: 0.0269
	 eval recall@10: 0.1141
	 eval ndcg@10: 0.1095


train: 100%|██████████| 2/2 [00:00<00:00, 57.14it/s]


Epoch 30 elapsed: 0.041s
	 [32mtrain_loss: 0.5425[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 997.22it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 250.63it/s]


	 eval log_loss: 0.6838
	 eval roc_auc: 0.6195
	 eval precision@10: 0.0269
	 eval recall@10: 0.1141
	 eval ndcg@10: 0.1102


In [256]:
from libreco.evaluation import evaluate

# Score the trained model on the held-out test set with the same metric names
# that were used during training.
test_metrics = ["loss", "roc_auc", "precision", "recall", "ndcg"]

evaluate(
    model=model,
    data=test_data,
    neg_sampling=True,  # perform negative sampling on test data
    metrics=test_metrics,
)

eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 731.48it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 333.20it/s]


{'loss': 0.6868372136953229,
 'roc_auc': 0.5805080242321181,
 'precision': 0.009615384615384616,
 'recall': 0.04711538461538462,
 'ndcg': 0.0375278449640126}

In [257]:
# Save the dataset metadata and the trained model side by side, under the same
# directory and model name.
save_dir, save_name = "models", "two_tower"

data_info.save(save_dir, model_name=save_name)
model.save(save_dir, model_name=save_name)