In [13]:
import pandas as pd
from sqlalchemy import create_engine
import random
import warnings
import zipfile
from pathlib import Path
import pandas as pd
import tensorflow as tf
import tqdm
from dotenv import load_dotenv
import os

# Pull settings from a local .env file into the process environment so the
# os.getenv(...) lookups in later cells can find them.
load_dotenv()

# Silence every warning category for the rest of the session; this keeps the
# cell outputs short but also hides deprecation notices.
warnings.filterwarnings(action="ignore")

In [14]:
# Read the training interactions from PostgreSQL into the `data` DataFrame.
from urllib.parse import quote_plus

# Connection settings are taken from the environment (see load_dotenv()).
POSTGRES_ADDRESS = os.getenv("POSTGRES_ADDRESS")
POSTGRES_PORT = os.getenv("POSTGRES_PORT")
POSTGRES_USERNAME = os.getenv("POSTGRES_USERNAME")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")
POSTGRES_DBNAME = os.getenv("POSTGRES_DBNAME")

# Fail early with a readable message: an unset variable would otherwise be
# formatted into the URL as the literal text "None".
_unset_settings = [
    setting_name
    for setting_name, setting_value in {
        "POSTGRES_ADDRESS": POSTGRES_ADDRESS,
        "POSTGRES_PORT": POSTGRES_PORT,
        "POSTGRES_USERNAME": POSTGRES_USERNAME,
        "POSTGRES_PASSWORD": POSTGRES_PASSWORD,
        "POSTGRES_DBNAME": POSTGRES_DBNAME,
    }.items()
    if setting_value is None
]
if _unset_settings:
    raise RuntimeError(
        "Missing PostgreSQL environment variables: " + ", ".join(_unset_settings)
    )

# Percent-encode the credentials so characters such as "@", ":", "/" or "#"
# inside the username/password cannot break the structure of the URL.
postgres_str = ('postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}'
                .format(username=quote_plus(POSTGRES_USERNAME),
                        password=quote_plus(POSTGRES_PASSWORD),
                        ipaddress=POSTGRES_ADDRESS,
                        port=POSTGRES_PORT,
                        dbname=POSTGRES_DBNAME))

cnx = create_engine(postgres_str)

data = pd.read_sql_query("SELECT * FROM get_training_data", cnx)
# Replace NULLs with the placeholder string "missing". Rebinding (instead of
# inplace=True) yields the same frame without mutating the query result.
data = data.fillna("missing")
data.shape

(1934, 6)

In [15]:
# Preview ten rows at random positions. random.choices draws with replacement,
# so the same row can show up more than once.
preview_positions = random.choices(range(len(data)), k=10)
data.iloc[preview_positions]

Unnamed: 0,user,item,label,category_1,category_2,category_3
1905,72,73,0,Manga,Giả tưởng,missing
512,30,96,2,Trong nước,Tiểu thuyết,missing
1516,57,108,3,Giả tưởng,Manga,missing
879,41,87,3,Nước ngoài,Tiểu thuyết,missing
818,40,98,3,Trong nước,Tiểu thuyết,missing
1851,71,102,2,Manga,Giả tưởng,missing
1925,74,76,1,Giả tưởng,Manga,missing
1074,44,89,2,Kinh điển,Tiểu thuyết,Nước ngoài
1389,50,74,1,Manga,Giả tưởng,missing
299,24,96,0,Tiểu thuyết,Trong nước,missing


In [16]:
from libreco.data import random_split

# Split the interactions into three parts using ratios 0.8 / 0.1 / 0.1, with a
# fixed seed so the split is repeatable.
train_data, eval_data, test_data = random_split(
    data,
    multi_ratios=[0.8, 0.1, 0.1],
    seed=42,
)

In [17]:
from libreco.data import DatasetFeat

# The three category columns are declared both as sparse features and as
# item-side features; no user-side or dense features are used.
# NOTE(review): the preview shows the same tags in different column order
# (e.g. "Manga, Giả tưởng" vs "Giả tưởng, Manga"); libreco's multi_sparse_col
# may be a better fit for such columns — confirm before changing.
sparse_col = ["category_1", "category_2", "category_3"]
item_col = ["category_1", "category_2", "category_3"]

# NOTE: this cell rebinds train_data / eval_data / test_data from the split
# results to libreco dataset objects, so re-run the split cell before
# re-running this one.
train_data, data_info = DatasetFeat.build_trainset(
    train_data=train_data,
    user_col=[],
    item_col=item_col,
    sparse_col=sparse_col,
    dense_col=[],
)
eval_data = DatasetFeat.build_evalset(eval_data)
test_data = DatasetFeat.build_testset(test_data)
data_info

n_users: 60, n_items: 52, data density: 49.5513 %

In [18]:
from libreco.algorithms import WideDeep

# Reset TensorFlow's default graph so re-running this cell does not build the
# model on top of a graph left over from a previous run.
tf.compat.v1.reset_default_graph()

# Hyperparameters for the Wide & Deep ranking model, gathered in one place.
wide_deep_params = dict(
    task="ranking",
    data_info=data_info,
    embed_size=16,
    n_epochs=10,
    loss_type="cross_entropy",
    lr={"wide": 0.05, "deep": 7e-4},  # separate learning rates for the two parts
    batch_size=2048,
    use_bn=True,
    hidden_units=(128, 64, 32),
)

model = WideDeep(**wide_deep_params)

# Train the model; the listed metrics are reported on eval_data during training.
fit_options = dict(
    neg_sampling=True,  # perform negative sampling on training and eval data
    verbose=2,
    shuffle=True,
    eval_data=eval_data,
    metrics=["loss", "roc_auc", "precision", "recall", "ndcg"],
)

model.fit(train_data, **fit_options)

Training start time: [35m2024-11-19 22:12:26[0m
total params: [33m23,718[0m | embedding params: [33m2,657[0m | network params: [33m21,061[0m


train: 100%|██████████| 2/2 [00:00<00:00,  4.78it/s]


Epoch 1 elapsed: 0.422s
	 [32mtrain_loss: 0.8288[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 14.92it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 124.57it/s]


	 eval log_loss: 0.6931
	 eval roc_auc: 0.5030
	 eval precision@10: 0.0115
	 eval recall@10: 0.0462
	 eval ndcg@10: 0.0395


train: 100%|██████████| 2/2 [00:00<00:00, 83.34it/s]


Epoch 2 elapsed: 0.028s
	 [32mtrain_loss: 0.7568[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 499.92it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 106.11it/s]


	 eval log_loss: 0.6935
	 eval roc_auc: 0.5051
	 eval precision@10: 0.0154
	 eval recall@10: 0.0615
	 eval ndcg@10: 0.0488


train: 100%|██████████| 2/2 [00:00<00:00, 83.36it/s]


Epoch 3 elapsed: 0.028s
	 [32mtrain_loss: 0.7126[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 501.17it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 124.96it/s]


	 eval log_loss: 0.6934
	 eval roc_auc: 0.5118
	 eval precision@10: 0.0192
	 eval recall@10: 0.0760
	 eval ndcg@10: 0.0629


train: 100%|██████████| 2/2 [00:00<00:00, 83.32it/s]


Epoch 4 elapsed: 0.028s
	 [32mtrain_loss: 0.6992[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 333.68it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 111.05it/s]


	 eval log_loss: 0.6929
	 eval roc_auc: 0.5200
	 eval precision@10: 0.0212
	 eval recall@10: 0.0856
	 eval ndcg@10: 0.0686


train: 100%|██████████| 2/2 [00:00<00:00, 86.95it/s]


Epoch 5 elapsed: 0.029s
	 [32mtrain_loss: 0.6885[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 499.14it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 125.05it/s]


	 eval log_loss: 0.6922
	 eval roc_auc: 0.5307
	 eval precision@10: 0.0231
	 eval recall@10: 0.0894
	 eval ndcg@10: 0.0731


train: 100%|██████████| 2/2 [00:00<00:00, 83.34it/s]


Epoch 6 elapsed: 0.029s
	 [32mtrain_loss: 0.6703[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 500.16it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 111.15it/s]


	 eval log_loss: 0.6915
	 eval roc_auc: 0.5414
	 eval precision@10: 0.0250
	 eval recall@10: 0.0958
	 eval ndcg@10: 0.0801


train: 100%|██████████| 2/2 [00:00<00:00, 84.74it/s]


Epoch 7 elapsed: 0.028s
	 [32mtrain_loss: 0.6499[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 501.71it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 125.09it/s]


	 eval log_loss: 0.6909
	 eval roc_auc: 0.5561
	 eval precision@10: 0.0231
	 eval recall@10: 0.0862
	 eval ndcg@10: 0.0772


train: 100%|██████████| 2/2 [00:00<00:00, 86.07it/s]


Epoch 8 elapsed: 0.027s
	 [32mtrain_loss: 0.6435[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 500.16it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 125.02it/s]


	 eval log_loss: 0.6904
	 eval roc_auc: 0.5669
	 eval precision@10: 0.0231
	 eval recall@10: 0.0702
	 eval ndcg@10: 0.0723


train: 100%|██████████| 2/2 [00:00<00:00, 84.46it/s]


Epoch 9 elapsed: 0.027s
	 [32mtrain_loss: 0.6407[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 333.73it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 125.01it/s]


	 eval log_loss: 0.6901
	 eval roc_auc: 0.5707
	 eval precision@10: 0.0231
	 eval recall@10: 0.0760
	 eval ndcg@10: 0.0873


train: 100%|██████████| 2/2 [00:00<00:00, 86.94it/s]


Epoch 10 elapsed: 0.028s
	 [32mtrain_loss: 0.636[0m


eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 724.91it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 124.98it/s]


	 eval log_loss: 0.6900
	 eval roc_auc: 0.5691
	 eval precision@10: 0.0231
	 eval recall@10: 0.0760
	 eval ndcg@10: 0.0875


In [19]:
from libreco.evaluation import evaluate

# Score the trained model on the held-out test split with the same metric
# names that were used during training.
test_scores = evaluate(
    model=model,
    data=test_data,
    neg_sampling=True,  # perform negative sampling on test data
    metrics=["loss", "roc_auc", "precision", "recall", "ndcg"],
)
test_scores

eval_pointwise: 100%|██████████| 1/1 [00:00<00:00, 500.16it/s]
eval_listwise: 100%|██████████| 1/1 [00:00<00:00, 124.99it/s]


{'loss': 0.6884697562607667,
 'roc_auc': 0.5881071314698693,
 'precision': 0.023076923076923075,
 'recall': 0.07825091575091575,
 'ndcg': 0.10545334832226275}

In [20]:
# Save the dataset metadata and the trained model under the same directory and
# model name so they can be loaded together later.
for saveable in (data_info, model):
    saveable.save("models", model_name="wide_deep")