In [4]:
import torch
import json
import csv
from Dataset import Dataset
from Evaluator import Evaluator
from CDAE import CDAE
from utils import train_model
from DataUtils import pre_preprocess_data

# Hyperparameters and paths. Every value in this cell can be tuned to improve accuracy.

# Fix torch's global RNG so runs are repeatable under Restart & Run All.
# NOTE(review): if Dataset / CDAE / train_model also draw from `random` or
# numpy, those generators need seeding too — confirm against their sources.
seed = 42
torch.manual_seed(seed)

data_path = "books/train_data.csv"
# NOTE(review): train_ratio is not passed to any call visible in this
# notebook — confirm whether the split inside Dataset actually uses it.
train_ratio = 0.7
hidden_dim = 50
corruption_ratio = 0.3
num_epochs = 100
batch_size = 512
testing_batch_size = 512
learning_rate = 0.001
early_stop = True
# NOTE(review): patience is not forwarded to train_model in the cells visible
# here — confirm how early stopping obtains its patience value.
patience = 50
top_k = [5]
activation = "sigmoid"
device = torch.device("cpu")

# Column mapping handed to pre_preprocess_data (raw CSV column name -> name used
# downstream, presumably — verify against DataUtils).
columns = {"user_id": "user", "book_id": "item", "rating": "rating"}

# Preprocess the raw ratings file and write the result to save_path, which the
# Dataset cell reads next. The two thresholds are filtering parameters whose
# exact semantics are defined in DataUtils.pre_preprocess_data.
save_path = "books/user_item_rating.csv"
pre_preprocess_data(
    data_path=data_path,
    save_path=save_path,
    sep=",",
    columns=columns,
    min_ratings_threshold=60,
    min_ratings_count_threshold=50,
)

In [5]:
# Build the dataset from the preprocessed ratings file written by the
# configuration cell; "training.json" is passed through as Dataset's save_path.
dataset = Dataset(
    data_path=save_path,
    save_path="training.json",
    sep=",",
    device=device,
)

# Evaluation inputs and item popularity as exposed by the dataset object.
eval_pos, eval_target = dataset.eval_data()
item_popularity = dataset.item_popularity

# The evaluator reports its metrics at the cut-offs listed in top_k.
evaluator = Evaluator(eval_pos, eval_target, item_popularity, top_k)

# Model dimensions come from the dataset; the remaining settings come from the
# hyperparameter cell.
cdae_settings = {
    "num_users": dataset.num_users,
    "num_items": dataset.num_items,
    "hidden_dim": hidden_dim,
    "corruption_ratio": corruption_ratio,
    "activation": activation,
}
recommender_model = CDAE(**cdae_settings)

Preprocess start...
Initial users: 33285, items: 1801
Assign new user id from 0..n
Assign new item id from 0..n
Split data into training set and test set
# zero train, test: 0, 0
Preprocess finished.


In [6]:
# First training run: fit the freshly constructed CDAE for num_epochs epochs.
# NOTE(review): `patience` from the config cell is not forwarded here — confirm
# whether train_model accepts it or relies on an internal default.
training_options = {
    "batch_size": batch_size,
    "test_batch_size": testing_batch_size,
    "learning_rate": learning_rate,
    "epochs": num_epochs,
    "early_stop": early_stop,
}
training_result = train_model(
    recommender_model, dataset, evaluator, **training_options
)

# train_model hands back the model plus the loss and metric recorders that the
# CSV-export cell writes out.
cade_model, loss_recorder, metric_recorder = training_result

Training epoch 1/100, 2.96, 1.41 loss = 16.16, Prec@5=0.0145 Recall@5=0.0145 NDCG@5=0.0163 Nov@5=4.5601 Gini-D=0.1056
Training epoch 2/100, 2.47, 1.10 loss = 15.08, Prec@5=0.0224 Recall@5=0.0224 NDCG@5=0.0244 Nov@5=3.8845 Gini-D=0.1170
Training epoch 3/100, 2.46, 1.05 loss = 14.52, Prec@5=0.0842 Recall@5=0.0842 NDCG@5=0.0879 Nov@5=1.3145 Gini-D=0.2656
Training epoch 4/100, 2.19, 1.10 loss = 14.26, Prec@5=0.0916 Recall@5=0.0916 NDCG@5=0.0945 Nov@5=1.1754 Gini-D=0.7624
Training epoch 5/100, 1.86, 0.93 loss = 14.17, Prec@5=0.0917 Recall@5=0.0917 NDCG@5=0.0949 Nov@5=1.1741 Gini-D=0.8581
Training epoch 6/100, 1.80, 0.94 loss = 14.14, Prec@5=0.0917 Recall@5=0.0917 NDCG@5=0.0949 Nov@5=1.1738 Gini-D=0.6698
Training epoch 7/100, 1.78, 0.95 loss = 14.13, Prec@5=0.0913 Recall@5=0.0913 NDCG@5=0.0946 Nov@5=1.1738 Gini-D=0.8573
Training epoch 8/100, 1.78, 0.94 loss = 14.13, Prec@5=0.0912 Recall@5=0.0912 NDCG@5=0.0946 Nov@5=1.1785 Gini-D=1.0000
Training epoch 9/100, 1.78, 0.96 loss = 14.13, Prec@5=0.

In [3]:
# Dump the loss history, one row per entry of loss_recorder.
# NOTE(review): the target file is "model_loss" with no ".csv" extension, unlike
# "model_metrics.csv" below — confirm whether that is intentional.
# NOTE(review): epoch indices written here are 0-based, while the training log
# prints epochs starting at 1.
with open("model_loss", 'w', newline='') as loss_file:
    loss_writer = csv.writer(loss_file, quoting=csv.QUOTE_MINIMAL)
    loss_writer.writerow(["epoch", "loss"])
    loss_writer.writerows(
        [index, value] for index, value in enumerate(loss_recorder)
    )

# Column layout of the metrics file; every name after "epoch" is looked up in
# each entry of metric_recorder. The "@5" names are hard-coded, so they only
# match while top_k == [5].
header = ["epoch", "Prec@5", "Recall@5", "NDCG@5", "Nov@5", "Gini-D"]
metric_names = header[1:]

with open("model_metrics.csv", 'w', newline='') as metrics_file:
    metrics_writer = csv.writer(metrics_file, quoting=csv.QUOTE_MINIMAL)
    metrics_writer.writerow(header)
    # A metric missing from an entry is written as an empty cell.
    metrics_writer.writerows(
        [index] + [epoch_metrics.get(name, "") for name in metric_names]
        for index, epoch_metrics in enumerate(metric_recorder)
    )

In [4]:
# Persist the parameters (state_dict) of the model returned by the first
# training run.
trained_weights = cade_model.state_dict()
torch.save(trained_weights, "cdae_recommender.pth")

In [8]:
# Continue training from the saved checkpoint: rebuild the same architecture,
# load the stored weights, then train for 20 more epochs.
model = CDAE(
    num_users=dataset.num_users, num_items=dataset.num_items, hidden_dim=hidden_dim,
    corruption_ratio=corruption_ratio, activation=activation
)
# map_location remaps the stored tensors onto the configured device, so the
# checkpoint loads even if it was written on different hardware.
model.load_state_dict(torch.load('cdae_recommender.pth', map_location=device))
model.train()
# NOTE(review): this rebinds loss_recorder / metric_recorder, discarding the
# first run's histories; re-running the CSV-export cell afterwards would write
# this run's values instead.
# NOTE(review): the recorded output of this cell shows loss around 1.57e7,
# versus about 14 in the first run — worth investigating before trusting it.
cade_model_2, loss_recorder, metric_recorder = train_model(
    model,
    dataset,
    evaluator,
    batch_size=batch_size,
    test_batch_size=testing_batch_size,
    learning_rate=learning_rate,
    epochs=20,
    early_stop=early_stop,
)

Training epoch 1/20, 2.41, 1.25 loss = 15709952.00, Prec@5=0.1245 Recall@5=0.1245 NDCG@5=0.1261 Nov@5=1.8242 Gini-D=0.1706
Training epoch 2/20, 2.11, 1.07 loss = 15701023.00, Prec@5=0.1249 Recall@5=0.1249 NDCG@5=0.1269 Nov@5=1.8179 Gini-D=0.1657
Training epoch 3/20, 2.03, 0.99 loss = 15692958.00, Prec@5=0.1261 Recall@5=0.1261 NDCG@5=0.1278 Nov@5=1.8315 Gini-D=0.1663
Training epoch 4/20, 2.07, 1.03 loss = 15685243.00, Prec@5=0.1261 Recall@5=0.1261 NDCG@5=0.1277 Nov@5=1.8481 Gini-D=0.1670
Training epoch 5/20, 2.05, 1.01 loss = 15677892.00, Prec@5=0.1264 Recall@5=0.1264 NDCG@5=0.1281 Nov@5=1.8672 Gini-D=0.1736
Training epoch 6/20, 2.07, 1.02 loss = 15670680.00, Prec@5=0.1264 Recall@5=0.1264 NDCG@5=0.1282 Nov@5=1.8858 Gini-D=0.1756
Training epoch 7/20, 2.24, 1.20 loss = 15663698.00, Prec@5=0.1270 Recall@5=0.1270 NDCG@5=0.1287 Nov@5=1.9081 Gini-D=0.1787
Training epoch 8/20, 2.29, 1.02 loss = 15657085.00, Prec@5=0.1274 Recall@5=0.1274 NDCG@5=0.1288 Nov@5=1.9185 Gini-D=0.1779
Training epoch 9

In [7]:
# Persist the parameters of the continued-training model.
# NOTE(review): this writes to the same path the reload cell reads from, so the
# first checkpoint is replaced and re-running the reload cell will start from
# these weights instead.
fine_tuned_weights = cade_model_2.state_dict()
torch.save(fine_tuned_weights, "cdae_recommender.pth")