In [1]:
from pathlib import Path
import torch
import json
import pandas as pd
import tqdm
import math

from aau2v.config import ModelConfig, TrainerConfig
from aau2v.dataset_center import load_dataset_center
from aau2v.trainer import PyTorchTrainer
from aau2v.model import load_model, PyTorchModel

In [2]:
def calc_accuracy(
    model: PyTorchModel,
    num_item: int,
    test_dataset: list[tuple[int, list[int], list[int]]],
    top_k: list[int],
) -> dict[str, float]:
    """Score the model's top-k recommendation lists against held-out items.

    The model is put into eval mode and asked for ``max(top_k)``
    recommendations per test row, with ``0 .. num_item - 1`` as candidates.
    For every cut-off ``k`` the number of distinct target items found among
    the first ``k`` recommendations is summed over all rows and divided by
    ``len(test_dataset) * k``.

    Args:
        model: Object exposing ``eval()`` and ``output_rec_lists(...)``.
        num_item: Number of candidate items passed to the model.
        test_dataset: Rows of ``(seq_index, context_items, target_items)``.
            The context lists are packed into one ``LongTensor``, so they
            must all have the same length.
        top_k: Cut-offs to evaluate. Duplicates are evaluated once.

    Returns:
        Mapping ``"Accuracy@{k}"`` to the score for each distinct ``k``.
        An empty ``top_k`` yields ``{}``; an empty ``test_dataset`` yields
        ``0.0`` for every cut-off instead of dividing by zero.
    """
    model.eval()

    # Nothing to evaluate: avoid max() on an empty sequence.
    if not top_k:
        return {}

    total_rec = len(test_dataset)
    # No test rows: avoid the division by zero below.
    if total_rec == 0:
        return {f"Accuracy@{k}": 0.0 for k in top_k}

    seq_index = torch.LongTensor([row[0] for row in test_dataset])
    context_items = torch.LongTensor([row[1] for row in test_dataset])

    # Evaluation only; no gradients are needed for ranking.
    with torch.no_grad():
        rec_list = model.output_rec_lists(
            seq_index=seq_index,
            item_indices=context_items,
            cand_item_indices=torch.arange(num_item),
            k=max(top_k),
        )

    hit_counts = {k: 0 for k in top_k}
    for i, row in enumerate(test_dataset):
        # Build the target set once per row rather than once per cut-off.
        targets = set(row[2])
        # NOTE(review): assumes rec_list[i] is a sequence of plain ints so the
        # set intersection matches by value - confirm against output_rec_lists.
        recs = rec_list[i]
        # Iterate over the distinct cut-offs so a repeated k is not counted twice.
        for k in hit_counts:
            hit_counts[k] += len(targets & set(recs[:k]))

    return {
        f"Accuracy@{k}": hit_count / total_rec / k
        for k, hit_count in hit_counts.items()
    }

In [3]:
def _run_config(name, model_name, d_model, **overrides):
    """Return one experiment description.

    Every run shares 10 epochs with weight tying, attention and meta
    features enabled; ``overrides`` replaces individual settings for the
    ablation runs. Key order is fixed by the base dict.
    """
    config = {
        "name": name,
        "model_name": model_name,
        "d_model": d_model,
        "epochs": 10,
        "use_weight_tying": True,
        "use_attention": True,
        "use_meta": True,
    }
    config.update(overrides)
    return config


# Embedding-size sweep for both models, followed by three AAU2V ablations
# that each switch off exactly one component at d_model=64.
run_configs = [
    *(_run_config(f"User2Vec (d={dim})", "user2vec", dim) for dim in (16, 32, 64)),
    *(_run_config(f"AAU2V (d={dim})", "aau2v", dim) for dim in (16, 32, 64)),
    _run_config("AAU2V (wo weight-tying)", "aau2v", 64, use_weight_tying=False),
    _run_config("AAU2V (wo attention)", "aau2v", 64, use_attention=False),
    _run_config("AAU2V (wo meta)", "aau2v", 64, use_meta=False),
]

In [None]:
# Train every configuration in `run_configs`, evaluating and checkpointing the
# model from the `on_epoch_end` callback, and collect per-run accuracy and loss.
results = {}

# Neither torch.save nor open(..., "w") creates missing parent directories.
# Create them up front so a fresh checkout does not fail only after an epoch
# of training has already been spent.
model_dir = Path("cache/model/movielens")
model_dir.mkdir(parents=True, exist_ok=True)
result_path = Path("data/result.json")
result_path.parent.mkdir(parents=True, exist_ok=True)

for run_config in run_configs:
    results[run_config["name"]] = {"accuracy": []}
    model_config = ModelConfig(
        weight_decay=0,
        # The embedding norm cap scales with sqrt(d_model).
        max_embedding_norm=math.sqrt(run_config["d_model"]),
        d_model=run_config["d_model"],
        lr=5e-5,
        use_attention=run_config["use_attention"],
        use_meta=run_config["use_meta"],
        use_weight_tying=run_config["use_weight_tying"],
    )
    trainer_config = TrainerConfig(
        dataset_name="movielens",
        model_name=run_config["model_name"],
        load_dataset=False,
        save_dataset=False,
        ignore_saved_model=True,
        epochs=run_config["epochs"],
    )

    print(model_config)
    print(trainer_config)

    dataset_center = load_dataset_center(
        dataset_name=trainer_config.dataset_name,
        dataset_dir=trainer_config.dataset_dir,
        data_dir="../data/",
        load_dataset=trainer_config.load_dataset,
        save_dataset=trainer_config.save_dataset,
        window_size=model_config.window_size,
    )
    print(
        "train:",
        len(dataset_center.train_dataset),
        "valid:",
        len(dataset_center.valid_dataset),
    )
    model = load_model(
        dataset_center=dataset_center,
        trainer_config=trainer_config,
        model_config=model_config,
    )
    trainer = PyTorchTrainer(
        model=model,
        dataset_center=dataset_center,
        trainer_config=trainer_config,
        model_config=model_config,
    )

    def on_epoch_end(epoch: int) -> None:
        """Evaluate on the "test" split, record the scores and checkpoint the model.

        Handed to ``trainer.fit`` below; presumably invoked once per epoch
        (confirm against the trainer). It reads ``model``, ``dataset_center``
        and ``run_config`` from the enclosing loop iteration, so it is only
        valid while that iteration's ``trainer.fit`` call is running.
        """
        result = calc_accuracy(
            model=model,
            num_item=dataset_center.num_item,
            test_dataset=dataset_center.test_datasets["test"],
            top_k=[10, 20, 30, 40, 50],
        )
        print(f"epoch: {epoch}, result: {result}")
        results[run_config["name"]]["accuracy"].append(result)
        torch.save(model, model_dir / f"{run_config['name']}-{epoch}.pt")

    losses = trainer.fit(on_epoch_end=on_epoch_end)
    results[run_config["name"]]["loss"] = losses

    # Rewritten after every run so finished runs survive a later failure.
    with open(result_path, "w") as f:
        json.dump(results, f)

In [16]:
# Read the stored experiment results from disk; this rebinds `results`.
results = json.loads(Path("./result-paper.json").read_text())

In [17]:
# Cut-offs shown in the summary table: 10, 30 and 50.
top_k = [*range(10, 51, 20)]

# For each method keep, per cut-off, the highest score among all of the
# entries stored under its "accuracy" key.
data = {}
for method, result in results.items():
    recorded = result["accuracy"]
    data[method] = [
        max(entry[f"Accuracy@{k}"] for entry in recorded) for k in top_k
    ]

In [18]:
# Show four significant decimals in the rendered table.
pd.set_option("display.precision", 4)

# One row per method, one column per cut-off.
column_labels = [f"Accuracy@{k}" for k in top_k]
df = pd.DataFrame(data).T.set_axis(column_labels, axis="columns")
df

Unnamed: 0,Accuracy@10,Accuracy@30,Accuracy@50
User2Vec (d=16),0.1863,0.1803,0.1753
User2Vec (d=32),0.2098,0.1986,0.1874
User2Vec (d=64),0.2113,0.2027,0.1918
AU2V (d=16),0.2157,0.2062,0.1951
AU2V (d=32),0.2268,0.2094,0.1958
AU2V (d=64),0.229,0.2127,0.1984
AU2V (wo weight-tying),0.2268,0.2105,0.1973
AU2V (wo attention),0.2291,0.2115,0.1973
AU2V (wo meta),0.228,0.2096,0.1952
