In [1]:
# Notebook-wide settings: adjust these here instead of editing the cells below.
CONFIG_FILE = "config/config_synthetic.json"  # experiment configuration handed to load_config
LOG_FILE_NAME = "main.log"  # target of the logging FileHandler
N_REPEAT_EXP = 10  # how many times the NCF baseline is trained and evaluated

In [2]:
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.models.ncf.dataset import Dataset as NCFDataset
import logging
import torch
from torch_geometric.data import HeteroData
import pandas as pd
import os

from diff_fnn.utils import logging_decorator, Config
from diff_fnn.metrics import evaluation_at_k
from diff_fnn.utils import load_config
from diff_fnn.data.data import load_preprocessed_data
from diff_fnn.tables_and_visualisations import generate_tables_and_visualisations

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# init logging
# logging.basicConfig() silently does nothing when the root logger already has
# handlers (for example ones installed by a library during import). In that case
# neither the format below nor the FileHandler would be applied. force=True
# removes and closes any existing root handlers first, which also makes this
# cell safe to re-run without stacking duplicate handlers.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] [%(levelname)s] %(message)s',
    handlers=[
        logging.StreamHandler(),            # echo log records into the notebook output
        logging.FileHandler(LOG_FILE_NAME)  # and persist them to the log file
    ],
    force=True,
)

# Load the experiment configuration and the four preprocessed data splits.
config = load_config(CONFIG_FILE)
train_data, val_data, train_plus_val_data, test_data = load_preprocessed_data(config)

INFO:root:Load config file...
INFO:root:config=Config(
  data=DatasetConfig(
    name='synthetic',
    good_rating_threshold=0.5,
    rating_edge_name=('user', 'rates', 'movie')
  ),
  model=ModelConfig(
    with_neural_network=False,
    num_of_learned_atoms=0,
    num_of_horn_clauses=4
  ),
  training=TrainingConfig(
    learning_rates={'horn_layers': 0.05, 'neural_net': 1e-05},
    num_of_epochs=300,
    batch_size=-1,
    loss_fn=MSELoss(),
    l1_lambda=0.2
  ),
  evaluation=EvaluationConfig(
    val_size=0.1,
    test_size=0.2,
    lightgcn_lr=0.05,
    lightgcn_epochs=50
  )
  results_path='results/synthetic/final'
  use_final_testset='True'
)
INFO:root:Load config file done.
INFO:root:Load preprocessed data...
INFO:root:Preprocess data...
INFO:root:Generate synthetic data...
INFO:root:Generate synthetic data done.
INFO:root:synthetic_graph=HeteroData(
  movie={
    x=[3883, 5],
    x_names=[5],
  },
  user={
    x=[6040, 4],
    x_names=[4],
  },
  (user, rates, movie)={
    ed

In [None]:
# see also https://github.com/recommenders-team/recommenders/blob/main/examples/00_quick_start/ncf_movielens.ipynb
# hyperparameter settings are taken from the original NCF paper
@logging_decorator("Test NCF baseline")
def test_baseline_ncf(config: Config, n_repeat_exp, train_data: HeteroData, evaluation_data: HeteroData):
    """Train the NeuMF baseline ``n_repeat_exp`` times and collect its metrics.

    For every repetition a fresh ``NCF`` model is fitted on the ratings CSV of
    the configured dataset, predictions are made for the labelled user-item
    pairs of ``evaluation_data`` and the result of ``evaluation_at_k`` is
    recorded.

    Args:
        config: Experiment configuration. Reads ``config.use_final_testset``,
            ``config.data.name`` and ``config.data.rating_edge_name``.
        n_repeat_exp: Number of independent train/evaluate repetitions.
        train_data: Graph whose node counts define the number of users and
            items the model is built for.
        evaluation_data: Graph providing ``edge_label_index`` / ``edge_label``
            on the rating edge type and the number of user nodes.

    Returns:
        dict mapping every key returned by ``evaluation_at_k`` to the list of
        its per-repetition values (empty dict if ``n_repeat_exp`` is 0).
    """
    # Model parameters (identical for every repetition)
    EPOCHS = 10
    BATCH_SIZE = 256

    # NOTE(review): this is a plain truthiness test. The logged config shows
    # use_final_testset='True' in quotes - if the value really is a string,
    # 'False' would also select the train+val file. Confirm in load_config.
    ratings_split = 'train_plus_val' if config.use_final_testset else 'train'
    data = NCFDataset(
        train_file=f'data/{config.data.name}/processed/{ratings_split}_ratings.csv'
    )

    edge_name = config.data.rating_edge_name
    item_name = edge_name[-1]

    num_of_users = train_data['user'].num_nodes
    num_of_items = train_data[item_name].num_nodes

    # NOTE: disable user and item id mapping, so that the model also works with
    # the test users and items, which are not in the training data
    data.user2id = {i: i for i in range(num_of_users)}
    data.item2id = {i: i for i in range(num_of_items)}
    data.id2user = {i: i for i in range(num_of_users)}
    data.id2item = {i: i for i in range(num_of_items)}

    # The evaluation inputs do not depend on the trained model, so they are
    # extracted once instead of once per repetition.
    eval_edges = evaluation_data[edge_name]
    eval_label_index = eval_edges.edge_label_index
    eval_user_indices_list = eval_label_index[0].tolist()
    eval_item_indices_list = eval_label_index[1].tolist()
    eval_labels = eval_edges.edge_label
    num_of_eval_users = evaluation_data['user'].num_nodes

    results_at_k = dict()
    for _ in range(n_repeat_exp):
        model = NCF(
            n_users=num_of_users,
            n_items=num_of_items,
            model_type="NeuMF",
            n_factors=8,
            layer_sizes=[32, 16, 8],
            n_epochs=EPOCHS,
            batch_size=BATCH_SIZE,
            learning_rate=0.001,
            verbose=1
        )

        model.fit(data)

        # Score only the labelled evaluation (user, item) pairs; the metrics
        # are then derived from these predictions by evaluation_at_k.
        test_pred = model.predict(eval_user_indices_list, eval_item_indices_list, is_list=True)
        test_pred = torch.tensor(test_pred)

        results_at_k_this_run = evaluation_at_k(config, test_pred, eval_labels, num_of_eval_users, eval_label_index)
        logging.info(f"{results_at_k_this_run=}")

        # Append this run's value to the per-metric history; setdefault creates
        # the list the first time a key is seen.
        for key in results_at_k_this_run:
            results_at_k.setdefault(key, []).append(results_at_k_this_run[key])

    return results_at_k

In [None]:
# Final run: train on train+val and evaluate on the test split.
# Otherwise: train on the training split and evaluate on the validation split.
# NOTE(review): plain truthiness test - the logged config shows
# use_final_testset='True' in quotes; if it is a string, 'False' is truthy too.
if config.use_final_testset:
    ncf_train_data, ncf_eval_data = train_plus_val_data, test_data
else:
    ncf_train_data, ncf_eval_data = train_data, val_data

ncf_dict = {
    'NCF': test_baseline_ncf(config, N_REPEAT_EXP, ncf_train_data, ncf_eval_data)
}

# One row per approach, one column per metric; each cell holds the list of
# per-repetition values returned by test_baseline_ncf.
ncf_results_df = pd.DataFrame({
    approach: dict(metrics)
    for approach, metrics in ncf_dict.items()
}).transpose()

# add ncf to results csv
results_csv_path = os.path.join(config.results_path, "results.csv")
results_df = pd.read_csv(results_csv_path, index_col=0)
# Remove rows written by an earlier execution of this cell, so that re-running
# replaces the NCF results instead of appending a duplicate row each time.
results_df = results_df.drop(index=ncf_results_df.index, errors="ignore")
results_df = pd.concat([results_df, ncf_results_df], axis=0)
results_df.to_csv(results_csv_path)

INFO:root:Test NeuMF baseline...
INFO:recommenders.models.ncf.dataset:Indexing data/synthetic/processed/train_plus_val_ratings.csv ...
I0000 00:00:1746342593.205009 11106595 mlir_graph_optimization_pass.cc:425] MLIR V1 optimization pass is not enabled
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 1 [40.27s]: train_loss = 0.259734 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 2 [41.58s]: train_loss = 0.248783 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 3 [44.53s]: train_loss = 0.243517 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 4 [42.15s]: train_loss = 0.240940 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 5 [41.07s]: train_loss = 0.239348 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 6 [40.19s]: train_loss = 0.238115 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 7 [39.92s]: train_loss = 0.237119 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 8 [41.51s]: train_loss = 0.236251 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 9 [39.40s]: train_

In [7]:
# Final step: hand the loaded config and the repetition count to the project's
# reporting helper.
# NOTE(review): presumably this reads results.csv under config.results_path
# (the file the NCF results were appended to above) - confirm in
# diff_fnn.tables_and_visualisations.
generate_tables_and_visualisations(config, N_REPEAT_EXP)

INFO:root:Generate tables and visualisations...
INFO:root:Generate tables and visualisations done.
