In [1]:
# Notebook outline:
#   1. Load the dataframes.
#   2. Load the datasets.
#   3. Initialize the dataset class.
#   4. Initialize the model class.

In [2]:
import pandas as pd
import numpy as np
import argparse
import yaml
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from src.models.ncf import SimpleNCF, DeepNCF
from src.training.eval import collect_user_predictions, compute_metrics
from src.training.train_mlp import train_model, evaluate_model
from src.data.datasets import PointwiseImplicitDataset, OfflineImplicitDataset
from src.utils.hparam_search import param_comb
from src.data.samplers import GlobalUniformNegativeSampler

In [3]:
def load_config(path):
    """Read a YAML configuration file and return its parsed contents.

    Args:
        path: Location of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Pin the text encoding: without ``encoding`` Python decodes with the
    # locale's preferred encoding, so the same config could load differently
    # (or fail to decode) from one machine to the next.
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

In [4]:
from src.utils.constants import (
    DEFAULT_USER_COL as USER,
    DEFAULT_ITEM_COL as ITEM,
    DEFAULT_TARGET_COL as TARGET,
    DEFAULT_TIMESTAMP_COL as TIMESTAMP,
)

# Run configuration: MODEL_TYPE names the per-model section of the YAML config,
# and TUNE switches between the tuning setup and the non-tuning setup.
MODEL_TYPE = "SimpleNCF"
TUNE = False
CONFIG = load_config("src/config/ncf.yml")

DEVICE = CONFIG["system"]["device"]
LOCATION = CONFIG["data"]["location"]

# When tuning, use the "tuning" section with the train / val file names;
# otherwise use "optim_params" with the train_val / test file names.
MODEL_CONFIG = CONFIG[MODEL_TYPE]["tuning" if TUNE else "optim_params"]
TRAIN_FILE, TEST_FILE = ("train", "val") if TUNE else ("train_val", "test")

In [5]:
# Read the train file, the test file and the full interaction log, in that order.
df_train, df_test, df_interactions = (
    pd.read_parquet(f"{LOCATION}/{TRAIN_FILE}.parquet"),
    pd.read_parquet(f"{LOCATION}/{TEST_FILE}.parquet"),
    pd.read_parquet(f"{LOCATION}/interactions.parquet"),
)

# For every user, the set of items that appear with that user in the full log.
user_positive_items = (
    df_interactions
    .groupby(USER)[ITEM]
    .apply(set)
    .to_dict()
)

# Sizes are taken as "largest id + 1".
# NOTE(review): presumably ids are 0-based integers — confirm.
n_users, n_items = (df_interactions[col].max() + 1 for col in (USER, ITEM))

negative_sampler = GlobalUniformNegativeSampler(n_items, user_positive_items)

In [None]:
# Build the hyperparameter sets to iterate over from the selected model config.
# NOTE(review): param_comb is defined in src.utils.hparam_search; presumably it
# expands a search grid when is_tune is true and yields a single set otherwise
# — confirm.
hparam_combinations = param_comb(config=MODEL_CONFIG, is_tune=TUNE)

for hparams in hparam_combinations:
    # NOTE(review): an earlier comment here described merging fixed settings
    # with the current trial settings so that 'step_size' and 'gamma' are
    # available. No merge happens in this cell: hparams is used exactly as
    # param_comb returns it, so it must already contain every key read below.

    print(f"Testing: {hparams}")

    # ------------------------------------------------------------------------------
    # ------ Model Related Parameters
    # ------------------------------------------------------------------------------

    # Unpack the current trial's settings. A missing key raises KeyError here.
    # In the visible part of this cell only N_NEGATIVES, BATCH_SIZE and
    # N_WORKERS are used; the remaining names are presumably consumed further
    # down the loop body — confirm.
    EPOCHS = hparams["epochs"]
    N_NEGATIVES = hparams["n_negatives"]
    BATCH_SIZE = hparams["batch_size"]
    N_WORKERS = hparams["n_workers"]
    STEP_SIZE = hparams["step_size"]
    GAMMA = hparams["gamma"]
    LOG_EVERY = hparams["log_every"]
    THRESHOLD = hparams["threshold"]

    # ------------------------------------------------------------------------------
    # ------ Prepare Dataset / Loader
    # ------------------------------------------------------------------------------

    # TODO: wrap the dataset / loader construction below in a single
    # NCFDataset-style object exposing train_dataset, train_loader,
    # test_dataset and test_loader.

    # Training data: users, items and timestamps from df_train, plus the shared
    # negative sampler and this trial's n_negatives.
    train_dataset = PointwiseImplicitDataset(
        users=df_train[USER].values,
        items=df_train[ITEM].values,
        timestamps=df_train[TIMESTAMP].values,
        negative_sampler=negative_sampler,
        n_negatives=N_NEGATIVES,
    )
    # Training batches are shuffled.
    train_loader = DataLoader(
        train_dataset, batch_size=BATCH_SIZE, num_workers=N_WORKERS, shuffle=True
    )

    # Evaluation data: users, items and targets taken directly from df_test.
    test_dataset = OfflineImplicitDataset(
        users=df_test[USER].values,
        items=df_test[ITEM].values,
        targets=df_test[TARGET].values,
    )

    # Evaluation batches keep the dataset order (no shuffling).
    test_loader = DataLoader(
        test_dataset, batch_size=BATCH_SIZE, num_workers=N_WORKERS, shuffle=False
    )