In [1]:
import sys

# Put the parent directory on the import search path; presumably this is what
# lets the project-local `DataLoader.*` and `Training.*` imports in later cells
# resolve. "../" is relative to the current working directory, so the notebook
# has to be run from its own folder — TODO confirm.
sys.path.append("../")

In [2]:
from torch.utils.data import DataLoader
import os
import pandas as pd
import numpy as np

In [3]:
# Sizes passed to the dataset constructors and to the fit / eval_test calls below
# (presumably the number of distinct users and recipes — confirm against preprocessing).
n_users, n_items = 226_570, 231_637

# Mini-batch size shared by every DataLoader built in this notebook.
# NOTE(review): 4086 looks like it may have been meant as 4096 — confirm.
batch_size = 4086

# Interaction CSVs for the three splits; they all sit in the same
# processed_dataframes folder and differ only in the file stem.
train_path, val_path, test_path = (
    f"../Preprocessing/processed_dataframes/{split}.csv"
    for split in ("train", "val", "test")
)

In [4]:
def get_dataloader(csv_path, dataset, has_rating_column, batch_size, num_workers, shuffle, **kwargs):
    """Instantiate ``dataset`` on ``csv_path`` and wrap it in a ``DataLoader``.

    Args:
        csv_path: Passed to the dataset as ``interactions_file``.
        dataset: Dataset class (or any callable) accepting the keyword
            arguments ``interactions_file``, ``n_users``, ``n_items`` and
            ``has_rating_column``.
        has_rating_column: Forwarded unchanged to the dataset.
        batch_size: Forwarded to ``DataLoader``.
        num_workers: Forwarded to ``DataLoader``.
        shuffle: Forwarded to ``DataLoader``.
        **kwargs: Extra keyword arguments forwarded to the dataset constructor.

    Returns:
        The ``DataLoader`` wrapping the newly built dataset.

    Note:
        ``n_users`` and ``n_items`` are not parameters; they are read from the
        notebook's module-level globals at call time.
    """
    # Build the dataset first so the two construction steps read separately.
    interactions = dataset(
        interactions_file=csv_path,
        n_users=n_users,
        n_items=n_items,
        has_rating_column=has_rating_column,
        **kwargs,
    )
    loader_options = {
        "batch_size": batch_size,
        "num_workers": num_workers,
        "shuffle": shuffle,
    }
    return DataLoader(interactions, **loader_options)


## Simple NeuMF

In [5]:
from DataLoader.simple_dataset import SimpleFoodRatingsDataset

In [6]:
# Training loader: shuffle=True makes the DataLoader reshuffle the data every epoch.
simple_train_dataloader = get_dataloader(
    train_path,
    SimpleFoodRatingsDataset,
    has_rating_column=True,
    batch_size=batch_size,
    num_workers=8,
    shuffle=True,
)

# Validation loader: iterate in file order. Shuffling evaluation data adds
# nothing, needlessly draws from the RNG, and changes which samples end up in
# the short final batch from one epoch to the next.
simple_val_dataloader = get_dataloader(
    val_path,
    SimpleFoodRatingsDataset,
    has_rating_column=True,
    batch_size=batch_size,
    num_workers=2,
    shuffle=False,
)

In [7]:
from Training.simple.simple_train_functions import fit, eval_test

In [8]:
# Keyword arguments for the simple NeuMF training run, gathered in one mapping
# so the call below stays short. The meaning of each key is defined by
# Training.simple.simple_train_functions.fit.
simple_fit_options = {
    "k_gmf": 8,
    "k_mlp": 8,
    "layer_sizes": [16, 32, 16, 8],
    "alpha": 0.5,
    "lr": 0.001,
    "epochs": 5,
    "weight_path": "simple_neumf",
    "run_number": 1,
    "random_state": None,  # NOTE(review): presumably leaves the run unseeded — confirm
    "show_loss": True,
}

# Whatever fit returns is kept in `losses`.
losses = fit(
    simple_train_dataloader,
    simple_val_dataloader,
    n_users,
    n_items,
    **simple_fit_options,
)

  0%|          | 0/5 [00:00<?, ?it/s]

epoch: 1, train_loss: 3.8409165013406676, val_loss: 1.0295366074283645
epoch: 2, train_loss: 0.9368984924623099, val_loss: 0.9129273652293595
epoch: 3, train_loss: 0.8714276718189435, val_loss: 0.9031313386957992
epoch: 4, train_loss: 0.8341657614770769, val_loss: 0.9071860467164737
epoch: 5, train_loss: 0.773603942745666, val_loss: 0.93413598380945


In [9]:
print("TEST SIMPLE NEUMF")

# Weights directory for this run; presumably where
# fit(weight_path="simple_neumf", run_number=1) stored its checkpoint(s) —
# confirm against the training code.
simple_run_dir = "simple_neumf/run_1/"

# os.listdir() returns entries in arbitrary order, so sort before picking one:
# the evaluated checkpoint is then the same on every machine and every re-run.
simple_checkpoints = sorted(os.listdir(simple_run_dir))
if not simple_checkpoints:
    # Fail with a clear message instead of an IndexError on an empty directory.
    raise FileNotFoundError(
        f"no checkpoint found in {simple_run_dir!r}; run the training cell first"
    )

# The architecture arguments must match the ones used for training so the
# saved weights fit the rebuilt model.
print(eval_test(
    f"{simple_run_dir}{simple_checkpoints[0]}",
    test_path,
    SimpleFoodRatingsDataset,
    n_users,
    n_items,
    k_gmf=8,
    k_mlp=8,
    layer_sizes=[16, 32, 16, 8],
))

TEST SIMPLE NEUMF
0.9041135066647327


## Extra feature branches

In [10]:
# CSV of recipe data; handed to ExtraFoodRatingsDataset as `recipes_file` in the cells below.
recipes_path = "../Preprocessing/processed_dataframes/sorted_recipes.csv"

In [11]:
from DataLoader.extra_features_dataset import ExtraFoodRatingsDataset
from Training.extra_features.features_train_functions import fit as fit_extra, eval_test as eval_test_extra

# `recipes_file` is not a get_dataloader parameter: it travels through **kwargs
# to the ExtraFoodRatingsDataset constructor.
# Training loader: shuffle=True makes the DataLoader reshuffle the data every epoch.
extra_train_dataloader = get_dataloader(
    train_path,
    ExtraFoodRatingsDataset,
    has_rating_column=True,
    batch_size=batch_size,
    num_workers=8,
    shuffle=True,
    recipes_file=recipes_path,
)

# Validation loader: iterate in file order. Shuffling evaluation data adds
# nothing, needlessly draws from the RNG, and changes which samples end up in
# the short final batch from one epoch to the next.
extra_val_dataloader = get_dataloader(
    val_path,
    ExtraFoodRatingsDataset,
    has_rating_column=True,
    batch_size=batch_size,
    num_workers=2,
    shuffle=False,
    recipes_file=recipes_path,
)

In [14]:
# Sizes for the two extra input branches; the test-evaluation cell reuses both names.
# (presumably the width of the numeric recipe features and of the text vector — confirm)
n_features, n_txt = 10, 120_900

# Keyword arguments for the extra-features training run, gathered in one
# mapping so the call below stays short. The meaning of each key is defined by
# Training.extra_features.features_train_functions.fit.
extra_fit_options = {
    "k_gmf": 8,
    "k_mlp": 8,
    "layer_sizes": [32, 16, 8],
    "feature_layers": [16, 8],
    "txt_layers": [128, 64, 32, 16, 8],
    "lr": 0.001,
    "epochs": 5,
    "weight_path": "extra_features_mf",
    "run_number": 1,
    "random_state": None,  # NOTE(review): presumably leaves the run unseeded — confirm
    "show_loss": True,
}

# Whatever fit_extra returns is kept in `losses`.
losses = fit_extra(
    extra_train_dataloader,
    extra_val_dataloader,
    n_users,
    n_items,
    n_features,
    n_txt,
    **extra_fit_options,
)

  0%|          | 0/5 [00:00<?, ?it/s]

epoch: 1, train_loss: 2.9382506842624214, val_loss: 1.1112619885522537
epoch: 2, train_loss: 0.9523174373347412, val_loss: 0.9162208042967354
epoch: 3, train_loss: 0.8508080328377202, val_loss: 0.9158563189240325
epoch: 4, train_loss: 0.79488682428806, val_loss: 0.929753346204601
epoch: 5, train_loss: 0.7346583125073958, val_loss: 0.945062777037183


In [15]:
print("TEST EXTRA FEATURES NEUMF")

# Weights directory for this run; presumably where
# fit_extra(weight_path="extra_features_mf", run_number=1) stored its
# checkpoint(s) — confirm against the training code.
extra_run_dir = "extra_features_mf/run_1/"

# os.listdir() returns entries in arbitrary order, so sort before picking one:
# the evaluated checkpoint is then the same on every machine and every re-run.
extra_checkpoints = sorted(os.listdir(extra_run_dir))
if not extra_checkpoints:
    # Fail with a clear message instead of an IndexError on an empty directory.
    raise FileNotFoundError(
        f"no checkpoint found in {extra_run_dir!r}; run the training cell first"
    )

# The architecture arguments must match the ones used for training so the
# saved weights fit the rebuilt model.
print(eval_test_extra(
    f"{extra_run_dir}{extra_checkpoints[0]}",
    test_path,
    ExtraFoodRatingsDataset,
    n_users,
    n_items,
    n_features,
    n_txt,
    k_gmf=8,
    k_mlp=8,
    layer_sizes=[32, 16, 8],
    feature_layers=[16, 8],
    txt_layers=[128, 64, 32, 16, 8],
    recipes_file=recipes_path,
))

TEST EXTRA FEATURES NEUMF
0.916086670894964
