In [1]:
import ast

import pandas as pd

In [2]:
# Load the preprocessed user/recipe interactions.
interactions = pd.read_csv("../../Preprocessing/processed_dataframes/interactions.csv")

# Load the recipe table ordered by recipe_id; later cells look recipes up
# by row position, so the ordering matters.
recipes = pd.read_csv(
    "../../Preprocessing/processed_dataframes/recipes_with_text.csv"
).sort_values(by="recipe_id")

Average ratings per recipe

In [3]:
# Per-recipe mean of every column except user_id, highest mean rating first.
avg_ratings = (
    interactions
    .drop(columns=["user_id"])
    .groupby("recipe_id", as_index=False)
    .mean()
    .sort_values(by="rating", ascending=False)
)

Show the top 50 recipes by average rating

In [4]:
# Disabled preview: when uncommented, prints the name, id and joined steps of
# the first 50 rows of avg_ratings (i.e. the recipes with the highest mean rating).
# for _, row in avg_ratings.head(50).iterrows():
#     recipe_data = recipes.iloc[int(row["recipe_id"])]
    
#     print("Name:", recipe_data["name"], f"({int(row['recipe_id'])})")
#     print()
#     print(", ".join(eval(recipe_data["steps"])))
#     print("-"*50)

## Extend training data

In [5]:
train_csv = pd.read_csv("../../Preprocessing/processed_dataframes/train.csv")

# The new user gets the first id after the largest one present in the interactions.
my_user_id = interactions["user_id"].max() + 1

# Hand-entered ratings for the new user; the two lists are aligned by position.
my_recipe_ids = [
    134161, 134170, 134171, 134191, 134159, 134152, 134151, 134128,
    134235, 134257, 134138, 134139, 134189, 134192, 134153, 134193,
]
my_recipe_ratings = [2, 5, 3, 4, 2, 1, 4, 1, 3, 5, 5, 3, 1, 1, 1, 1]

my_ratings = pd.DataFrame({
    "user_id": [my_user_id] * len(my_recipe_ids),
    "recipe_id": my_recipe_ids,
    "rating": my_recipe_ratings,
})
my_ratings.to_csv("alt_my_ratings.csv", index=False)

# Append the new user's rows to the training split and persist the result
# so it can be used as the training CSV.
extended_train = pd.concat([train_csv, my_ratings])
extended_train.to_csv("alt_extended_train.csv", index=False)

## Train on extended data

In [6]:
import sys
# Add the directory two levels up to the import path; the DataLoader and
# Training packages imported later in this notebook are presumably located there.
sys.path.append("../../")

from torch.utils.data import DataLoader

# Hard-coded sizes handed to the dataset and the model in later cells.
# NOTE(review): the "+1" presumably accounts for the extra user appended to the
# training data — confirm it stays in sync with my_user_id.
n_users = 226570+1
n_items = 231637

In [7]:
# NOTE(review): 4086 is an unusual batch size — possibly a typo for 4096; confirm.
batch_size = 4086

# Training reads the extended CSV written earlier in this notebook;
# validation reads the preprocessed validation split.
train_path = "alt_extended_train.csv"
val_path = "../../Preprocessing/processed_dataframes/val.csv"

def get_dataloader(csv_path, dataset, has_rating_column, batch_size, num_workers, shuffle, **kwargs):
    """Instantiate ``dataset`` on a CSV file and wrap it in a ``DataLoader``.

    Args:
        csv_path: Passed to the dataset as ``interactions_file``.
        dataset: Dataset class (or factory) to call; it receives the
            notebook-level ``n_users`` and ``n_items`` as keyword arguments.
        has_rating_column: Forwarded unchanged to the dataset.
        batch_size: Batch size for the ``DataLoader``.
        num_workers: Number of worker processes for the ``DataLoader``.
        shuffle: Whether the ``DataLoader`` shuffles the data.
        **kwargs: Extra keyword arguments forwarded to the dataset.

    Returns:
        The ``DataLoader`` built around the freshly created dataset.
    """
    ratings_dataset = dataset(
        interactions_file=csv_path,
        n_users=n_users,
        n_items=n_items,
        has_rating_column=has_rating_column,
        **kwargs,
    )
    loader = DataLoader(
        ratings_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=shuffle,
    )
    return loader


from DataLoader.simple_dataset import SimpleFoodRatingsDataset
from Training.simple.simple_train_functions import fit, eval_test

# Training loader over the extended training CSV.
simple_train_dataloader = get_dataloader(
    csv_path=train_path,
    dataset=SimpleFoodRatingsDataset,
    has_rating_column=True,
    batch_size=batch_size,
    num_workers=8,
    shuffle=True,
)
# NOTE(review): the validation loader is shuffled as well; shuffling is
# normally unnecessary for evaluation — confirm this is intentional.
simple_val_dataloader = get_dataloader(
    csv_path=val_path,
    dataset=SimpleFoodRatingsDataset,
    has_rating_column=True,
    batch_size=batch_size,
    num_workers=2,
    shuffle=True,
)

# Architecture settings; the checkpoint-loading cell repeats the same values.
neumf_hyperparams = dict(
    k_gmf=8,
    k_mlp=8,
    layer_sizes=[16, 32, 16, 8],
    alpha=0.5,
)

# NOTE(review): random_state=None presumably leaves the run unseeded, so the
# losses (and any checkpoint names derived from them) may differ between runs.
losses = fit(
    simple_train_dataloader,
    simple_val_dataloader,
    n_users,
    n_items,
    **neumf_hyperparams,
    lr=0.001,
    epochs=5,
    weight_path="simple_neumf",
    run_number=2,
    random_state=None,
    show_loss=True,
)

  0%|          | 0/5 [00:00<?, ?it/s]

epoch: 1, train_loss: 4.17614877402404, val_loss: 1.5319633986725931
epoch: 2, train_loss: 0.9857126393460692, val_loss: 0.928312345053225
epoch: 3, train_loss: 0.8715833616586862, val_loss: 0.9097214673214177
epoch: 4, train_loss: 0.831595067670928, val_loss: 0.9113662852893862
epoch: 5, train_loss: 0.7955926341205799, val_loss: 0.9230358689015259


In [8]:
import os
import torch
from Training.simple.simple_train_functions import define_model

# Rebuild the network with the same hyperparameters that were passed to fit().
trained_model = define_model(
    n_users,
    n_items,
    k_gmf=8,
    k_mlp=8,
    layer_sizes=[16, 32, 16, 8],
    alpha=0.5,
)

# NOTE(review): the checkpoint name is hard-coded and embeds the epoch-3
# validation loss from the run logged above; a re-run that produces a different
# loss will presumably write a different file name and break this cell.
checkpoint_path = "simple_neumf/run_2/3-0.9097214673214177.pt"
trained_model.load_state_dict(torch.load(checkpoint_path))

<All keys matched successfully>

Get the IDs of the items that the user has not rated yet

In [9]:
# Collect every item id in [0, n_items) that the new user has not rated.
# The rated ids are materialised once as a set, so each membership test is
# O(1) instead of rebuilding and scanning a list for every candidate item.
rated_ids = set(my_ratings["recipe_id"].tolist())
unrated_ids = [item_id for item_id in range(n_items) if item_id not in rated_ids]

In [10]:
# Switch to inference mode so layers such as dropout or batch norm (if the
# model has any) behave deterministically while scoring.
trained_model.eval()

# Put the inputs on whatever device the model's parameters live on instead of
# assuming CUDA is available.
model_device = next(trained_model.parameters()).device

with torch.no_grad():
    # One (user, item) pair per unrated item, all for the new user.
    user_id_batch = torch.full(
        (len(unrated_ids),), int(my_user_id), dtype=torch.long, device=model_device
    )
    item_id_batch = torch.tensor(unrated_ids, dtype=torch.long, device=model_device)

    # Flatten the model output to a 1-D NumPy array aligned with unrated_ids.
    predictions_for_me = trained_model(user_id_batch, item_id_batch).ravel().cpu().numpy()
    print(predictions_for_me.shape)

(231621,)


Top 10 recommended recipes

In [11]:
# (prediction, recipe_id) tuples ordered from the highest predicted rating down.
id_rating_pair_list = sorted(zip(predictions_for_me, unrated_ids), reverse=True)

In [12]:
# Print the name and preparation steps of the ten highest-scoring unrated recipes.
for _, recipe_id in id_rating_pair_list[:10]:
    # Positional lookup into the recipe table.
    # NOTE(review): this assumes recipe ids equal row positions once the table
    # is sorted by recipe_id (contiguous ids starting at 0) — confirm.
    recipe_data = recipes.iloc[recipe_id]
    print("Name:", recipe_data["name"])
    print()
    # "steps" is expected to hold the text form of a Python list of strings;
    # literal_eval parses literals only, unlike eval(), which would execute
    # arbitrary code found in the CSV.
    print(", ".join(ast.literal_eval(recipe_data["steps"])))
    print("-"*50)

Name: kittencal s famous barbecue sauce for chicken and ribs

heat oil over medium heat, add in onion and saute for about 3 minutes , then add in garlic and saute for 2 minutes, add in chili powder , paprika , cayenne , salt and black pepper, stir for 1 minute, add in all remaining ingredients, bring to a boil stirring with a wooden spoon to combine, reduce heat to low and simmer uncovered stirring occasionally for 1 hour over low heat, cool to room temperature then cover and refrigerate for 24 hours or more before using
--------------------------------------------------
Name: the clockmaker s caramel coated popcorn  a haunted recipe

in a large pot , heat the margarine , brown sugar , karo syrup , and salt together over medium heat, while stirring , bring mixture to a boil and allow to boil for five minutes, remove from heat, add baking soda and vanilla, mix well, pour over freshly popped popcorn, stir until the popcorn is well coated, enjoy !
-----------------------------------------