# How to use

## In Google Colab
1. Download this notebook
2. Upload it to Google Colab
3. Execute it


## On a Local Machine

NOTICE: Requires CUDA 12.2. Runs on Windows 11 (64-bit).
1. Clone the repository
```bash
git clone git@github.com:rtr-x8/GNNMF-CL.git
```
2. Create a conda environment from the Anaconda Prompt
```bash
conda create -n GNNMF-CL python=3.11.11 -y
conda activate GNNMF-CL
conda install -c anaconda ipykernel -y
python -m ipykernel install --user --name GNNMF-CL --display-name "GNNMF-CL Env"

```
3. Select the conda environment in your editor (e.g., VS Code)


In [None]:
# Report the version of the interpreter this kernel is actually running.
# `!python --version` asks whichever `python` is first on PATH, which can be a
# different environment from the kernel selected in the editor.
import platform
print(f"Python {platform.python_version()}")
# expect Python 3.11.11

In [None]:
# Run once per environment.
# The install commands below are wrapped in a string literal, so running this
# cell as-is does not execute them. To (re)install the dependencies, remove the
# surrounding triple quotes, run the cell, then restore the quotes.
"""
!pip uninstall torch -y
!pip install -q --no-cache-dir torch==2.4.1 --index-url https://download.pytorch.org/whl/cu121
!pip uninstall torch-scatter torch-sparse pyg-lib torch-geometric torchvision -y
!pip install -q --no-cache-dir torch-geometric
!pip install -q --no-cache-dir pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.4.0+cu121.html
!pip install -q --no-cache-dir torchvision==0.19.1 --index-url https://download.pytorch.org/whl/cu121
!pip -q --no-cache-dir install torchmetrics

!pip install jupyter_contrib_nbextensions
!pip install --upgrade ipywidgets
!jupyter contrib nbextension install --user
!jupyter nbextension enable --py widgetsnbextension

!pip install scikit-learn tqdm pandas numpy setuptools datetime pytz sentence-transformers matplotlib seaborn
!pip install -q python-dotenv wandb
!pip install torchinfo
"""

import torch
import torch_geometric
import torch_scatter
import torch_sparse
import pyg_lib
import torchvision
import torchmetrics

In [None]:
import sys
import os


def purge_cached_modules(package):
    """Evict `package` and all of its submodules from the import cache.

    Removing only the top-level entry leaves already-imported submodules
    (e.g. ``package.loader``) in ``sys.modules``, so a later
    ``from package.loader import ...`` would still return the stale module.

    Args:
        package: Dotted name of the top-level package to evict.

    Returns:
        The list of module names that were removed (empty if none were cached).
    """
    prefix = package + "."
    stale = [name for name in sys.modules if name == package or name.startswith(prefix)]
    for name in stale:
        del sys.modules[name]
    return stale


# Force the imports below to pick up the current on-disk version of the package.
purge_cached_modules("vingat")

# Make the parent directory importable; guard so re-running the cell does not
# keep appending duplicate entries.
if '..' not in sys.path:
    sys.path.append('..')

from vingat.logger import get_run_name, log_metrics
from vingat.assertion import assert_package_versions
from vingat.loader import (
    core_file_loader, load_recipe_nutrients, load_ingredients,
    load_recipe_ingredients, load_recipe_cooking_directions,
    load_ingredients_with_embeddings, load_recipe_image_embeddings, load_recipe_image_embeddings_ft,
    load_recipe_image_vlm_caption,
    load_recipe_cooking_directions_embeddings,
    load_recipe_image_vlm_caption_embeddings,
    load_user_embeddings, load_alternative_ingredients,
    train_dataclustering, calculate_cluster, load_clouster_centers)
from vingat.loss import BPRLoss
from vingat.model import RecommendationModel
from vingat.functions import evaluate_model, save_model, train_func, get_item_popularity
from vingat import __version__ as vingat_version
from vingat.dataloader import create_base_hetero, mask_hetero, create_dataloader
from vingat.visualizer import visualize_node_pca
from vingat.preprocess import filter_recipe_ingredient, NutrientStandardPreprocess

# CUDA debugging switches, exported to the process environment in one call.
# NOTE(review): PyTorch's error messages describe TORCH_USE_CUDA_DSA as a
# compile-time option — confirm that exporting it here has any effect.
os.environ.update({
    'CUDA_LAUNCH_BLOCKING': "1",
    'TORCH_USE_CUDA_DSA': "1",
})

# Project-side check of the installed package versions.
assert_package_versions()


In [None]:
import torch

# Use the first CUDA GPU when one is available; fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Echo the selected device as the cell output.
device

In [None]:
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

import os
import wandb

# W&B API key taken from the WANDB_API environment variable (None when unset).
# NOTE(review): the variable name is misspelled ("wandab"); it is referenced
# under this exact name by the wandb.login cell, so rename both together.
wandab_api = os.getenv('WANDB_API')

In [None]:
from torch_geometric.data import HeteroData
from sklearn.preprocessing import LabelEncoder
import torch
from enum import Enum
import numpy as np
from vingat.loader import use_nutritions
import pandas as pd
from torch_geometric.loader import LinkNeighborLoader

import pandas as pd
import json


# Select the first CUDA GPU when available, otherwise the CPU.
# This repeats the expression from the earlier device cell.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
import random
# W&B project name; also passed to train_func as project_name.
PROJECT_NAME = "vingat-v3_local"
# Device selection (same expression as the earlier cells), printed for the log.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print("device: ", device)

# Single source of hyperparameters and environment metadata for this run.
# The whole dict is handed to wandb.init(config=...), and individual entries
# are read by the data-loading, model, optimizer and training cells.
CONFIG = {
    "architecture": "LightGCN, HGT, GAT",
    "_mode" : "in_local",
    "batch_size": 256,
    "bprloss_reg_lambda": 0.001,
    "cl_loss_rate": 0.2,
    "cluster_margin": 0.5,
    "cluster_loss_weight": 1.2,
    # Placeholder; the final cell writes the loss class name to wandb.config["criterion"].
    "criterion": "",
    # Weight decay for every parameter group except the user encoder.
    "default_decay": 0.00002,
    # torch.device object; later cells pass CONFIG["device"] to loaders and the model.
    "device": device,
    "dropout_rate": 0.3,
    "epochs": 20,
    "filter_ingredient_sim_score": 0.7,
    "fusion_gnn_after_dropout_rate": 0.2,
    "fusion_gnn_dropout_rate": 0.2,
    "fusion_gnn_resisual_alpha": 0.5,
    "fusion_layers": 1,
    "hidden_dimention": 128,
    "image_encoder_low_rank_dim": 64,
    "input_cooking_direction_dim": 384,
    "input_image_dim": 1024,
    "input_ingredient_dim": 384,
    "input_vlm_caption_dim": 384,
    "intention_cl_after_dropout_rate": 0.2,
    "intention_layers": 1,
    "item_encoder_dropout_rate": 0.2,
    "item_encoder_low_rank_dim": 64,
    "learning_rate": 0.00002,
    "link_predictor_dropout_rate": 0.2,
    "link_predictor_leaky_relu_slope": 0.3,
    "max_grad_norm": 30,
    "multi_head": 1,
    "node_embeding_dimmention": 32,
    "nutrient_dim": 20,
    "patience": 20,  # early-stopping patience; presumably counted in units of validation_interval — confirm in train_func
    # Library versions below are recorded so each run documents its environment.
    "pyg_lib v": pyg_lib.__version__,
    # Ratings threshold passed to core_file_loader and get_item_popularity.
    "rating_threshold": 3.5,
    # StepLR settings: gamma is the decay factor, scheduler_size the step_size.
    "scheduler_gamma": 0.975,
    "scheduler_size": 10,
    # Seed applied to random, numpy and torch in the next cell.
    "seed": 2020,
    "sencing_layers": 1,
    "sensing_gnn_resisual_alpha": 0.5,
    "taste_gnn_after_dropout_rate": 0.2,
    "taste_gnn_dropout_rate": 0.2,
    "temperature": 0.1,
    "torch v": torch.__version__,
    "torch_geometric v": torch_geometric.__version__,
    "torch_scatter v": torch_scatter.__version__,
    "torch_sparse v": torch_sparse.__version__,
    "user_encoder_dropout_rate": 0.2,
    "user_encoder_low_rank_dim": 64,
    # Weight decay for the user-encoder parameter group only.
    "user_encoder_weight_decay": 0.000001,
    "validation_interval": 5,
    "vingat_v": vingat_version,

}

In [None]:
# Apply the configured seed to every RNG this notebook relies on:
# Python's `random`, NumPy, and PyTorch (CPU and all CUDA devices).
_seed = CONFIG["seed"]
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(_seed)

# Run name = generated base name with the package version appended.
run_name = f"{get_run_name()}_{vingat_version}"

In [None]:
# Authenticate with the key read from the environment, then open the run.
wandb.login(key=wandab_api)

_run_settings = {
    "project": PROJECT_NAME,
    "name": run_name,
    "config": CONFIG,
    "tags": ["in_local", "use_mini_data"],
}
wandb.init(**_run_settings)

In [None]:
# Data directory, resolved relative to the current working directory:
# <cwd>/../data/mini. The notebook therefore has to be started from a folder
# that sits next to `data/`.
PATH = os.path.join(os.getcwd(), '..', 'data', 'mini')

# File loader
# Core recipes plus the train/test/validation rating splits.
core_recipes, core_train_rating, core_test_rating, core_val_rating = core_file_loader(PATH, CONFIG["rating_threshold"])
core_recipe_indices = core_recipes.index.values
# Each loader receives a copy so that it cannot modify `core_recipes` itself.
recipe_nutrients = load_recipe_nutrients(PATH, core_recipes.copy())
ingredients = load_ingredients(PATH, core_recipes.copy())
alternative_ingredients = load_alternative_ingredients(PATH, core_recipes.copy(), CONFIG["device"])
recipe_ingredients = load_recipe_ingredients(PATH, core_recipes.copy())
recipe_cooking_directions = load_recipe_cooking_directions(PATH, core_recipes.copy())
# Loaded from the still-unfiltered `ingredients` (the filter is applied further down).
ingredients_with_embeddings = load_ingredients_with_embeddings(PATH, ingredients.copy())
recipe_image_embeddings = load_recipe_image_embeddings_ft(PATH, core_recipes.copy(), CONFIG["device"])
recipe_image_vlm_caption = load_recipe_image_vlm_caption(PATH)
recipe_cooking_directions_embeddings = load_recipe_cooking_directions_embeddings(PATH, recipe_cooking_directions.copy())
# Order matters from here: `recipe_ingredients` is replaced by its filtered
# version, and `ingredients` is then restricted to ids that still appear in it.
recipe_ingredients = filter_recipe_ingredient(recipe_ingredients, alternative_ingredients, CONFIG["filter_ingredient_sim_score"])
ingredients = ingredients[ingredients.index.isin(recipe_ingredients["ingredient_id"])]
recipe_image_vlm_caption_embeddings = load_recipe_image_vlm_caption_embeddings(PATH, recipe_image_vlm_caption.copy())

def _nutrient_rows(nutrients, rating_df):
    """Return the `use_nutritions` columns of the recipes rated in `rating_df`.

    Args:
        nutrients: Nutrient frame indexed by recipe id.
        rating_df: Rating frame with a `recipe_id` column.
    """
    is_rated = nutrients.index.isin(rating_df.recipe_id)
    return nutrients.loc[is_rated][use_nutritions]


# Only recipes from the training ratings are given to train_dataclustering,
# which returns the clustered rows plus the k-means model and scaler that are
# reused for the other two splits below.
train_nutrients, kmeans_model, scaler = train_dataclustering(
    train_data=_nutrient_rows(recipe_nutrients, core_train_rating),
    n_cluster=6,
    path=PATH
)
test_nutrients = calculate_cluster(
    data=_nutrient_rows(recipe_nutrients, core_test_rating),
    path=PATH,
    scaler=scaler,
    kmeans_model=kmeans_model
)
valid_nutrients = calculate_cluster(
    data=_nutrient_rows(recipe_nutrients, core_val_rating),
    path=PATH,
    scaler=scaler,
    kmeans_model=kmeans_model
)

# A recipe can be rated in more than one split, so the concatenation may hold
# duplicate ids; keep='first' keeps the row from the earliest frame in the
# concat order (train, then test, then valid).
recipe_nutrients = pd.concat([
    train_nutrients, test_nutrients, valid_nutrients
])
recipe_nutrients = recipe_nutrients.loc[~recipe_nutrients.index.duplicated(keep='first')]
recipe_cluster_centers = load_clouster_centers(kmeanth_model=kmeans_model, device=device)

# Summary statistics of the training-split rows, shown as the cell output.
train_nutrients.describe()

In [None]:
# Scratch check of set-difference semantics; evaluates to {1, 2, 3}.
{1, 2, 3, 4, 5} - {4, 5, 6, 7}

In [None]:
# Run the project's nutrient preprocessor over the nutrient table, telling it
# which recipe ids belong to which split, then push the cluster centres through
# the same preprocessor.
# NOTE(review): `recipe_nutrients` and `recipe_cluster_centers` are both input
# and output here, so re-running this cell feeds already-transformed values
# back in — presumably not idempotent; verify before re-executing.
nutri_scalar = NutrientStandardPreprocess(use_nutrients=use_nutritions)

_preprocess_args = dict(
    train_recipe_ids=core_train_rating.recipe_id.unique(),
    test_recipe_ids=core_test_rating.recipe_id.unique(),
    val_recipe_ids=core_val_rating.recipe_id.unique(),
    recipe_nutrients=recipe_nutrients,
)
recipe_nutrients = nutri_scalar.do(**_preprocess_args)
recipe_cluster_centers = nutri_scalar.transform_from_tensor(recipe_cluster_centers)

# Cluster sizes among recipes that occur in the training ratings (cell output).
_is_train_recipe = recipe_nutrients.index.isin(core_train_rating.recipe_id)
recipe_nutrients.loc[_is_train_recipe, "cluster"].value_counts()

In [None]:
# One-off script that created the "mini" dataset: it samples 2% of each rating
# split, restricts every other table to the sampled recipes/ingredients, and
# writes the results as CSV files under data/mini.
# It is kept disabled inside a string literal; remove the triple quotes to run it.
"""
mini_path = os.path.join(os.getcwd(), '..', 'data', 'mini')
mini_core_train_rating = core_train_rating.sample(frac=0.02, random_state=1)
mini_core_test_rating = core_test_rating.sample(frac=0.02, random_state=1)
mini_core_val_rating = core_val_rating.sample(frac=0.02, random_state=1)
user_ids = pd.concat([mini_core_train_rating, mini_core_test_rating, mini_core_val_rating]).user_id.unique()
recipe_ids = pd.concat([mini_core_train_rating, mini_core_test_rating, mini_core_val_rating]).recipe_id.unique()
mini_core_recipes = core_recipes.loc[core_recipes.index.isin(recipe_ids)]
mini_recipe_ingredients = recipe_ingredients.loc[recipe_ingredients["recipe_id"].isin(recipe_ids)]
mini_ingredients = ingredients.loc[ingredients.index.isin(mini_recipe_ingredients["ingredient_id"])]
mini_recipe_nutrients = recipe_nutrients.loc[recipe_nutrients.index.isin(recipe_ids)]
mini_recipe_image_embeddings = recipe_image_embeddings.loc[recipe_image_embeddings.index.isin(recipe_ids)]
mini_recipe_image_vlm_caption_embeddings = recipe_image_vlm_caption_embeddings.loc[recipe_image_vlm_caption_embeddings.index.isin(recipe_ids)]
mini_recipe_cooking_directions_embeddings = recipe_cooking_directions_embeddings.loc[recipe_cooking_directions_embeddings.index.isin(recipe_ids)]
mini_ingredients_with_embeddings = ingredients_with_embeddings.loc[ingredients_with_embeddings.index.isin(mini_ingredients.index)]
mini_recipe_cooking_directions = recipe_cooking_directions.loc[recipe_cooking_directions.index.isin(recipe_ids)]
mini_recipe_image_vlm_caption = recipe_image_vlm_caption.loc[recipe_image_vlm_caption.index.isin(recipe_ids)]
mini_alternative_ingredients = load_alternative_ingredients(mini_path, mini_core_recipes.copy(), CONFIG["device"])
mini_recipe_ingredients = filter_recipe_ingredient(mini_recipe_ingredients, mini_alternative_ingredients, CONFIG["filter_ingredient_sim_score"])
mini_ingredients = mini_ingredients[mini_ingredients.index.isin(mini_recipe_ingredients["ingredient_id"])]

diff_ing_id = list(set(mini_recipe_ingredients.ingredient_id) - set(mini_ingredients.index))
mini_recipe_ingredients = mini_recipe_ingredients.loc[~mini_recipe_ingredients["ingredient_id"].isin(diff_ing_id)]

mini_core_train_rating.to_csv(os.path.join(mini_path, "core-data-train_rating.csv"))
mini_core_test_rating.to_csv(os.path.join(mini_path, "core-data-test_rating.csv"))
mini_core_val_rating.to_csv(os.path.join(mini_path, "core-data-valid_rating.csv"))
mini_recipe_ingredients.to_csv(os.path.join(mini_path, "recipe_ingredients.csv"))
mini_ingredients.to_csv(os.path.join(mini_path, "ingredients.csv"))
mini_recipe_image_embeddings.to_csv(os.path.join(mini_path, "recipe_image_embeddings_ft.csv"))
mini_recipe_image_vlm_caption_embeddings.to_csv(os.path.join(mini_path, "recipe_image_vlm_caption_embeddings.csv"))
mini_recipe_cooking_directions_embeddings.to_csv(os.path.join(mini_path, "recipe_cooking_directions_embeddings.csv"))
mini_ingredients_with_embeddings.to_csv(os.path.join(mini_path, "ingredients_embeddings.csv"))
mini_core_recipes.to_csv(os.path.join(mini_path, "core-data_recipe.csv"))
mini_recipe_nutrients.to_csv(os.path.join(mini_path, "recipe_nutrients.csv"))
mini_alternative_ingredients.to_csv(os.path.join(mini_path, "alternative_ingredients.csv"))
mini_recipe_cooking_directions.to_csv(os.path.join(mini_path, "recipe_cooking_directions.csv"))
mini_recipe_image_vlm_caption.to_csv(os.path.join(mini_path, "recipe_image_vlm_caption.csv"))

"""

# Cell output: summary statistics of the nutrient rows for recipes that occur
# in the training ratings.
recipe_nutrients.loc[recipe_nutrients.index.isin(core_train_rating["recipe_id"])].describe()

In [None]:
# pd.DataFrame(nutri_scalar.transform(recipe_nutrients.loc[recipe_nutrients.index.isin(core_train_rating["recipe_id"])])).describe()

In [None]:
# pd.DataFrame(recipe_cluster_centers.cpu().numpy()).describe()

In [None]:
#stop()

# Build the base heterogeneous graph from all loaded tables. The call also
# returns the label encoders for users, items (recipes) and ingredients, which
# the masking step and the model size arguments reuse.
data, user_lencoder, item_lencoder, ing_lencoder = create_base_hetero(
    core_train_rating=core_train_rating,
    core_test_rating=core_test_rating,
    core_val_rating=core_val_rating,
    ingredients=ingredients,
    recipe_nutrients=recipe_nutrients,
    recipe_image_embeddings=recipe_image_embeddings,
    recipe_image_vlm_caption_embeddings=recipe_image_vlm_caption_embeddings,
    recipe_cooking_directions_embeddings=recipe_cooking_directions_embeddings,
    ingredients_with_embeddings=ingredients_with_embeddings,
    directory_path=PATH,
    device=CONFIG["device"],
    hidden_dim=CONFIG["hidden_dimention"],
    input_image_dim=CONFIG["input_image_dim"],
    input_vlm_caption_dim=CONFIG["input_vlm_caption_dim"],
    input_ingredient_dim=CONFIG["input_ingredient_dim"],
    input_cooking_direction_dim=CONFIG["input_cooking_direction_dim"],
)

# Derive one graph per split. The training call (is_train=True) returns `ss`,
# which is passed back in as `scalar_preprocess` for the test and validation
# splits; their own second return value is discarded.
train_data, ss = mask_hetero(data, core_train_rating, recipe_ingredients, user_lencoder, item_lencoder, ing_lencoder, is_train=True)
test_data, _ = mask_hetero(data, core_test_rating, recipe_ingredients, user_lencoder, item_lencoder, ing_lencoder, is_train=False, scalar_preprocess=ss)
val_data, _ = mask_hetero(data, core_val_rating, recipe_ingredients, user_lencoder, item_lencoder, ing_lencoder, is_train=False, scalar_preprocess=ss)

# NOTE(review): `mini_train` is a clone of the *test* graph despite its name and
# is not referenced by any later cell in this notebook — confirm it is still needed.
mini_train = test_data.clone()
# Item popularity; given to the training loader, train_func and evaluate_model.
pops = get_item_popularity(device, item_lencoder, PATH, CONFIG["rating_threshold"])

# Only the training loader uses negative sampling (ratio 1.0), worker processes
# and the popularity tensor; the test/validation loaders are unshuffled with
# negative sampling disabled (ratio 0.0).
train_loader = create_dataloader(train_data, CONFIG["batch_size"], num_workers=2, neg_sampling_ratio=1.0, popularity=pops, is_abration_cl=False)
test_loader = create_dataloader(test_data, CONFIG["batch_size"], shuffle=False, neg_sampling_ratio=0.0, is_abration_cl=False)
val_loader = create_dataloader(val_data, CONFIG["batch_size"], shuffle=False, neg_sampling_ratio=0.0, is_abration_cl=False)


In [None]:
# Heading for the output below (the Japanese text means "data structure").
print("\n\n\nデータ構造")
# Cell output: the distinct cluster values stored on the "intention" nodes of
# the training graph.
train_data["intention"]["cluster"].unique()

In [None]:
import torch.nn as nn

# Instantiate the recommendation model. Every hyperparameter is read from
# CONFIG; the only values taken from elsewhere are the user/item counts (from
# the label encoders) and the cluster centres computed earlier.
model = RecommendationModel(
    dropout_rate=CONFIG["dropout_rate"],
    device=CONFIG["device"],
    hidden_dim=CONFIG["hidden_dimention"],
    node_embeding_dimmention=CONFIG["node_embeding_dimmention"],
    # Number of distinct users / items known to the label encoders.
    num_user=len(user_lencoder.classes_),
    num_item=len(item_lencoder.classes_),
    nutrient_dim=CONFIG["nutrient_dim"],
    num_heads=CONFIG["multi_head"],
    sencing_layers=CONFIG["sencing_layers"],
    fusion_layers=CONFIG["fusion_layers"],
    intention_layers=CONFIG["intention_layers"],
    temperature=CONFIG["temperature"],
    cl_loss_rate=CONFIG["cl_loss_rate"],
    input_image_dim=CONFIG["input_image_dim"],
    input_vlm_caption_dim=CONFIG["input_vlm_caption_dim"],
    input_ingredient_dim=CONFIG["input_ingredient_dim"],
    input_cooking_direction_dim=CONFIG["input_cooking_direction_dim"],
    user_encoder_low_rank_dim=CONFIG["user_encoder_low_rank_dim"],
    item_encoder_low_rank_dim=CONFIG["item_encoder_low_rank_dim"],
    user_encoder_dropout_rate=CONFIG["user_encoder_dropout_rate"],
    item_encoder_dropout_rate=CONFIG["item_encoder_dropout_rate"],
    intention_cl_after_dropout_rate=CONFIG["intention_cl_after_dropout_rate"],
    taste_gnn_dropout_rate=CONFIG["taste_gnn_dropout_rate"],
    taste_gnn_after_dropout_rate=CONFIG["taste_gnn_after_dropout_rate"],
    fusion_gnn_dropout_rate=CONFIG["fusion_gnn_dropout_rate"],
    fusion_gnn_after_dropout_rate=CONFIG["fusion_gnn_after_dropout_rate"],
    link_predictor_dropout_rate=CONFIG["link_predictor_dropout_rate"],
    link_predictor_leaky_relu_slope=CONFIG["link_predictor_leaky_relu_slope"],
    sensing_gnn_resisual_alpha=CONFIG["sensing_gnn_resisual_alpha"],
    fusion_gnn_resisual_alpha=CONFIG["fusion_gnn_resisual_alpha"],
    # Presumably ablation switches (spelled "abration" in the API); both are off here.
    is_abration_wo_cl=False,
    is_abration_wo_taste=False,
    image_encoder_low_rank_dim=CONFIG["image_encoder_low_rank_dim"],
    cluster_centers=recipe_cluster_centers,
    cluster_margin=CONFIG["cluster_margin"],
    cluster_loss_weight=CONFIG["cluster_loss_weight"],
)
# Move the model to the selected device; as the last expression of nothing
# further in this statement, the call's return value is simply discarded.
model.to(device)

# Split the parameters into two optimizer groups so that the user encoder can
# use its own weight decay.
user_encoder_params = list(model.user_encoder.parameters())

# Group membership is decided by object identity. Comparing tensor *values*
# (torch.equal) would also exclude unrelated parameters that merely hold the
# same numbers as a user-encoder parameter of the same shape — for example
# zero-initialised biases — and those parameters would then never be handed to
# the optimizer at all.
user_encoder_param_ids = {id(p) for p in user_encoder_params}
other_params = [
    p for p in model.parameters()
    if id(p) not in user_encoder_param_ids
]

optimizer = torch.optim.Adam(
    [
        {
            "params": user_encoder_params,
            "weight_decay": CONFIG["user_encoder_weight_decay"],
        },
        {
            "params": other_params,
            "weight_decay": CONFIG["default_decay"],
        }
    ],
    # The learning rate is shared by both groups; weight decay is set per group.
    lr=CONFIG["learning_rate"],
)

# criterion = BPRLoss(reg_lambda=CONFIG["bprloss_reg_lambda"])
# criterion = nn.BCELoss()

# Step decay: StepLR multiplies each group's learning rate by `gamma` once every
# `step_size` calls to scheduler.step(). How often train_func steps the
# scheduler (per epoch or per batch) is not visible here — confirm there.
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=CONFIG["scheduler_size"],
    gamma=CONFIG["scheduler_gamma"]
)

In [None]:
from torchinfo import summary

# Build the torchinfo summary once and reuse it for both W&B and stdout.
model_summary = summary(model, verbose=0)

# W&B renders HTML, so line breaks are converted to <br> tags.
summary_text = str(model_summary).replace("\n", "<br>")
wandb.log({"model_summary": wandb.Html(summary_text)})

# The Japanese heading means "model structure".
print("\n\n\nモデルの構造")
print(model_summary)

# Track gradients of the model in W&B every 100 steps.
wandb.watch(model, log="gradients", log_freq=100)

In [20]:
def wandb_logger(**kwargs):
    """Best-effort wrapper around ``wandb.log``.

    All keyword arguments are forwarded unchanged to ``wandb.log``. If the call
    raises an ordinary exception, the error and the payload are printed and
    execution continues, so a logging problem never aborts training.

    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught (a bare
    ``except:`` would swallow them), so interrupting the kernel still stops the
    run.
    """
    try:
        wandb.log(**kwargs)
    except Exception as err:
        # Report why logging failed, then fall back to printing the payload.
        print(f"wandb.log failed: {err!r}")
        print(kwargs)

def wandb_tagger(*args):
    """Best-effort helper that appends tags to the active W&B run.

    Each positional argument is added to ``wandb.run.tags``. If that fails with
    an ordinary exception (for example because there is no active run), the
    error and the requested tags are printed and execution continues.

    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught (a bare
    ``except:`` would swallow them).
    """
    try:
        wandb.run.tags = list(wandb.run.tags) + list(args)
    except Exception as err:
        # Report why tagging failed, then fall back to printing the tags.
        print(f"wandb tagging failed: {err!r}")
        print(args)

def wandb_scatter(df, step, title):
    """Log a PC1-vs-PC2 scatter plot of `df` to the active W&B run.

    Args:
        df: Table of points to plot; must support ``.copy()`` and item
            assignment (presumably a pandas DataFrame with "PC1", "PC2" and
            "node_type" columns — confirm against the caller in train_func).
        step: Training step; stored in a "step" column of the logged table,
            used in the log key ``scatter_step_<step>`` and passed as the
            W&B step.
        title: Title of the scatter chart.

    The caller's `df` is left untouched: the "step" column is added to a copy.
    """
    # Copy first so that adding the "step" column does not leak into the
    # frame owned by the caller.
    table_df = df.copy()
    table_df["step"] = step
    table = wandb.Table(data=table_df, columns=["PC1", "PC2", "node_type"])
    scatter_chart = wandb.plot.scatter(
        table,
        "PC1",
        "PC2",
        title=title,
    )
    wandb.run.log({f"scatter_step_{step}": scatter_chart},
                  step=step)


# Names handed to train_func as `pca_cols`; presumably the node types whose
# embeddings are projected for the scatter plots — confirm in train_func.
pca_cols = ["intention", "taste", "image"]

# BPR loss with the regularisation weight from CONFIG.
criterion = BPRLoss(reg_lambda=CONFIG["bprloss_reg_lambda"])


# Run training. The three wandb_* helpers defined above are injected as
# callbacks; early-stopping and validation settings come from CONFIG.
# Note that the validation graph (`val_data`) is passed, not `val_loader`.
train_func(
    train_loader,
    val_data,
    model,
    optimizer,
    scheduler,
    criterion,
    epochs=CONFIG["epochs"],
    device=device,
    wbLogger=wandb_logger,
    wbTagger=wandb_tagger,
    wbScatter=wandb_scatter,
    directory_path=PATH,
    project_name=PROJECT_NAME,
    experiment_name=run_name,
    popularities=pops,
    patience=CONFIG["patience"],
    validation_interval=CONFIG["validation_interval"],
    max_grad_norm=CONFIG["max_grad_norm"],
    pca_cols=pca_cols,
    recommendation_loss_weight=1
)



KeyboardInterrupt: 

In [None]:
# Evaluate the trained model on the test graph. The call returns two result
# objects, each of which exposes a `.log(prefix=..., num_round=...)` method.
score_statics, t_mhandler = evaluate_model(
    model=model,
    data=test_data,
    device=CONFIG["device"],
    freq_tensor=pops,
    desc="[Test]"
)


# Print the score statistics rounded to 4 digits; the copy sent to W&B is
# requested without `num_round`.
print("Score Statics: ")
print(score_statics.log(prefix="test-score-statics", num_round=4))
wandb_logger(data=score_statics.log(prefix="test-score-statics"))

# Same for the metric handler: 4 digits on screen, 8 digits for W&B.
print("handler Result: ")
print(t_mhandler.log(prefix="test-handler", num_round=4))
wandb_logger(data=t_mhandler.log(prefix="test-handler", num_round=8))

In [None]:
# Record which loss class was used and tag the run before closing it.
wandb.config["criterion"] = type(criterion).__name__
wandb.run.tags = [*wandb.run.tags, "image_fine_tuned"]

# Optional outcome tags — uncomment the one that applies before finishing:
#wandb.run.tags = list(wandb.run.tags) + ["crashed"]
#wandb.run.tags = list(wandb.run.tags) + ["all_same_result"]
#wandb.run.tags = list(wandb.run.tags) + ["manual_stopped"]
#wandb.run.tags = list(wandb.run.tags) + ["over_learning"]
#wandb.run.tags = list(wandb.run.tags) + ["not_learning"]
#wandb.run.tags = list(wandb.run.tags) + ["not_satisfied"]
#wandb.run.tags = list(wandb.run.tags) + ["is_trial_model"]

# Close the W&B run.
wandb.finish()