In [1]:
import pandas as pd
import numpy as np
import torch
from base_model import EmbeddingHead
from dataset import TrainType
import copy

In [2]:
# Checkpoint files for the three trained heads. Later cells index this list
# positionally, so the order matters:
#   [0] is loaded into EmbeddingHead(TrainType.text)
#   [1] is loaded into EmbeddingHead(TrainType.graph)
#   [2] is loaded into EmbeddingHead(TrainType.both)
models_paths = [
    "../../models/just_text_model.ckpt",
    "../../models/graph_model.ckpt",
    "../../models/model_combination.ckpt",
]

In [None]:
# path_to_recipes_embeddings = "../../data/recipe_embeddings.parquet"
# recipes = pd.read_parquet(path_to_recipes_embeddings)

In [3]:
# From text embeddings (jina)
# Pass every stored text embedding through `model.model_recipe` of the
# text-only head and write the transformed vectors to a new parquet file.
model = EmbeddingHead(TrainType.text)
# NOTE(review): torch.load unpickles the checkpoint -- only load trusted files.
checkpoint = torch.load(models_paths[0], map_location=torch.device("cpu"))
model.load_state_dict(checkpoint["state_dict"])
model.eval()
model.to("cpu")
print("Model loaded")

path_to_recipes_embeddings = "../../data/recipe_embeddings.parquet"
recipes = pd.read_parquet(path_to_recipes_embeddings)
# Stack the per-row vectors into one (n_rows, dim) float32 matrix; the
# combined text+graph cell further down reuses `recipe_embeddings`.
recipe_embeddings = recipes["recipe_embeddings"].values
recipe_embeddings = np.vstack(recipe_embeddings).astype(np.float32)

# Inference only: no_grad() stops autograd from recording a graph for each
# forward pass, so the outputs can be converted to numpy directly.
with torch.no_grad():
    new_embeddings = [
        model.model_recipe(torch.from_numpy(embedding)).numpy()
        for embedding in recipe_embeddings
    ]

# Overwrite the embedding column in place (downstream cells rely on `recipes`
# keeping its other columns) and persist the result.
recipes["recipe_embeddings"] = new_embeddings
recipes.to_parquet(
    f"../../data/recipe_embeddings_{TrainType.text.name}.parquet")

Model loaded


In [4]:
# Display the text frame; when the cells are run in order, `recipe_embeddings`
# holds the model outputs assigned in the previous cell.
recipes

Unnamed: 0,id,recipe_embeddings,recipe_merged_info
0,467357,"[-0.026986673, -0.025686143, 0.009676915, -0.0...",\nDISH DESCRIPTION:\nautumn squash soup\n\nif ...
1,50899,"[0.011414191, -0.006069881, -0.021527622, 0.01...",\nDISH DESCRIPTION:\nautumn stew\n\nthis is an...
2,388259,"[-0.014244658, 0.019238979, -0.0184876, 0.0219...",\nDISH DESCRIPTION:\nautumn stuffed pork loin ...
3,93910,"[-0.008388701, -0.009652335, -0.0284896, 0.008...",\nDISH DESCRIPTION:\nautumn sweet potato or p...
4,73265,"[0.0035407434, 0.027416365, -0.05761968, -0.06...",\nDISH DESCRIPTION:\nautumn tea\n\nfrom taste ...
...,...,...,...
20904,421747,"[0.022771403, 0.017633244, -0.0050613196, 0.04...",\nDISH DESCRIPTION:\nbetter batter rye bread\n...
20905,162447,"[-0.017486883, -0.00070857815, -0.003457962, 0...",\nDISH DESCRIPTION:\nbetter beans on toast\n\n...
20906,229170,"[0.030263178, 0.00801071, 0.016603831, 0.06642...",\nDISH DESCRIPTION:\nbetter beer bread\n\nwith...
20907,384220,"[0.012995133, 0.019628698, 0.012931913, 0.0004...",\nDISH DESCRIPTION:\nbetter beet butter\n\nthi...


In [5]:
# From graph embeddings
# Pass every stored graph embedding through `model.model_recipe` of the
# graph-only head and write the transformed vectors to a new parquet file.
model = EmbeddingHead(TrainType.graph)
# NOTE(review): torch.load unpickles the checkpoint -- only load trusted files.
checkpoint = torch.load(models_paths[1], map_location=torch.device("cpu"))
model.load_state_dict(checkpoint["state_dict"])
model.eval()
model.to("cpu")
print("Model loaded")

path_to_recipes_embeddings = "../../data/graph_recipe_embeddings.parquet"
recipes_graph = pd.read_parquet(path_to_recipes_embeddings)
# Stack the per-row vectors into one (n_rows, dim) float32 matrix; the
# combined text+graph cell further down reuses `recipe_embeddings_graph`.
recipe_embeddings_graph = recipes_graph["embedding"].values
recipe_embeddings_graph = np.vstack(recipe_embeddings_graph).astype(np.float32)

# Inference only: no_grad() stops autograd from recording a graph for each
# forward pass, so the outputs can be converted to numpy directly.
with torch.no_grad():
    new_embeddings = [
        model.model_recipe(torch.from_numpy(embedding)).numpy()
        for embedding in recipe_embeddings_graph
    ]

# Overwrite the embedding column in place and persist the result.
recipes_graph["embedding"] = new_embeddings
recipes_graph.to_parquet(
    f"../../data/recipe_embeddings_{TrainType.graph.name}.parquet")

Model loaded


In [7]:
# Display the graph frame; when the cells are run in order, `embedding` holds
# the model outputs assigned in the previous cell.
recipes_graph

Unnamed: 0,recipe_id,embedding
0,188446,"[0.00081450865, -0.057489958, 0.0031556655, -0..."
1,16215,"[0.0061513316, 0.021842953, -0.021255428, -0.0..."
2,187129,"[-0.030361224, 0.016516268, -0.02780363, 0.046..."
3,271527,"[0.023536742, 0.008154625, 0.021236138, -0.051..."
4,33947,"[-0.03213763, -0.04133912, -0.0120988935, -0.0..."
...,...,...
20904,178676,"[0.02268915, 0.015256621, 0.018183013, -0.0198..."
20905,33202,"[-0.03348412, 0.015353784, 0.014249254, -0.044..."
20906,160482,"[0.004920371, -0.017426262, -0.021493502, -0.0..."
20907,150591,"[-0.012536462, -0.010839184, 0.014256638, -0.0..."


In [5]:
# From text + graph embeddings
# Concatenate each recipe's text and graph embedding, pass the joined vector
# through `model.model_recipe` of the combined head, and write the result to a
# new parquet file. Relies on `recipes`, `recipe_embeddings`, `recipes_graph`
# and `recipe_embeddings_graph` from the two cells above.
model = EmbeddingHead(TrainType.both)
# NOTE(review): torch.load unpickles the checkpoint -- only load trusted files.
checkpoint = torch.load(models_paths[2], map_location=torch.device('cpu'))
model.load_state_dict(checkpoint['state_dict'])
model.eval()
model.to("cpu")
print("Model loaded")

# The text frame (`id`) and the graph frame (`recipe_id`) are NOT stored in the
# same row order, so pairing rows by position would glue together vectors of
# different recipes. Look up, for every text row, the graph row with the same
# recipe id instead.
graph_ids = recipes_graph["recipe_id"].to_numpy()
if len(np.unique(graph_ids)) != len(graph_ids):
    raise ValueError(
        "recipes_graph contains duplicate recipe_id values; cannot align rows")
graph_row_by_id = pd.Series(np.arange(len(graph_ids)), index=graph_ids)
graph_rows = graph_row_by_id.reindex(recipes["id"].to_numpy())
if graph_rows.isna().any():
    raise KeyError(
        f"{int(graph_rows.isna().sum())} recipe ids in `recipes` "
        "have no matching graph embedding")
graph_rows = graph_rows.to_numpy().astype(np.int64)

# Row i = [text embedding of recipe i | graph embedding of the same recipe].
recipes_combined = np.hstack(
    (recipe_embeddings, recipe_embeddings_graph[graph_rows])
).astype(np.float32)

# Inference only: no_grad() stops autograd from recording a graph for each
# forward pass, so the outputs can be converted to numpy directly.
with torch.no_grad():
    new_embeddings = [
        model.model_recipe(torch.from_numpy(embedding)).numpy()
        for embedding in recipes_combined
    ]

# Keep the text frame's id / merged-info columns and row order; only the
# embedding column is replaced in the copy that gets saved.
recipes_both = recipes.copy()
recipes_both["recipe_embeddings"] = new_embeddings
recipes_both.to_parquet(
    f"../../data/recipe_embeddings_{TrainType.both.name}.parquet")

Model loaded


In [8]:
# Read a saved parquet back from disk and display it.
# NOTE(review): this rebinds `recipes_graph`, replacing the frame built in the
# graph-embedding cell -- re-running earlier cells afterwards will see this one.
# NOTE(review): the path is hard-coded here while the graph cell builds its
# output path from TrainType.graph.name, and the recorded output shows
# id / recipe_embeddings / recipe_merged_info columns rather than the
# recipe_id / embedding columns that cell writes -- confirm this file is the
# intended one and that the output is not stale.
path_to_recipes_embeddings_graph = "../../data/recipe_embeddings_graph.parquet"
recipes_graph = pd.read_parquet(path_to_recipes_embeddings_graph)
recipes_graph

Unnamed: 0,id,recipe_embeddings,recipe_merged_info
0,467357,"[-0.026986673, -0.025686143, 0.009676915, -0.0...",\nDISH DESCRIPTION:\nautumn squash soup\n\nif ...
1,50899,"[0.011414191, -0.006069881, -0.021527622, 0.01...",\nDISH DESCRIPTION:\nautumn stew\n\nthis is an...
2,388259,"[-0.014244658, 0.019238979, -0.0184876, 0.0219...",\nDISH DESCRIPTION:\nautumn stuffed pork loin ...
3,93910,"[-0.008388701, -0.009652335, -0.0284896, 0.008...",\nDISH DESCRIPTION:\nautumn sweet potato or p...
4,73265,"[0.0035407434, 0.027416365, -0.05761968, -0.06...",\nDISH DESCRIPTION:\nautumn tea\n\nfrom taste ...
...,...,...,...
20904,421747,"[0.022771403, 0.017633244, -0.0050613196, 0.04...",\nDISH DESCRIPTION:\nbetter batter rye bread\n...
20905,162447,"[-0.017486883, -0.00070857815, -0.003457962, 0...",\nDISH DESCRIPTION:\nbetter beans on toast\n\n...
20906,229170,"[0.030263178, 0.00801071, 0.016603831, 0.06642...",\nDISH DESCRIPTION:\nbetter beer bread\n\nwith...
20907,384220,"[0.012995133, 0.019628698, 0.012931913, 0.0004...",\nDISH DESCRIPTION:\nbetter beet butter\n\nthi...
