In [14]:
import sys
import os
from os.path import join as osp

# Make the parent and grandparent of the working directory importable so that
# modules living above the notebook folder can be imported from here.
script_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(script_dir, '..'))
grand_dir = os.path.abspath(os.path.join(parent_dir, '..'))
# Only append directories that are missing: re-running this cell must not keep
# growing sys.path with duplicate entries.
for _extra_dir in (parent_dir, grand_dir):
    if _extra_dir not in sys.path:
        sys.path.append(_extra_dir)
from constant import *

In [13]:
import pandas as pd
import ast
from transformers import AutoTokenizer, OpenAIGPTModel
import torch

# Shared tokenizer instance; parse_string_to_list uses it to turn token ids back into text.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="openai-community/openai-gpt"
)
class Node:
    """A binary-tree node holding one value and links to two children."""

    def __init__(self, value):
        """Store ``value`` and start with no left or right child."""
        self.left = self.right = None
        self.value = value

def insert(root, value):
    """Insert ``value`` into the binary search tree rooted at ``root``.

    Values that compare neither smaller nor larger than an existing node's
    value are treated as duplicates and ignored.

    The descent is iterative rather than recursive so that a degenerate
    (e.g. built from sorted input) tree deeper than the interpreter's
    recursion limit does not raise ``RecursionError``.

    Args:
        root: Root ``Node`` of the tree, or ``None`` for an empty tree.
        value: Value to insert; must support ``<`` and ``>`` against the
            values already stored.

    Returns:
        The root of the tree: a new ``Node`` if ``root`` was ``None``,
        otherwise ``root`` itself.
    """
    if root is None:
        return Node(value)

    current = root
    while True:
        if value < current.value:
            if current.left is None:
                current.left = Node(value)
                break
            current = current.left
        elif value > current.value:
            if current.right is None:
                current.right = Node(value)
                break
            current = current.right
        else:
            # Neither smaller nor larger: already present, nothing to add.
            break
    return root

def build_bst(lst):
    """Build a binary search tree from an iterable of iterables.

    Every element of every inner iterable is fed to ``insert`` in encounter
    order.

    Args:
        lst: Iterable whose items are themselves iterables of values.

    Returns:
        The root of the resulting tree, or ``None`` when there were no
        elements at all.
    """
    tree = None
    flattened = (element for group in lst for element in group)
    for element in flattened:
        tree = insert(tree, element)
    return tree

def inorder_traversal(node, result_set):
    """Visit the tree rooted at ``node`` in order, adding each value to ``result_set``.

    The walk uses an explicit stack instead of recursion so that very deep
    (skewed) trees do not raise ``RecursionError``.

    Args:
        node: Root of the (sub)tree to visit, or ``None`` for an empty tree.
            Each node must expose ``left``, ``value`` and ``right``.
        result_set: Collector whose ``add`` method is called once per node,
            in left / node / right order.

    Returns:
        None. Results are delivered through ``result_set``.
    """
    pending = []  # ancestors whose value and right subtree are still to be visited
    current = node
    while pending or current is not None:
        # Descend as far left as possible, remembering the path.
        while current is not None:
            pending.append(current)
            current = current.left
        current = pending.pop()
        result_set.add(current.value)
        current = current.right

def set_from_list_column(df, column_name):
    """Collect the distinct elements found in a column of lists.

    Each entry of ``df[column_name]`` is iterated and all of its elements are
    gathered into one set. The set is built directly rather than through an
    intermediate binary search tree: the result is a set either way, and the
    direct form needs no element ordering and cannot exceed the recursion
    limit on large or sorted inputs.

    Args:
        df: Mapping-like object (e.g. a pandas DataFrame) indexable by
            ``column_name``.
        column_name: Key of the column whose entries are iterables of
            hashable values.

    Returns:
        A ``set`` with every distinct element across all entries.
    """
    return {item for sublist in df[column_name] for item in sublist}

def parse_string_to_list(string):
    """Parse a stringified list of token ids and decode it with ``tokenizer``.

    ``string`` is evaluated with ``ast.literal_eval``. If the first element of
    the result is itself a list, the value is treated as a batch and passed to
    ``tokenizer.batch_decode``; otherwise it is passed to ``tokenizer.decode``.
    An empty result has no first element to inspect, so it is treated as a
    flat sequence and passed to ``tokenizer.decode`` (previously this case
    raised ``IndexError``).

    Args:
        string: Text containing a Python literal, e.g. ``"[1, 2, 3]"`` or
            ``"[[1, 2], [3]]"``.

    Returns:
        Whatever ``tokenizer.batch_decode`` returns for nested input, or
        whatever ``tokenizer.decode`` returns for flat (or empty) input.

    Raises:
        ValueError, SyntaxError: If ``string`` is not a valid Python literal
            (raised by ``ast.literal_eval``).
    """
    result = ast.literal_eval(string)
    # Guard the emptiness check first: indexing [0] on "[]" would raise IndexError.
    is_batch = bool(result) and isinstance(result[0], list)
    if is_batch:
        return tokenizer.batch_decode(result)
    return tokenizer.decode(result)


In [15]:
# Load the training interactions, discard the columns not needed here, and
# average what remains for each value of `i`.
data_interactions_train = pd.read_csv(osp(PROJECT_ROOT, 'data/interactions_train.csv'))
data_interactions_train_rate = (
    data_interactions_train
    .drop(columns=["user_id", "date", "recipe_id", "u"])
    .groupby("i", as_index=False)
    .mean()
)
data_interactions_train_rate

Unnamed: 0,i,rating
0,0,4.50
1,1,4.00
2,2,5.00
3,3,3.80
4,4,4.50
...,...,...
160896,178255,5.00
160897,178256,5.00
160898,178257,5.00
160899,178261,5.00


In [16]:
# Load the preprocessed recipes and drop the token, technique and
# ingredient-id columns, which are not used in the merges below.
data_pp_recipes = (
    pd.read_csv(osp(PROJECT_ROOT, r'data/PP_recipes.csv'))
    .drop(columns=["name_tokens", "ingredient_tokens", "steps_tokens", "techniques", "ingredient_ids"])
)
data_pp_recipes

Unnamed: 0,id,i,calorie_level
0,424415,23,0
1,146223,96900,0
2,312329,120056,1
3,74301,168258,0
4,76272,109030,0
...,...,...,...
178260,323143,76862,1
178261,149114,145962,0
178262,34200,65066,2
178263,30618,77358,0


In [17]:
# Attach the recipe columns to the per-`i` averages; the inner join keeps only
# values of `i` present in both frames.
merged_df_1 = data_interactions_train_rate.merge(data_pp_recipes, how='inner', on='i')
merged_df_1

Unnamed: 0,i,rating,id,calorie_level
0,0,4.50,40893,0
1,1,4.00,44394,0
2,2,5.00,85009,2
3,3,3.80,134728,1
4,4,4.50,200236,2
...,...,...,...,...
160896,178255,5.00,40514,0
160897,178256,5.00,190261,0
160898,178257,5.00,290157,0
160899,178261,5.00,492861,0


In [18]:
# Load the raw recipes and turn the stringified `tags` column into real Python
# objects via ast.literal_eval (the other stringified columns are left as text).
data_raw_recipes = (
    pd.read_csv(osp(PROJECT_ROOT, r'data/RAW_recipes.csv'))
    .assign(tags=lambda frame: frame["tags"].map(ast.literal_eval))
)
# Join on the recipe `id` and drop the contributor/submission columns.
merged_df_2 = (
    merged_df_1
    .merge(data_raw_recipes, on='id', how='inner')
    .drop(columns=["contributor_id", "submitted"])
)
merged_df_2

Unnamed: 0,i,rating,id,calorie_level,name,minutes,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,0,4.50,40893,0,white bean green chile pepper soup,495,"[weeknight, time-to-make, course, main-ingredi...","[204.8, 5.0, 9.0, 26.0, 24.0, 2.0, 10.0]",4,"['combine beans , onion , chilies , 1 / 2 teas...",easy soup for the crockpot.,"['great northern beans', 'yellow onion', 'dice...",9
1,1,4.00,44394,0,devilicious cookie cake delights,20,"[30-minutes-or-less, time-to-make, course, mai...","[132.3, 11.0, 39.0, 5.0, 4.0, 11.0, 5.0]",5,"['blend together cake mix , oil and eggs', 'ad...",,"[""devil's food cake mix"", 'vegetable oil', 'eg...",4
2,2,5.00,85009,2,baked potato toppings,10,"[15-minutes-or-less, time-to-make, course, mai...","[2786.2, 342.0, 134.0, 290.0, 161.0, 301.0, 42.0]",3,['pick whichever topping you want to use and c...,these toppings sure makes a nice change from p...,"['mayonnaise', 'salsa', 'cheddar cheese', 'ref...",13
3,3,3.80,134728,1,kfc honey bbq strips,40,"[60-minutes-or-less, time-to-make, main-ingred...","[316.0, 4.0, 40.0, 37.0, 78.0, 4.0, 10.0]",10,"['mix flour , salt and pepper in bowl', 'set a...",these are so yummy and they do taste just like...,"['chicken tenders', 'flour', 'garlic powder', ...",12
4,4,4.50,200236,2,lamb stew with tomatoes chickpeas and spices,150,"[time-to-make, course, main-ingredient, cuisin...","[606.5, 65.0, 12.0, 34.0, 65.0, 83.0, 7.0]",14,"['heat oven to 250', 'toss lamb with salt and ...",north african spices with a basic meat stew re...,"['lamb shoulder', 'salt', 'ground black pepper...",16
...,...,...,...,...,...,...,...,...,...,...,...,...,...
160896,178255,5.00,40514,0,sun dried tomato bruschetta with goat cheese,15,"[15-minutes-or-less, time-to-make, course, mai...","[94.6, 5.0, 4.0, 8.0, 8.0, 10.0, 3.0]",17,"['preheat broiler', 'slice your bread , if you...",these are good. if you haven't tried goat chee...,"['goat cheese', 'italian bread', 'sun-dried to...",5
160897,178256,5.00,190261,0,anise carrots,20,"[30-minutes-or-less, time-to-make, course, mai...","[210.3, 18.0, 55.0, 13.0, 3.0, 36.0, 8.0]",7,['cook / steam carrots in salted water until c...,anise and carrots go so well together.,"['carrot', 'onion', 'butter', 'anise', 'aprico...",7
160898,178257,5.00,290157,0,mediterranean spice mix,5,"[15-minutes-or-less, time-to-make, course, pre...","[10.9, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0]",4,"['mix all ingredients in a bowl', 'sprinkle on...","sprinkle dry on new baby potatoes, meat or mix...","['dried rosemary', 'ground cumin', 'ground cor...",6
160899,178261,5.00,492861,0,omani coffee,13,"[15-minutes-or-less, time-to-make, course, cui...","[16.3, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]",13,['preparing the beans: roast the ground beans ...,the omani people are well known for their hosp...,"['water', 'coffee', 'ground cardamom', 'whole ...",4
