In [17]:
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import os
import json
import re
import nltk
import zipfile
import ast

from datetime import datetime
from sklearn.preprocessing import LabelEncoder
from nltk.stem import WordNetLemmatizer

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.feature_extraction.text import CountVectorizer

from deep_translator import GoogleTranslator

In [9]:
from openai import OpenAI
from dotenv import load_dotenv
from collections import defaultdict

import os
import re
import csv

In [4]:
# i already tuned and stored the results
with open('logit_us.pkl', 'rb') as file:
    clf = pickle.load(file)

In [10]:
# Load API key from .env file
load_dotenv()

key = os.getenv('OPENAI_API_KEY')
if key is None:
    raise ValueError("The OPENAI_API_KEY environment variable is not set \
                     or .env file is missing.")

client = OpenAI(
    api_key=key
)

In [11]:
def call_openai_api(user_prompt, system_prompt, n_runs=1, model="gpt-4-turbo-2024-04-09"):
    responses = []
    for run_number in range(1, n_runs + 1):
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        response_content = completion.choices[0].message.content
        print(response_content)
        print("========================================next call")
        responses.append(response_content)
    return responses

In [12]:
def load_prompt_from_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()

def generate_user_prompt(file_path):
    return load_prompt_from_file(file_path)

def generate_system_prompt(file_path):
    return load_prompt_from_file(file_path)


In [31]:
def create_dataframe_from_responses(responses):
    dishes = []
    error_responses = []
    
    for i, response in enumerate(responses):
        try:
            response_list = ast.literal_eval(response)  # Safely evaluate the string representation of the list
            for dish in response_list:
                dishes.append(dish)
        except (ValueError, SyntaxError) as e:
            print(f"Error parsing response at index {i}: {e}")
            error_responses.append((i, response))

    # Create DataFrame and filter necessary columns
    df = pd.DataFrame(dishes)
    if 'ingredients' in df.columns:
        df = df[['name', 'ingredients']]
    else:
        print("Ingredients column not found in the response.")
    
    # Convert all strings to lowercase
    df['name'] = df['name'].str.lower()
    df['ingredients'] = df['ingredients'].apply(lambda x: [ingredient.lower() for ingredient in x])
    
    return df, error_responses


In [329]:
def create_dataframe_from_responses_2(responses):
    dishes = []
    error_responses = []
    
    for i, response in enumerate(responses):
        try:
            # Safely evaluate the string representation of the list
            response_list = ast.literal_eval(response)
            for dish in response_list:
                if 'mga sangkap' in dish:
                    dish['ingredients'] = dish.pop('mga sangkap')
            dishes.extend(response_list)  # Add all normalized dishes to the list
        except (ValueError, SyntaxError) as e:
            print(f"Error parsing response at index {i}: {e}")
            error_responses.append((i, response))

    # Create DataFrame from the list of dishes
    df = pd.DataFrame(dishes)
    
    # Check if the necessary columns are present
    if 'ingredients' in df.columns:
        # Select only required columns and handle ingredients' case
        df = df[['name', 'ingredients']]
        df['name'] = df['name'].str.lower()
        # Ensure ingredients is a list before applying the function
        df['ingredients'] = df['ingredients'].apply(lambda x: [ingredient.lower() for ingredient in x] if isinstance(x, list) else x)
    else:
        print("Ingredients column not found in the responses.")
    
    return df, error_responses

In [99]:
def translate_to_eng(responses):
    
    translator = GoogleTranslator(source='auto', target='en')
    translated_texts = []

    for response in responses:
        # Ensure the text does not exceed the 5000 character limit
        if len(response) <= 5000:
            try:
                # Translate the text and convert to lowercase
                translated = translator.translate(response).lower()
                translated_texts.append(translated)
            except Exception as e:
                print(f"Failed to translate text due to: {e}")
                translated_texts.append("Translation failed")
        else:
            print("Text too long to translate:", response)
            translated_texts.append("Text too long and was not translated")

    return translated_texts

### Collect English Version

In [8]:
user_prompt_path = 'user_prompt.txt'
system_prompt_path = 'system_prompt.txt'

In [9]:
user_prompt = generate_user_prompt(user_prompt_path)
system_prompt = generate_system_prompt(system_prompt_path)

In [10]:
eng_resp = call_openai_api(user_prompt, system_prompt, 30)
eng_resp

[{'name': 'quinoa salad',
  'ingredients': ['quinoa', 'cherry tomatoes', 'cucumber', 'feta cheese', 'olive oil', 'lemon juice', 'fresh parsley']},
 {'name': 'vegetable stir-fry',
  'ingredients': ['broccoli', 'carrots', 'bell peppers', 'soy sauce', 'ginger', 'garlic', 'sesame oil', 'tofu']},
 {'name': 'smoothie bowl',
  'ingredients': ['frozen berries', 'banana', 'spinach', 'almond milk', 'chia seeds', 'granola', 'coconut flakes']},
 {'name': 'lentil soup',
  'ingredients': ['lentils', 'carrots', 'celery', 'tomatoes', 'onion', 'garlic', 'vegetable broth', 'thyme']},
 {'name': 'chickpea curry',
  'ingredients': ['chickpeas', 'coconut milk', 'onion', 'tomatoes', 'curry powder', 'spinach', 'rice']},
 {'gluten-free oat pancakes',
  'ingredients': ['gluten-free oats', 'banana', 'eggs', 'baking powder', 'vanilla extract', 'maple syrup']},
 {'name': 'kale salad',
  'ingredients': ['kale', 'avocado', 'almonds', 'parmesan cheese', 'lemon juice', 'olive oil', 'black pepper']},
 {'name': 'baked s

["[{'name': 'quinoa salad',\n  'ingredients': ['quinoa', 'cherry tomatoes', 'cucumber', 'feta cheese', 'olive oil', 'lemon juice', 'fresh parsley']},\n {'name': 'vegetable stir-fry',\n  'ingredients': ['broccoli', 'carrots', 'bell peppers', 'soy sauce', 'ginger', 'garlic', 'sesame oil', 'tofu']},\n {'name': 'smoothie bowl',\n  'ingredients': ['frozen berries', 'banana', 'spinach', 'almond milk', 'chia seeds', 'granola', 'coconut flakes']},\n {'name': 'lentil soup',\n  'ingredients': ['lentils', 'carrots', 'celery', 'tomatoes', 'onion', 'garlic', 'vegetable broth', 'thyme']},\n {'name': 'chickpea curry',\n  'ingredients': ['chickpeas', 'coconut milk', 'onion', 'tomatoes', 'curry powder', 'spinach', 'rice']},\n {'gluten-free oat pancakes',\n  'ingredients': ['gluten-free oats', 'banana', 'eggs', 'baking powder', 'vanilla extract', 'maple syrup']},\n {'name': 'kale salad',\n  'ingredients': ['kale', 'avocado', 'almonds', 'parmesan cheese', 'lemon juice', 'olive oil', 'black pepper']},\n {

In [16]:
# with open('eng_resp.pkl', 'wb') as f:
#     pickle.dump(eng_resp, f)

In [5]:
with open('eng_resp.pkl', 'rb') as f:
    eng_resp = pickle.load(f)

In [6]:
eng_resp

["[{'name': 'quinoa salad',\n  'ingredients': ['quinoa', 'cherry tomatoes', 'cucumber', 'feta cheese', 'olive oil', 'lemon juice', 'fresh parsley']},\n {'name': 'vegetable stir-fry',\n  'ingredients': ['broccoli', 'carrots', 'bell peppers', 'soy sauce', 'ginger', 'garlic', 'sesame oil', 'tofu']},\n {'name': 'smoothie bowl',\n  'ingredients': ['frozen berries', 'banana', 'spinach', 'almond milk', 'chia seeds', 'granola', 'coconut flakes']},\n {'name': 'lentil soup',\n  'ingredients': ['lentils', 'carrots', 'celery', 'tomatoes', 'onion', 'garlic', 'vegetable broth', 'thyme']},\n {'name': 'chickpea curry',\n  'ingredients': ['chickpeas', 'coconut milk', 'onion', 'tomatoes', 'curry powder', 'spinach', 'rice']},\n {'gluten-free oat pancakes',\n  'ingredients': ['gluten-free oats', 'banana', 'eggs', 'baking powder', 'vanilla extract', 'maple syrup']},\n {'name': 'kale salad',\n  'ingredients': ['kale', 'avocado', 'almonds', 'parmesan cheese', 'lemon juice', 'olive oil', 'black pepper']},\n {

In [32]:
eng_df, eng_err = create_dataframe_from_responses(eng_resp)

Error parsing response at index 0: invalid syntax (<unknown>, line 12)
Error parsing response at index 18: invalid character '‘' (U+2018) (<unknown>, line 2)
Error parsing response at index 28: unexpected indent (<unknown>, line 21)


In [33]:
eng_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cucumber, cherry tomatoes, red onion,..."
1,avocado toast,"[whole grain bread, ripe avocado, lemon juice,..."
2,smoothie bowl,"[frozen berries, banana, spinach, almond milk,..."
3,chickpea curry,"[chickpeas, coconut milk, onion, tomato, garli..."
4,stuffed bell peppers,"[bell peppers, quinoa, black beans, corn, toma..."
...,...,...
265,spinach and feta stuffed chicken,"[chicken breasts, spinach, feta cheese, garlic..."
266,baked salmon,"[salmon fillets, lemon, garlic, olive oil, ros..."
267,avocado toast,"[whole grain bread, avocado, lemon juice, salt..."
268,kale salad,"[kale, avocado, almonds, parmesan cheese, lemo..."


In [41]:
eng_err

[(0,
  "[{'name': 'quinoa salad',\n  'ingredients': ['quinoa', 'cherry tomatoes', 'cucumber', 'feta cheese', 'olive oil', 'lemon juice', 'fresh parsley']},\n {'name': 'vegetable stir-fry',\n  'ingredients': ['broccoli', 'carrots', 'bell peppers', 'soy sauce', 'ginger', 'garlic', 'sesame oil', 'tofu']},\n {'name': 'smoothie bowl',\n  'ingredients': ['frozen berries', 'banana', 'spinach', 'almond milk', 'chia seeds', 'granola', 'coconut flakes']},\n {'name': 'lentil soup',\n  'ingredients': ['lentils', 'carrots', 'celery', 'tomatoes', 'onion', 'garlic', 'vegetable broth', 'thyme']},\n {'name': 'chickpea curry',\n  'ingredients': ['chickpeas', 'coconut milk', 'onion', 'tomatoes', 'curry powder', 'spinach', 'rice']},\n {'gluten-free oat pancakes',\n  'ingredients': ['gluten-free oats', 'banana', 'eggs', 'baking powder', 'vanilla extract', 'maple syrup']},\n {'name': 'kale salad',\n  'ingredients': ['kale', 'avocado', 'almonds', 'parmesan cheese', 'lemon juice', 'olive oil', 'black pepper']

In [85]:
all_dishes = []

# Regex to find dishes with different 'name' labels
dish_pattern = r"\{\s*'(?:m|the )?name'\s*:\s*'([^']+)',\s*'ingredients'\s*:\s*\[([^]]+)\]\}"

# Process each item in the data list
for _, dishes_str in eng_err:
    # Adjust quotes for matching
    adjusted_str = dishes_str.replace('"', "'")
    # Find all matches in the current string
    matches = re.findall(dish_pattern, adjusted_str)
    if not matches:
        print("No matches found in:", dishes_str)
    for match in matches:
        dish_name = match[0]
        # Remove extra quotes and split ingredients into a list
        ingredients_list = [ingredient.strip().strip("'") for ingredient in match[1].split(',')]
        all_dishes.append({'name': dish_name, 'ingredients': ingredients_list})

In [86]:
eng_err_df = pd.DataFrame(all_dishes)

In [87]:
eng_err_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cherry tomatoes, cucumber, feta chees..."
1,vegetable stir-fry,"[broccoli, carrots, bell peppers, soy sauce, g..."
2,smoothie bowl,"[frozen berries, banana, spinach, almond milk,..."
3,lentil soup,"[lentils, carrots, celery, tomatoes, onion, ga..."
4,chickpea curry,"[chickpeas, coconut milk, onion, tomatoes, cur..."
5,kale salad,"[kale, avocado, almonds, parmesan cheese, lemo..."
6,baked salmon,"[salmon fillets, lemon, garlic, dill, olive oi..."
7,zucchini noodles,"[zucchini, cherry tomatoes, pesto sauce, parme..."
8,cauliflower rice,"[cauliflower, onion, garlic, peas, carrots, so..."
9,quinoa salad,"[quinoa, cherry tomatoes, cucumber, ‘red onion..."


In [88]:
eng_result_df = pd.concat([eng_df, eng_err_df], axis=0)

In [89]:
eng_result_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cucumber, cherry tomatoes, red onion,..."
1,avocado toast,"[whole grain bread, ripe avocado, lemon juice,..."
2,smoothie bowl,"[frozen berries, banana, spinach, almond milk,..."
3,chickpea curry,"[chickpeas, coconut milk, onion, tomato, garli..."
4,stuffed bell peppers,"[bell peppers, quinoa, black beans, corn, toma..."
...,...,...
23,lentil soup,"[lentils, carrots, celery, onions, tomatoes, v..."
24,cauliflower rice,"[cauliflower, onions, garlic, olive oil, salt,..."
25,baked salmon,"[salmon fillets, lemon, dill, olive oil, salt,..."
26,chicken avocado salad,"[chicken breast, avocado, corn, cilantro, lime..."


In [90]:
new_data = pd.DataFrame({
    'name': ['gluten-free oat pancakes', 'kale chips'],
    'ingredients': [
        ['gluten-free oats', 'banana', 'eggs', 'baking powder', 'vanilla extract', 'maple syrup'],
        ['kale', 'olive oil', 'salt']
    ]
})

In [92]:
eng_result_df = pd.concat([eng_result_df, new_data], axis=0)

In [95]:
eng_result_df = eng_result_df.reset_index(drop=True)

In [96]:
eng_result_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cucumber, cherry tomatoes, red onion,..."
1,avocado toast,"[whole grain bread, ripe avocado, lemon juice,..."
2,smoothie bowl,"[frozen berries, banana, spinach, almond milk,..."
3,chickpea curry,"[chickpeas, coconut milk, onion, tomato, garli..."
4,stuffed bell peppers,"[bell peppers, quinoa, black beans, corn, toma..."
...,...,...
295,baked salmon,"[salmon fillets, lemon, dill, olive oil, salt,..."
296,chicken avocado salad,"[chicken breast, avocado, corn, cilantro, lime..."
297,smoothie bowl,"[frozen berries, banana, spinach, almond milk,..."
298,gluten-free oat pancakes,"[gluten-free oats, banana, eggs, baking powder..."


In [434]:
eng_result_df.reset_index(drop=True, inplace=True)

In [435]:
eng_result_df.to_pickle("../dataset/eng_result.pkl")

### Collect Chinese Version

In [11]:
user_prompt_chn = generate_user_prompt('user_prompt_chn.txt')
system_prompt_chn = generate_system_prompt('system_prompt_chn.txt')

In [12]:
chn_resp = call_openai_api(user_prompt_chn, system_prompt_chn, 30)
chn_resp

[{'名字': '绿色蔬菜沙拉', 
  '成分': ['菠菜', '羽衣甘蓝', '小黄瓜', '樱桃番茄', '牛油果', '柠檬汁', '橄榄油', '盐', '黑胡椒']},
 {'名字': '鹰嘴豆泥',
  '成分': ['鹰嘴豆', '大蒜', '芝麻酱', '橄榄油', '柠檬汁', '盐', '辣椒粉']},
 {'名字': '烤鲑鱼',
  '成分': ['鲑鱼', '橄榄油', '柠檬', '迷迭香', '盐', '黑胡檒']},
 {'名字': '蔬菜汤',
  '成分': ['胡萝卜', '洋葱', '大蒜', '芹菜', '西兰花', '番茄', '水', '盐', '胡椒粉']},
 {'名字': '燕麦早餐',
  '成分': ['燕麦', '牛奶', '水果', '蜂蜜', '坚果']},
 {'名字': '素食卷',
  '成分': ['大米纸', '生菜', '胡萝卜丝', '黄瓜丝', '牛油果', '香菜', '蘸水']},
 {'名字': '鸡胸肉沙拉',
  '成分': ['鸡胸肉', '生菜', '番茄', '黄瓜', '鳄梨', '橄榄油', '红酒醋', '盐', '黑胡椒']},
 {'名字': '藜麦拌饭',
  '成分': ['藜麦', '红椒', '黄椒', '黑豆', '玉米', '香菜', '青柠汁', '辣椒粉']},
 {'名字': '水果酸奶',
  '成分': ['希腊酸奶', '新鲜水果', '蜂蜜', '坚果']},
 {'名字': '番茄烤鸡',
  '成分': ['鸡腿肉', '橄榄油', '番茄', '洋葱', '大蒜', '罗勒', '盐', '胡椒']}]

[{'名字': '蒸鲈鱼',
  '成分': ['鲈鱼', '生姜', '葱', '酱油', '料酒', '香油']},

 {'名字': '番茄炒蛋',
  '成分': ['鸡蛋', '新鲜番茄', '盐', '糖', '大蒜', '葱']},

 {'名字': '炒蘑菇菠菜',
  '成分': ['菠菜', '蘑菇', '大蒜', '橄榄油', '盐']},

 {'名字': '酸辣汤',
  '成分': ['猪肉细丝', '木耳', '豆芽', '鸡蛋', '葱', '香菜', '盐', '醋', '辣椒油']},

 

["[{'名字': '绿色蔬菜沙拉', \n  '成分': ['菠菜', '羽衣甘蓝', '小黄瓜', '樱桃番茄', '牛油果', '柠檬汁', '橄榄油', '盐', '黑胡椒']},\n {'名字': '鹰嘴豆泥',\n  '成分': ['鹰嘴豆', '大蒜', '芝麻酱', '橄榄油', '柠檬汁', '盐', '辣椒粉']},\n {'名字': '烤鲑鱼',\n  '成分': ['鲑鱼', '橄榄油', '柠檬', '迷迭香', '盐', '黑胡檒']},\n {'名字': '蔬菜汤',\n  '成分': ['胡萝卜', '洋葱', '大蒜', '芹菜', '西兰花', '番茄', '水', '盐', '胡椒粉']},\n {'名字': '燕麦早餐',\n  '成分': ['燕麦', '牛奶', '水果', '蜂蜜', '坚果']},\n {'名字': '素食卷',\n  '成分': ['大米纸', '生菜', '胡萝卜丝', '黄瓜丝', '牛油果', '香菜', '蘸水']},\n {'名字': '鸡胸肉沙拉',\n  '成分': ['鸡胸肉', '生菜', '番茄', '黄瓜', '鳄梨', '橄榄油', '红酒醋', '盐', '黑胡椒']},\n {'名字': '藜麦拌饭',\n  '成分': ['藜麦', '红椒', '黄椒', '黑豆', '玉米', '香菜', '青柠汁', '辣椒粉']},\n {'名字': '水果酸奶',\n  '成分': ['希腊酸奶', '新鲜水果', '蜂蜜', '坚果']},\n {'名字': '番茄烤鸡',\n  '成分': ['鸡腿肉', '橄榄油', '番茄', '洋葱', '大蒜', '罗勒', '盐', '胡椒']}]\n",
 "[{'名字': '蒸鲈鱼',\n  '成分': ['鲈鱼', '生姜', '葱', '酱油', '料酒', '香油']},\n\n {'名字': '番茄炒蛋',\n  '成分': ['鸡蛋', '新鲜番茄', '盐', '糖', '大蒜', '葱']},\n\n {'名字': '炒蘑菇菠菜',\n  '成分': ['菠菜', '蘑菇', '大蒜', '橄榄油', '盐']},\n\n {'名字': '酸辣汤',\n  '成分': ['猪肉细丝', '木耳', '豆芽', '鸡

In [17]:
# with open('chn_resp.pkl', 'wb') as f:
#     pickle.dump(chn_resp, f)

In [101]:
with open('responses/chn_resp.pkl', 'rb') as f:
    chn_resp = pickle.load(f)

In [102]:
chn_resp

["[{'名字': '绿色蔬菜沙拉', \n  '成分': ['菠菜', '羽衣甘蓝', '小黄瓜', '樱桃番茄', '牛油果', '柠檬汁', '橄榄油', '盐', '黑胡椒']},\n {'名字': '鹰嘴豆泥',\n  '成分': ['鹰嘴豆', '大蒜', '芝麻酱', '橄榄油', '柠檬汁', '盐', '辣椒粉']},\n {'名字': '烤鲑鱼',\n  '成分': ['鲑鱼', '橄榄油', '柠檬', '迷迭香', '盐', '黑胡檒']},\n {'名字': '蔬菜汤',\n  '成分': ['胡萝卜', '洋葱', '大蒜', '芹菜', '西兰花', '番茄', '水', '盐', '胡椒粉']},\n {'名字': '燕麦早餐',\n  '成分': ['燕麦', '牛奶', '水果', '蜂蜜', '坚果']},\n {'名字': '素食卷',\n  '成分': ['大米纸', '生菜', '胡萝卜丝', '黄瓜丝', '牛油果', '香菜', '蘸水']},\n {'名字': '鸡胸肉沙拉',\n  '成分': ['鸡胸肉', '生菜', '番茄', '黄瓜', '鳄梨', '橄榄油', '红酒醋', '盐', '黑胡椒']},\n {'名字': '藜麦拌饭',\n  '成分': ['藜麦', '红椒', '黄椒', '黑豆', '玉米', '香菜', '青柠汁', '辣椒粉']},\n {'名字': '水果酸奶',\n  '成分': ['希腊酸奶', '新鲜水果', '蜂蜜', '坚果']},\n {'名字': '番茄烤鸡',\n  '成分': ['鸡腿肉', '橄榄油', '番茄', '洋葱', '大蒜', '罗勒', '盐', '胡椒']}]\n",
 "[{'名字': '蒸鲈鱼',\n  '成分': ['鲈鱼', '生姜', '葱', '酱油', '料酒', '香油']},\n\n {'名字': '番茄炒蛋',\n  '成分': ['鸡蛋', '新鲜番茄', '盐', '糖', '大蒜', '葱']},\n\n {'名字': '炒蘑菇菠菜',\n  '成分': ['菠菜', '蘑菇', '大蒜', '橄榄油', '盐']},\n\n {'名字': '酸辣汤',\n  '成分': ['猪肉细丝', '木耳', '豆芽', '鸡

In [104]:
trans_chn_resp = translate_to_eng(chn_resp)

In [105]:
trans_chn_resp

["[{'name': 'green salad', \n'ingredients': ['spinach', 'kale', 'cucumber', 'cherry tomatoes', 'avocado', 'lemon juice', 'olive oil', 'salt', 'black pepper']},\n{'name': 'hummus',\n'ingredients': ['chickpeas', 'garlic', 'tahini', 'olive oil', 'lemon juice', 'salt', 'chili powder']},\n{'name': 'grilled salmon',\n'ingredients': ['salmon', 'olive oil', 'lemon', 'rosemary', 'salt', 'black bean']},\n{'name': 'vegetable soup',\n'ingredients': ['carrot', 'onion', 'garlic', 'celery', 'broccoli', 'tomato', 'water', 'salt', 'pepper']},\n{'name': 'oatmeal breakfast',\n'ingredients': ['oatmeal', 'milk', 'fruit', 'honey', 'nuts']},\n{'name': 'vegetarian wrap',\n'ingredients': ['rice paper', 'lettuce', 'shredded carrot', 'shredded cucumber', 'avocado', 'coriander', 'dipping water']},\n{'name': 'chicken breast salad',\n'ingredients': ['chicken breast', 'lettuce', 'tomato', 'cucumber', 'avocado', 'olive oil', 'red wine vinegar', 'salt', 'black pepper']},\n{'name': 'quinoa rice',\n'ingredients': ['quin

In [111]:
chn_df, chn_err = create_dataframe_from_responses_2(trans_chn_resp)

Error parsing response at index 3: invalid syntax (<unknown>, line 1)
Error parsing response at index 8: closing parenthesis ']' does not match opening parenthesis '{' (<unknown>, line 1)


In [112]:
chn_df

Unnamed: 0,name,ingredients
0,green salad,"[spinach, kale, cucumber, cherry tomatoes, avo..."
1,hummus,"[chickpeas, garlic, tahini, olive oil, lemon j..."
2,grilled salmon,"[salmon, olive oil, lemon, rosemary, salt, bla..."
3,vegetable soup,"[carrot, onion, garlic, celery, broccoli, toma..."
4,oatmeal breakfast,"[oatmeal, milk, fruit, honey, nuts]"
...,...,...
275,healthy avocado salad,"[avocado, pomegranate seeds, green lettuce, wa..."
276,mixed bean stew,"[seven kinds of beans, tomato, onion, ginger, ..."
277,corn and cucumber salad,"[corn, cucumber, red pepper, onion, green pepp..."
278,mushrooms with eggs,"[mushrooms, eggs, onions, garlic, olive oil, s..."


In [113]:
chn_err

[(3,
  "here are the ingredient lists for 10 healthy recipes:\n\n[{'name': 'avocado salad',\n'ingredients': ['avocado', 'tomato', 'purple onion', 'cucumber', 'lemon juice', 'olive oil', 'salt', 'black pepper']},\n{'name': 'garlic grilled chicken',\n'ingredients': ['chicken breast', 'olive oil', 'garlic', 'lemon juice', 'rosemary', 'salt', 'black pepper']},\n{'name': 'grilled sweet potato',\n'ingredients': ['sweet potato', 'olive oil', 'chili powder', 'honey', 'lemon juice', 'salt']},\n{'name': 'boiled eggs',\n'ingredients': ['eggs', 'water']},\n{'name': 'caesar salad',\n'ingredients': ['romaine lettuce', 'parmesan cheese', 'whole wheat bread crumbs', 'olive oil', 'lemon juice', 'garlic', 'chili', 'rosemary', 'salt', 'black pepper']},\n{'name': 'tofu stir-fry',\n'ingredients': ['tofu', 'carrot', 'sweet peas', 'green pepper', 'soy sauce', 'ginger', 'garlic', 'sesame oil']},\n{'name': 'green bean soup',\n'ingredients': ['green bean', 'cilantro', 'ginger', 'garlic', 'onion', 'vegetable oil

In [114]:
all_dishes = []

# Regex to find dishes with different 'name' labels
dish_pattern = r"\{\s*'(?:m|the )?name'\s*:\s*'([^']+)',\s*'ingredients'\s*:\s*\[([^]]+)\]\}"

# Process each item in the data list
for _, dishes_str in chn_err:
    # Adjust quotes for matching
    adjusted_str = dishes_str.replace('"', "'")
    # Find all matches in the current string
    matches = re.findall(dish_pattern, adjusted_str)
    if not matches:
        print("No matches found in:", dishes_str)
    for match in matches:
        dish_name = match[0]
        # Remove extra quotes and split ingredients into a list
        ingredients_list = [ingredient.strip().strip("'") for ingredient in match[1].split(',')]
        all_dishes.append({'name': dish_name, 'ingredients': ingredients_list})

In [115]:
chn_err_df = pd.DataFrame(all_dishes)

In [116]:
chn_err_df

Unnamed: 0,name,ingredients
0,avocado salad,"[avocado, tomato, purple onion, cucumber, lemo..."
1,garlic grilled chicken,"[chicken breast, olive oil, garlic, lemon juic..."
2,grilled sweet potato,"[sweet potato, olive oil, chili powder, honey,..."
3,boiled eggs,"[eggs, water]"
4,caesar salad,"[romaine lettuce, parmesan cheese, whole wheat..."
5,tofu stir-fry,"[tofu, carrot, sweet peas, green pepper, soy s..."
6,green bean soup,"[green bean, cilantro, ginger, garlic, onion, ..."
7,grilled salmon,"[salmon, olive oil, garlic, lemon, rosemary, s..."
8,cucumber juice,"[cucumber, apple, lemon, honey]"
9,nuts and grains bowl,"[brown rice, quinoa, walnut, almond, mung bean..."


In [117]:
new_data = pd.DataFrame({
    'name': ['quichou'],
    'ingredients': [
        ['oatmeal', 'honey', 'almonds', 'walnuts', 'yucca nectar', 'fresh blueberries']
    ]
})

In [118]:
chn_result_df = pd.concat([chn_df, chn_err_df], axis=0)
chn_result_df = pd.concat([chn_result_df, new_data], axis=0)
chn_result_df

Unnamed: 0,name,ingredients
0,green salad,"[spinach, kale, cucumber, cherry tomatoes, avo..."
1,hummus,"[chickpeas, garlic, tahini, olive oil, lemon j..."
2,grilled salmon,"[salmon, olive oil, lemon, rosemary, salt, bla..."
3,vegetable soup,"[carrot, onion, garlic, celery, broccoli, toma..."
4,oatmeal breakfast,"[oatmeal, milk, fruit, honey, nuts]"
...,...,...
15,grilled mackerel with lime,"[mackerel, lime, ginger slices, garlic, sour b..."
16,vegetable bean stew,"[bean, potato, carrot, cabbage, tomato, zucchi..."
17,roasted pumpkin salad with apples,"[pumpkin, apple, walnut, cranberry, jackfruit,..."
18,sugar-free cake,"[almond powder, coconut powder, vanilla extrac..."


In [424]:
with open('../dataset/chn_result.pkl', 'rb') as f:
    chn_result = pickle.load(f)

In [483]:
chn_result.reset_index(drop=True, inplace=True)

In [488]:
chn_result

Unnamed: 0,name,ingredients
0,green salad,"[spinach, kale, cucumber, cherry tomatoes, avo..."
1,hummus,"[chickpeas, garlic, tahini, olive oil, lemon j..."
2,grilled salmon,"[salmon, olive oil, lemon, rosemary, salt, bla..."
3,vegetable soup,"[carrot, onion, garlic, celery, broccoli, toma..."
4,oatmeal breakfast,"[oatmeal, milk, fruit, honey, nuts]"
...,...,...
295,grilled mackerel with lime,"[mackerel, lime, ginger slices, garlic, sour b..."
296,vegetable bean stew,"[bean, potato, carrot, cabbage, tomato, zucchi..."
297,roasted pumpkin salad with apples,"[pumpkin, apple, walnut, cranberry, jackfruit,..."
298,sugar-free cake,"[almond powder, coconut powder, vanilla extrac..."


In [485]:
float_rows = chn_result['ingredients'].apply(lambda x: isinstance(x, float))
rows_with_floats = chn_result[float_rows]
print("Rows containing floats:")
print(rows_with_floats)

Rows containing floats:
Empty DataFrame
Columns: [name, ingredients]
Index: []


In [428]:
# manually add back the ones that didn't get parsed successfully
# the ingredients exist in the translated text, just didn't get parsed
correct_ingredients = {
    56: ['chicken breast', 'olive oil', 'carrot', 'leek', 'sweet pepper', 'rosemary', 'black pepper', 'seasalt'],
    87: ['oatmeal', 'almond milk', 'walnut', 'hazelnut', 'honey', 'cinnamon', 'vanilla extract']
}

for idx, ingredients in correct_ingredients.items():
    chn_result.at[idx, 'ingredients'] = ingredients

In [486]:
chn_result.at[87, 'ingredients']

['oatmeal',
 'almond milk',
 'walnut',
 'hazelnut',
 'honey',
 'cinnamon',
 'vanilla extract']

In [490]:
chn_result.to_pickle("../dataset/chn_result.pkl")

### Collect Portuguese Version

In [13]:
user_prompt_port = generate_user_prompt('user_prompt_port.txt')
system_prompt_port = generate_system_prompt('system_prompt_port.txt')

In [18]:
port_resp = call_openai_api(user_prompt_port, system_prompt_port, 30)
port_resp

[{'nome': 'salada de quinoa com legumes',
  'ingredientes': ['quinoa', 'pepino', 'tomate cereja', 'pimentão', 'limão', 'azeite de oliva', 'sal', 'pimenta']},
 {'nome': 'smoothie verde',
  'ingredientes': ['espinafre', 'banana', 'manga', 'leite de amêndoas', 'mel', 'chia']},
 {'nome': 'omelete de claras com espinafre',
  'ingredientes': ['claras de ovo', 'espinafre', 'tomate', 'queijo feta', 'sal', 'pimenta']},
 {'nome': 'sopa de lentilha',
  'ingredientes': ['lentilhas', 'cenoura', 'cebola', 'alho', 'tomilho', 'caldo de vegetais', 'sal', 'pimenta']},
 {'nome': 'salada de frango grelhado',
  'ingredientes': ['peito de frango', 'alface romana', 'croutons', 'queijo parmesão', 'molho César', 'limão']},
 {'nome': 'tacos de peixe',
  'ingredientes': ['filetes de tilápia', 'pimentão', 'cebola roxa', 'coentro', 'tortilhas de milho', 'limão', 'sal']},
 {'nome': 'hambúrguer de grão-de-bico',
  'ingredientes': ['grão-de-bico', 'cebola', 'alho', 'salsinha', 'cominho', 'pão integral', 'tomate', 'al

["[{'nome': 'salada de quinoa com legumes',\n  'ingredientes': ['quinoa', 'pepino', 'tomate cereja', 'pimentão', 'limão', 'azeite de oliva', 'sal', 'pimenta']},\n {'nome': 'smoothie verde',\n  'ingredientes': ['espinafre', 'banana', 'manga', 'leite de amêndoas', 'mel', 'chia']},\n {'nome': 'omelete de claras com espinafre',\n  'ingredientes': ['claras de ovo', 'espinafre', 'tomate', 'queijo feta', 'sal', 'pimenta']},\n {'nome': 'sopa de lentilha',\n  'ingredientes': ['lentilhas', 'cenoura', 'cebola', 'alho', 'tomilho', 'caldo de vegetais', 'sal', 'pimenta']},\n {'nome': 'salada de frango grelhado',\n  'ingredientes': ['peito de frango', 'alface romana', 'croutons', 'queijo parmesão', 'molho César', 'limão']},\n {'nome': 'tacos de peixe',\n  'ingredientes': ['filetes de tilápia', 'pimentão', 'cebola roxa', 'coentro', 'tortilhas de milho', 'limão', 'sal']},\n {'nome': 'hambúrguer de grão-de-bico',\n  'ingredientes': ['grão-de-bico', 'cebola', 'alho', 'salsinha', 'cominho', 'pão integral'

In [19]:
# with open('port_resp.pkl', 'wb') as f:
#     pickle.dump(port_resp, f)

In [120]:
with open('responses/port_resp.pkl', 'rb') as f:
    port_resp = pickle.load(f)

In [121]:
port_resp

["[{'nome': 'salada de quinoa com legumes',\n  'ingredientes': ['quinoa', 'pepino', 'tomate cereja', 'pimentão', 'limão', 'azeite de oliva', 'sal', 'pimenta']},\n {'nome': 'smoothie verde',\n  'ingredientes': ['espinafre', 'banana', 'manga', 'leite de amêndoas', 'mel', 'chia']},\n {'nome': 'omelete de claras com espinafre',\n  'ingredientes': ['claras de ovo', 'espinafre', 'tomate', 'queijo feta', 'sal', 'pimenta']},\n {'nome': 'sopa de lentilha',\n  'ingredientes': ['lentilhas', 'cenoura', 'cebola', 'alho', 'tomilho', 'caldo de vegetais', 'sal', 'pimenta']},\n {'nome': 'salada de frango grelhado',\n  'ingredientes': ['peito de frango', 'alface romana', 'croutons', 'queijo parmesão', 'molho César', 'limão']},\n {'nome': 'tacos de peixe',\n  'ingredientes': ['filetes de tilápia', 'pimentão', 'cebola roxa', 'coentro', 'tortilhas de milho', 'limão', 'sal']},\n {'nome': 'hambúrguer de grão-de-bico',\n  'ingredientes': ['grão-de-bico', 'cebola', 'alho', 'salsinha', 'cominho', 'pão integral'

In [122]:
trans_port_resp = translate_to_eng(port_resp)

In [123]:
trans_port_resp

["[{'name': 'quinoa salad with vegetables',\n  'ingredients': ['quinoa', 'cucumber', 'cherry tomato', 'pepper', 'lemon', 'olive oil', 'salt', 'pepper']},\n {'name': 'green smoothie',\n  'ingredients': ['spinach', 'banana', 'mango', 'almond milk', 'honey', 'chia']},\n {'name': 'egg white omelet with spinach',\n  'ingredients': ['egg whites', 'spinach', 'tomato', 'feta cheese', 'salt', 'pepper']},\n {'name': 'lentil soup',\n  'ingredients': ['lentils', 'carrots', 'onions', 'garlic', 'thyme', 'vegetable broth', 'salt', 'pepper']},\n {'name': 'grilled chicken salad',\n  'ingredients': ['chicken breast', 'romaine lettuce', 'croutons', 'parmesan cheese', 'caesar sauce', 'lemon']},\n {'name': 'fish tacos',\n  'ingredients': ['tilapia fillets', 'peppers', 'red onion', 'cilantro', 'corn tortillas', 'lime', 'salt']},\n {'name': 'chickpea burger',\n  'ingredients': ['chickpeas', 'onion', 'garlic', 'parsley', 'cumin', 'whole grain bread', 'tomato', 'lettuce']},\n {'name': 'fruit chia pudding',\n  

In [124]:
port_df, port_err = create_dataframe_from_responses_2(trans_port_resp)

Error parsing response at index 22: unterminated string literal (detected at line 10) (<unknown>, line 10)


In [125]:
port_df

Unnamed: 0,name,ingredients
0,quinoa salad with vegetables,"[quinoa, cucumber, cherry tomato, pepper, lemo..."
1,green smoothie,"[spinach, banana, mango, almond milk, honey, c..."
2,egg white omelet with spinach,"[egg whites, spinach, tomato, feta cheese, sal..."
3,lentil soup,"[lentils, carrots, onions, garlic, thyme, vege..."
4,grilled chicken salad,"[chicken breast, romaine lettuce, croutons, pa..."
...,...,...
285,detox green juice,"[spinach, green apple, lemon, ginger, cucumber..."
286,chicken and avocado wrap,"[whole tortillas, chicken breast, avocado, let..."
287,beetroot and walnut salad,"[beetroot, nuts, goat cheese, arugula, balsami..."
288,night oats,"[rolled oats, almond milk, hiss, banana, honey..."


In [126]:
port_err

[(22,
  "[{'name': 'quinoa vegetable salad',\n  'ingredients': ['quinoa', 'cucumber', 'cherry tomato', 'red pepper', 'red onion', 'lemon', 'olive oil', 'cilantro', 'salt', 'pepper'] },\n {'name': 'spinach and banana smoothie',\n  'ingredients': ['spinach', 'banana', 'almond milk', 'greek yogurt', 'honey', 'chia seeds']},\n {'name': 'greek salad',\n  'ingredients': ['tomato', 'cucumber', 'red onion', 'kalamata olive', 'feta cheese', 'olive oil', 'balsamic vinegar', 'oregano']},\n {'name': 'avocado chicken wrap',\n  'ingredients': ['cooked chicken breast', 'avocado', 'tomato', 'lettuce', 'whole-grain tortillas', 'greek yogurt', 'lemon', 'cayenne pepper']},\n {'name': 'lentil soup',\n  'ingredients': ['lentils', 'carrots', 'onions', 'garlic', 'tomatoes', 'vegetable broth', 'cumin', 'paprika', 'salt', 'pepper', 'olive oil' olive']},\n {'name': 'vegetarian poke bowl',\n  'ingredients': ['brown rice', 'tofu', 'mango', 'avocado', 'cucumber', 'grated carrot', 'soy sauce', 'sesame oil', 'sesame

In [127]:
all_dishes = []

# Regex to find dishes with different 'name' labels
dish_pattern = r"\{\s*'(?:m|the )?name'\s*:\s*'([^']+)',\s*'ingredients'\s*:\s*\[([^]]+)\]\}"

# Process each item in the data list
for _, dishes_str in port_err:
    # Adjust quotes for matching
    adjusted_str = dishes_str.replace('"', "'")
    # Find all matches in the current string
    matches = re.findall(dish_pattern, adjusted_str)
    if not matches:
        print("No matches found in:", dishes_str)
    for match in matches:
        dish_name = match[0]
        # Remove extra quotes and split ingredients into a list
        ingredients_list = [ingredient.strip().strip("'") for ingredient in match[1].split(',')]
        all_dishes.append({'name': dish_name, 'ingredients': ingredients_list})

In [129]:
port_err_df = pd.DataFrame(all_dishes)
port_err_df

Unnamed: 0,name,ingredients
0,spinach and banana smoothie,"[spinach, banana, almond milk, greek yogurt, h..."
1,greek salad,"[tomato, cucumber, red onion, kalamata olive, ..."
2,avocado chicken wrap,"[cooked chicken breast, avocado, tomato, lettu..."
3,lentil soup,"[lentils, carrots, onions, garlic, tomatoes, v..."
4,vegetarian poke bowl,"[brown rice, tofu, mango, avocado, cucumber, g..."
5,guacamole,"[avocado, tomato, onion, cilantro, jalapeño, l..."
6,night oats,"[oats, almond milk, greek yogurt, honey, banan..."
7,açaí bowl,"[açaí pulp, banana, strawberry, granola, honey..."
8,tuna salad,"[tuna in water, light mayonnaise, celery, carr..."


In [130]:
new_data = pd.DataFrame({
    'name': ['quinoa vegetable salad'],
    'ingredients': [
        ['quinoa', 'cucumber', 'cherry tomato', 'red pepper', 'red onion', 'lemon', 'olive oil', 'cilantro', 'salt', 'pepper']
    ]
})

In [131]:
port_result_df = pd.concat([port_df, port_err_df], axis=0)
port_result_df = pd.concat([port_result_df, new_data], axis=0)
port_result_df

Unnamed: 0,name,ingredients
0,quinoa salad with vegetables,"[quinoa, cucumber, cherry tomato, pepper, lemo..."
1,green smoothie,"[spinach, banana, mango, almond milk, honey, c..."
2,egg white omelet with spinach,"[egg whites, spinach, tomato, feta cheese, sal..."
3,lentil soup,"[lentils, carrots, onions, garlic, thyme, vege..."
4,grilled chicken salad,"[chicken breast, romaine lettuce, croutons, pa..."
...,...,...
5,guacamole,"[avocado, tomato, onion, cilantro, jalapeño, l..."
6,night oats,"[oats, almond milk, greek yogurt, honey, banan..."
7,açaí bowl,"[açaí pulp, banana, strawberry, granola, honey..."
8,tuna salad,"[tuna in water, light mayonnaise, celery, carr..."


In [556]:
port_result_df.reset_index(drop=True, inplace=True)

In [557]:
port_result_df

Unnamed: 0,name,ingredients
0,quinoa salad with vegetables,"[quinoa, cucumber, cherry tomato, pepper, lemo..."
1,green smoothie,"[spinach, banana, mango, almond milk, honey, c..."
2,egg white omelet with spinach,"[egg whites, spinach, tomato, feta cheese, sal..."
3,lentil soup,"[lentils, carrots, onions, garlic, thyme, vege..."
4,grilled chicken salad,"[chicken breast, romaine lettuce, croutons, pa..."
...,...,...
295,guacamole,"[avocado, tomato, onion, cilantro, jalapeño, l..."
296,night oats,"[oats, almond milk, greek yogurt, honey, banan..."
297,açaí bowl,"[açaí pulp, banana, strawberry, granola, honey..."
298,tuna salad,"[tuna in water, light mayonnaise, celery, carr..."


In [558]:
port_result_df.to_pickle("../dataset/port_result.pkl")

### Collect Spanish Version

In [21]:
user_prompt_spanish = generate_user_prompt('user_prompt_spanish.txt')
system_prompt_spanish = generate_system_prompt('system_prompt_spanish.txt')

In [22]:
spanish_resp = call_openai_api(user_prompt_spanish, system_prompt_spanish, 30)
spanish_resp

[{'nombre': 'ensalada mediterránea',
  'ingredientes': ['tomates cherry', 'pepino', 'cebolla morada', 'queso feta', 'aceitunas kalamata', 'aceite de oliva', 'vinagre balsámico', 'oregano']},
 {'nombre': 'batido de kale y plátano',
  'ingredientes': ['kale', 'plátano', 'yogur griego', 'miel', 'almendras', 'leche de almendra']},
 {'nombre': 'quinoa con verduras',
  'ingredientes': ['quinoa', 'calabacín', 'pimiento rojo', 'cebolla', 'ajo', 'caldo de verduras', 'aceite de oliva']},
 {'nombre': 'sopa de lentejas',
  'ingredientes': ['lentejas', 'tomate', 'zanahoria', 'apio', 'cebolla', 'ajo', 'comino', 'caldo de verduras']},
 {'nombre': 'salteado de tofu',
  'ingredientes': ['tofu', 'brócoli', 'zanahoria', 'pimiento', 'salsa de soja', 'ajo', 'jengibre', 'aceite de sésamo']},
 {'nombre': 'avena nocturna',
  'ingredientes': ['avena', 'leche de almendra', 'chia', 'miel', 'frutos rojos']},
 {'nombre': 'ensalada de garbanzos',
  'ingredientes': ['garbanzos', 'pimiento verde', 'tomate', 'cebolla'

["[{'nombre': 'ensalada mediterránea',\n  'ingredientes': ['tomates cherry', 'pepino', 'cebolla morada', 'queso feta', 'aceitunas kalamata', 'aceite de oliva', 'vinagre balsámico', 'oregano']},\n {'nombre': 'batido de kale y plátano',\n  'ingredientes': ['kale', 'plátano', 'yogur griego', 'miel', 'almendras', 'leche de almendra']},\n {'nombre': 'quinoa con verduras',\n  'ingredientes': ['quinoa', 'calabacín', 'pimiento rojo', 'cebolla', 'ajo', 'caldo de verduras', 'aceite de oliva']},\n {'nombre': 'sopa de lentejas',\n  'ingredientes': ['lentejas', 'tomate', 'zanahoria', 'apio', 'cebolla', 'ajo', 'comino', 'caldo de verduras']},\n {'nombre': 'salteado de tofu',\n  'ingredientes': ['tofu', 'brócoli', 'zanahoria', 'pimiento', 'salsa de soja', 'ajo', 'jengibre', 'aceite de sésamo']},\n {'nombre': 'avena nocturna',\n  'ingredientes': ['avena', 'leche de almendra', 'chia', 'miel', 'frutos rojos']},\n {'nombre': 'ensalada de garbanzos',\n  'ingredientes': ['garbanzos', 'pimiento verde', 'tom

In [23]:
# with open('spanish_resp.pkl', 'wb') as f:
#     pickle.dump(spanish_resp, f)

In [133]:
with open('responses/spanish_resp.pkl', 'rb') as f:
    spanish_resp = pickle.load(f)

In [134]:
spanish_resp

["[{'nombre': 'ensalada mediterránea',\n  'ingredientes': ['tomates cherry', 'pepino', 'cebolla morada', 'queso feta', 'aceitunas kalamata', 'aceite de oliva', 'vinagre balsámico', 'oregano']},\n {'nombre': 'batido de kale y plátano',\n  'ingredientes': ['kale', 'plátano', 'yogur griego', 'miel', 'almendras', 'leche de almendra']},\n {'nombre': 'quinoa con verduras',\n  'ingredientes': ['quinoa', 'calabacín', 'pimiento rojo', 'cebolla', 'ajo', 'caldo de verduras', 'aceite de oliva']},\n {'nombre': 'sopa de lentejas',\n  'ingredientes': ['lentejas', 'tomate', 'zanahoria', 'apio', 'cebolla', 'ajo', 'comino', 'caldo de verduras']},\n {'nombre': 'salteado de tofu',\n  'ingredientes': ['tofu', 'brócoli', 'zanahoria', 'pimiento', 'salsa de soja', 'ajo', 'jengibre', 'aceite de sésamo']},\n {'nombre': 'avena nocturna',\n  'ingredientes': ['avena', 'leche de almendra', 'chia', 'miel', 'frutos rojos']},\n {'nombre': 'ensalada de garbanzos',\n  'ingredientes': ['garbanzos', 'pimiento verde', 'tom

In [135]:
trans_spanish_resp = translate_to_eng(spanish_resp)

In [136]:
trans_spanish_resp

["[{'name': 'mediterranean salad',\n  'ingredients': ['cherry tomatoes', 'cucumber', 'red onion', 'feta cheese', 'kalamata olives', 'olive oil', 'balsamic vinegar', 'oregano']},\n {'name': 'kale and banana smoothie',\n  'ingredients': ['kale', 'banana', 'greek yogurt', 'honey', 'almonds', 'almond milk']},\n {'name': 'quinoa with vegetables',\n  'ingredients': ['quinoa', 'zucchini', 'red pepper', 'onion', 'garlic', 'vegetable broth', 'olive oil']},\n {'name': 'lentil soup',\n  'ingredients': ['lentils', 'tomato', 'carrot', 'celery', 'onion', 'garlic', 'cumin', 'vegetable broth']},\n {'name': 'tofu stir fry',\n  'ingredients': ['tofu', 'broccoli', 'carrot', 'pepper', 'soy sauce', 'garlic', 'ginger', 'sesame oil']},\n {'name': 'night oats',\n  'ingredients': ['oats', 'almond milk', 'chia', 'honey', 'red berries']},\n {'name': 'chickpea salad',\n  'ingredients': ['chickpeas', 'green pepper', 'tomato', 'onion', 'cilantro', 'lemon', 'olive oil']},\n {'name': 'chicken and avocado wrap',\n  'i

In [137]:
spanish_df, spanish_err = create_dataframe_from_responses_2(trans_spanish_resp)

Error parsing response at index 4: closing parenthesis '}' does not match opening parenthesis '[' (<unknown>, line 16)


In [138]:
spanish_df

Unnamed: 0,name,ingredients
0,mediterranean salad,"[cherry tomatoes, cucumber, red onion, feta ch..."
1,kale and banana smoothie,"[kale, banana, greek yogurt, honey, almonds, a..."
2,quinoa with vegetables,"[quinoa, zucchini, red pepper, onion, garlic, ..."
3,lentil soup,"[lentils, tomato, carrot, celery, onion, garli..."
4,tofu stir fry,"[tofu, broccoli, carrot, pepper, soy sauce, ga..."
...,...,...
285,lemon chicken with broccoli,"[chicken, lemon, broccoli, garlic, olive oil, ..."
286,caprese salad,"[tomato, mozzarella, basil, olive oil, salt, p..."
287,baked salmon with asparagus,"[salmon, asparagus, lemon, olive oil, salt, pe..."
288,acai bowl,"[acai powder, banana, strawberries, granola, h..."


In [139]:
spanish_err

[(4,
  "[{'name': 'quinoa and vegetable salad',\n'ingredients': ['quinoa', 'cherry tomatoes', 'cucumber', 'spinach', 'lemon', 'olive oil']},\n{'name': 'kale and apple smoothie',\n'ingredients': ['kale', 'green apple', 'banana', 'coconut water', 'honey']},\n{'name': 'lentil soup',\n'ingredients': ['lentils', 'carrot', 'celery', 'onion', 'garlic', 'tomato', 'cumin']},\n{'name': 'tofu and broccoli stir-fry',\n'ingredients': ['tofu', 'broccoli', 'red pepper', 'soy sauce', 'sesame oil', 'garlic']},\n{'name': 'whole wheat pasta with pesto',\n'ingredients': ['whole wheat pasta', 'basil', 'pine nuts', 'garlic', 'parmesan cheese', 'olive oil']},\n{'name': 'vegetable and hummus wrap',\n'ingredients': ['whole wheat tortilla', 'hummus', 'romaine lettuce', 'tomato', 'cucumber', 'carrot']},\n{'name': 'oatmeal and fruit bowl',\n'ingredients': ['oatmeal', 'almond milk', 'banana', 'blueberries', 'almonds', 'honey']},\n{'name': 'chickpea and spinach salad',\n'ingredients': ['chickpeas', 'spinach', 'red 

In [140]:
all_dishes = []

# Regex to find dishes with different 'name' labels
dish_pattern = r"\{\s*'(?:m|the )?name'\s*:\s*'([^']+)',\s*'ingredients'\s*:\s*\[([^]]+)\]\}"

# Process each item in the data list
for _, dishes_str in spanish_err:
    # Adjust quotes for matching
    adjusted_str = dishes_str.replace('"', "'")
    # Find all matches in the current string
    matches = re.findall(dish_pattern, adjusted_str)
    if not matches:
        print("No matches found in:", dishes_str)
    for match in matches:
        dish_name = match[0]
        # Remove extra quotes and split ingredients into a list
        ingredients_list = [ingredient.strip().strip("'") for ingredient in match[1].split(',')]
        all_dishes.append({'name': dish_name, 'ingredients': ingredients_list})

In [141]:
spanish_err_df = pd.DataFrame(all_dishes)

In [142]:
spanish_err_df

Unnamed: 0,name,ingredients
0,quinoa and vegetable salad,"[quinoa, cherry tomatoes, cucumber, spinach, l..."
1,kale and apple smoothie,"[kale, green apple, banana, coconut water, honey]"
2,lentil soup,"[lentils, carrot, celery, onion, garlic, tomat..."
3,tofu and broccoli stir-fry,"[tofu, broccoli, red pepper, soy sauce, sesame..."
4,whole wheat pasta with pesto,"[whole wheat pasta, basil, pine nuts, garlic, ..."
5,vegetable and hummus wrap,"[whole wheat tortilla, hummus, romaine lettuce..."
6,oatmeal and fruit bowl,"[oatmeal, almond milk, banana, blueberries, al..."
7,chickpea and spinach salad,"[chickpeas, spinach, red pepper, onion' ['purp..."
8,salmon baked with vegetables,"[salmon, asparagus, lemon, pepper, olive oil]"
9,carrot cream,"[carrot, onion, garlic, vegetable broth, cocon..."


In [143]:
spanish_result_df = pd.concat([spanish_df, spanish_err_df], axis=0)

In [438]:
spanish_result_df.reset_index(drop=True, inplace=True)

In [439]:
spanish_result_df

Unnamed: 0,name,ingredients
0,mediterranean salad,"[cherry tomatoes, cucumber, red onion, feta ch..."
1,kale and banana smoothie,"[kale, banana, greek yogurt, honey, almonds, a..."
2,quinoa with vegetables,"[quinoa, zucchini, red pepper, onion, garlic, ..."
3,lentil soup,"[lentils, tomato, carrot, celery, onion, garli..."
4,tofu stir fry,"[tofu, broccoli, carrot, pepper, soy sauce, ga..."
...,...,...
295,vegetable and hummus wrap,"[whole wheat tortilla, hummus, romaine lettuce..."
296,oatmeal and fruit bowl,"[oatmeal, almond milk, banana, blueberries, al..."
297,chickpea and spinach salad,"[chickpeas, spinach, red pepper, onion' ['purp..."
298,salmon baked with vegetables,"[salmon, asparagus, lemon, pepper, olive oil]"


In [440]:
spanish_result_df.to_pickle("../dataset/spanish_result.pkl")

### Collect French Version

In [157]:
user_prompt_french = generate_user_prompt('prompts/user_prompt_french.txt')
system_prompt_french = generate_system_prompt('prompts/system_prompt_french.txt')

In [26]:
french_resp = call_openai_api(user_prompt_french, system_prompt_french, 30)
french_resp

[{'name': 'salade grecque',
  'ingrédients': ['tomates',
   'concombre',
   'oignon rouge',
   'olives noires',
   'feta',
   'huile d'olive',
   'vinaigre de vin rouge',
   'origan']},
 {'name': 'smoothie aux épinards et à la banane',
  'ingrédients': ['épinards frais',
   'banane',
   'lait d'amande',
   'graines de chia',
   'miel']},
 {'name': 'omelette aux légumes',
  'ingrédients': ['œufs',
   'poivrons',
   'oignons',
   'épinards',
   'tomates',
   'fromage feta']},
 {'name': 'buddha bowl',
  'ingrédients': ['quinoa',
   'haricots noirs',
   'avocat',
   'carottes râpées',
   'chou rouge',
   'pousses d’épinards',
   'vinaigrette tahini']},
 {'name': 'soupe de lentilles',
  'ingrédients': ['lentilles',
   'carottes',
   'oignon',
   'céleri',
   'tomates pelées',
   'bouillon de légumes',
   'cumin',
   'coriandre']},
 {'name': 'poulet grillé aux herbes',
  'ingrédients': ['poulet',
   'thym',
   'romarin',
   'ail',
   'huile d'olive',
   'jus de citron']},
 {'name': 'salade d

["[{'name': 'salade grecque',\n  'ingrédients': ['tomates',\n   'concombre',\n   'oignon rouge',\n   'olives noires',\n   'feta',\n   'huile d'olive',\n   'vinaigre de vin rouge',\n   'origan']},\n {'name': 'smoothie aux épinards et à la banane',\n  'ingrédients': ['épinards frais',\n   'banane',\n   'lait d'amande',\n   'graines de chia',\n   'miel']},\n {'name': 'omelette aux légumes',\n  'ingrédients': ['œufs',\n   'poivrons',\n   'oignons',\n   'épinards',\n   'tomates',\n   'fromage feta']},\n {'name': 'buddha bowl',\n  'ingrédients': ['quinoa',\n   'haricots noirs',\n   'avocat',\n   'carottes râpées',\n   'chou rouge',\n   'pousses d’épinards',\n   'vinaigrette tahini']},\n {'name': 'soupe de lentilles',\n  'ingrédients': ['lentilles',\n   'carottes',\n   'oignon',\n   'céleri',\n   'tomates pelées',\n   'bouillon de légumes',\n   'cumin',\n   'coriandre']},\n {'name': 'poulet grillé aux herbes',\n  'ingrédients': ['poulet',\n   'thym',\n   'romarin',\n   'ail',\n   'huile d'oli

In [27]:
# with open('french_resp.pkl', 'wb') as f:
#     pickle.dump(french_resp, f)

In [146]:
with open('responses/french_resp.pkl', 'rb') as f:
    french_resp = pickle.load(f)

In [147]:
french_resp

["[{'name': 'salade grecque',\n  'ingrédients': ['tomates',\n   'concombre',\n   'oignon rouge',\n   'olives noires',\n   'feta',\n   'huile d'olive',\n   'vinaigre de vin rouge',\n   'origan']},\n {'name': 'smoothie aux épinards et à la banane',\n  'ingrédients': ['épinards frais',\n   'banane',\n   'lait d'amande',\n   'graines de chia',\n   'miel']},\n {'name': 'omelette aux légumes',\n  'ingrédients': ['œufs',\n   'poivrons',\n   'oignons',\n   'épinards',\n   'tomates',\n   'fromage feta']},\n {'name': 'buddha bowl',\n  'ingrédients': ['quinoa',\n   'haricots noirs',\n   'avocat',\n   'carottes râpées',\n   'chou rouge',\n   'pousses d’épinards',\n   'vinaigrette tahini']},\n {'name': 'soupe de lentilles',\n  'ingrédients': ['lentilles',\n   'carottes',\n   'oignon',\n   'céleri',\n   'tomates pelées',\n   'bouillon de légumes',\n   'cumin',\n   'coriandre']},\n {'name': 'poulet grillé aux herbes',\n  'ingrédients': ['poulet',\n   'thym',\n   'romarin',\n   'ail',\n   'huile d'oli

In [148]:
trans_french_resp = translate_to_eng(french_resp)

In [149]:
trans_french_resp

["[{'name': 'greek salad',\n  'ingredients': ['tomatoes',\n   'cucumber',\n   'red onion',\n   'black olives',\n   'feta cheese',\n   'olive oil',\n   'red wine vinegar',\n   'oregano']},\n {'name': 'spinach and banana smoothie',\n  'ingredients': ['fresh spinach',\n   'banana',\n   'almond milk',\n   'chia seeds',\n   'honey']},\n {'name': 'vegetable omelette',\n  'ingredients': ['eggs',\n   'peppers',\n   'onions',\n   'spinach',\n   'tomatoes',\n   'feta cheese']},\n {'name': 'buddha bowl',\n  'ingredients': ['quinoa',\n   'black beans',\n   'lawyer',\n   'grated carrots',\n   'red cabbage',\n   'spinach shoots',\n   'tahini dressing']},\n {'name': 'lentil soup',\n  'ingredients': ['lentils',\n   'carrots',\n   'onion',\n   'celery',\n   'peeled tomatoes',\n   'vegetables soup',\n   'cumin',\n   'coriander']},\n {'name': 'grilled chicken with herbs',\n  'ingredients': ['chicken',\n   'thyme',\n   'rosemary',\n   'garlic',\n   'olive oil',\n   'lemon juice']},\n {'name': 'quinoa sala

In [150]:
french_df, french_err = create_dataframe_from_responses_2(trans_french_resp)

Error parsing response at index 9: unterminated string literal (detected at line 14) (<unknown>, line 14)
Error parsing response at index 12: unterminated string literal (detected at line 12) (<unknown>, line 12)
Error parsing response at index 21: unterminated string literal (detected at line 19) (<unknown>, line 19)
Error parsing response at index 23: unterminated string literal (detected at line 18) (<unknown>, line 18)
Error parsing response at index 25: unterminated string literal (detected at line 4) (<unknown>, line 4)


In [151]:
french_df

Unnamed: 0,name,ingredients
0,greek salad,"[tomatoes, cucumber, red onion, black olives, ..."
1,spinach and banana smoothie,"[fresh spinach, banana, almond milk, chia seed..."
2,vegetable omelette,"[eggs, peppers, onions, spinach, tomatoes, fet..."
3,buddha bowl,"[quinoa, black beans, lawyer, grated carrots, ..."
4,lentil soup,"[lentils, carrots, onion, celery, peeled tomat..."
...,...,...
245,kale and avocado salad,"[kale, avocado, tomato, sunflower seeds, lemon..."
246,ratatouille,"[eggplant, zucchini, red pepper, tomatoes, oni..."
247,vegetable and hummus wrap,"[whole wheat tortilla, hummus, arugula, grated..."
248,oat porridge with fruit,"[oat flakes, milk, banana, blueberries, sliver..."


In [152]:
french_err

[(9,
  "[{'name': 'quinoa salad',\n  'ingredients': ['quinoa', 'cherry tomatoes', 'cucumber', 'red pepper', 'red onion', 'lemon juice', 'olive oil', 'coriander', 'salt', ' black pepper']},\n {'name': 'green smoothie',\n  'ingredients': ['spinach', 'banana', 'apple', 'greek yogurt', 'almond milk', 'honey']},\n {'name': 'chickpea balls',\n  'ingredients': ['chickpeas', 'onion', 'garlic', 'coriander', 'cumin', 'flour', 'olive oil', 'salt', 'pepper']},\n {'name': 'lentil soup',\n  'ingredients': ['lentils', 'carrots', 'tomatoes', 'onion', 'vegetable broth', 'cumin', 'coriander', 'olive oil']},\n {'name': 'grilled tofu salad',\n  'ingredients': ['tofu', 'green salad', 'cherry tomatoes', 'cucumber', 'avocado', 'soy', 'honey', 'cider vinegar']},\n {'name': 'vegetable wrap',\n  'ingredients': ['whole wheat tortilla', 'hummus', 'grated carrots', 'spinach', 'red pepper', 'red onion']},\n {'name': 'vegetable gratin',\n  'ingredients': ['potatoes', 'zucchini', 'tomatoes', 'onion', 'garlic', 'milk'

In [153]:
all_dishes = []

# Regex to find dishes with different 'name' labels
dish_pattern = r"\{\s*'(?:m|the )?name'\s*:\s*'([^']+)',\s*'ingredients'\s*:\s*\[([^]]+)\]\}"

# Process each item in the data list
for _, dishes_str in french_err:
    # Adjust quotes for matching
    adjusted_str = dishes_str.replace('"', "'")
    # Find all matches in the current string
    matches = re.findall(dish_pattern, adjusted_str)
    if not matches:
        print("No matches found in:", dishes_str)
    for match in matches:
        dish_name = match[0]
        # Remove extra quotes and split ingredients into a list
        ingredients_list = [ingredient.strip().strip("'") for ingredient in match[1].split(',')]
        all_dishes.append({'name': dish_name, 'ingredients': ingredients_list})

In [154]:
french_err_df = pd.DataFrame(all_dishes)

In [155]:
french_err_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cherry tomatoes, cucumber, red pepper..."
1,green smoothie,"[spinach, banana, apple, greek yogurt, almond ..."
2,chickpea balls,"[chickpeas, onion, garlic, coriander, cumin, f..."
3,lentil soup,"[lentils, carrots, tomatoes, onion, vegetable ..."
4,grilled tofu salad,"[tofu, green salad, cherry tomatoes, cucumber,..."
5,vegetable wrap,"[whole wheat tortilla, hummus, grated carrots,..."
6,vegetable gratin,"[potatoes, zucchini, tomatoes, onion, garlic, ..."
7,chicken light curry,"[chicken, curry powder, coconut milk, onion, g..."
8,oriental chickpea salad,"[chickpeas, tomatoes, cucumber, red onion, par..."
9,yogurt and fruit parfait,"[plain yogurt, granola, honey, raspberries, bl..."


In [158]:
new = call_openai_api(user_prompt_french, system_prompt_french)
new

[{'name': 'salade de quinoa',
  'ingrédients': ['quinoa', 'tomates cerises', 'concombre', 'poivron', 'oignon rouge', 'persil', 'jus de citron', 'huile d'olive', 'sel', 'poivre']},
 {'name': 'smoothie vert',
  'ingrédients': ['épinards', 'banane', 'pomme', 'jus d’orange', 'graines de chia']},
 {'name': 'wrap de poulet et avocat',
  'ingrédients': ['tortilla complète', 'blanc de poulet', 'avocat', 'salade', 'tomate', 'mayonnaise légère']},
 {'name': 'salade de lentilles',
  'ingrédients': ['lentilles', 'tomate', 'concombre', 'oignon rouge', 'persil', 'vinaigrette']},
 {'name': 'bowl de saumon',
  'ingrédients': ['saumon', 'riz brun', 'avocat', 'concombre', 'algues', 'sauce soja', 'graines de sésame']},
 {'name': 'soupe de carottes et gingembre',
  'ingrédients': ['carottes', 'gingembre frais', 'oignon', 'bouillon de légumes', 'crème', 'sel', 'poivre']},
 {'name': 'omelette aux épinards',
  'ingrédients': ['œufs', 'épinards', 'tomate', 'oignon', 'fromage feta', 'sel', 'poivre']},
 {'name'

["[{'name': 'salade de quinoa',\n  'ingrédients': ['quinoa', 'tomates cerises', 'concombre', 'poivron', 'oignon rouge', 'persil', 'jus de citron', 'huile d'olive', 'sel', 'poivre']},\n {'name': 'smoothie vert',\n  'ingrédients': ['épinards', 'banane', 'pomme', 'jus d’orange', 'graines de chia']},\n {'name': 'wrap de poulet et avocat',\n  'ingrédients': ['tortilla complète', 'blanc de poulet', 'avocat', 'salade', 'tomate', 'mayonnaise légère']},\n {'name': 'salade de lentilles',\n  'ingrédients': ['lentilles', 'tomate', 'concombre', 'oignon rouge', 'persil', 'vinaigrette']},\n {'name': 'bowl de saumon',\n  'ingrédients': ['saumon', 'riz brun', 'avocat', 'concombre', 'algues', 'sauce soja', 'graines de sésame']},\n {'name': 'soupe de carottes et gingembre',\n  'ingrédients': ['carottes', 'gingembre frais', 'oignon', 'bouillon de légumes', 'crème', 'sel', 'poivre']},\n {'name': 'omelette aux épinards',\n  'ingrédients': ['œufs', 'épinards', 'tomate', 'oignon', 'fromage feta', 'sel', 'poiv

In [159]:
new_data = pd.DataFrame({
    'name': ['spinach pasta', 'carrot and ginger soup'],
    'ingredients': [
        ['carrots', 'fresh ginger', 'onion', 'vegetable broth', 'cream', 'salt', 'pepper'],
        ['wholewheat pasta', 'spinach', 'garlic', 'olive oil', 'pine nuts', 'parmesan']
    ]
})

In [None]:
french_result_df = pd.concat([french_df, french_err_df], axis=0)
french_result_df = pd.concat([french_result_df, new_data], axis=0)

In [441]:
french_result_df.reset_index(drop=True, inplace=True)

In [442]:
french_result_df

Unnamed: 0,name,ingredients
0,greek salad,"[tomatoes, cucumber, red onion, black olives, ..."
1,spinach and banana smoothie,"[fresh spinach, banana, almond milk, chia seed..."
2,vegetable omelette,"[eggs, peppers, onions, spinach, tomatoes, fet..."
3,buddha bowl,"[quinoa, black beans, lawyer, grated carrots, ..."
4,lentil soup,"[lentils, carrots, onion, celery, peeled tomat..."
...,...,...
295,greek salad,"[tomatoes, cucumber, olives, red onion, feta, ..."
296,zucchini with tomato sauce,"[zucchini, crushed tomatoes, garlic, onion, ba..."
297,chicken curry and spinach,"[chicken, spinach, coconut milk, curry paste, ..."
298,spinach pasta,"[carrots, fresh ginger, onion, vegetable broth..."


In [491]:
float_rows = french_result_df['ingredients'].apply(lambda x: isinstance(x, float))
rows_with_floats = french_result_df[float_rows]
print("Rows containing floats:")
print(rows_with_floats)

Rows containing floats:
    name ingredients
149  NaN         NaN
167  NaN         NaN


In [492]:
french_add = call_openai_api(user_prompt_french, system_prompt_french)
french_add

[{'name': 'salade de quinoa',
  'ingrédients': ['quinoa', 'tomates cerises', 'concombre', 'poivron rouge', 'oignon rouge', 'coriandre', 'jus de citron', 'huile d’olive', 'sel', 'poivre']},
 {'name': 'smoothie vert',
  'ingrédients': ['épinards', 'banane', 'pomme', 'jus d’orange', 'graines de chia']},
 {'name': 'bowl de buddha',
  'ingrédients': ['riz brun', 'patate douce', 'chou kale', 'carottes', 'betterave', 'tofu', 'graines de sésame', 'sauce soja']},
 {'name': 'salade de lentilles',
  'ingrédients': ['lentilles vertes', 'tomates', 'concombre', 'feta', 'oignon rouge', 'vinaigre balsamique', 'huile d’olive', 'sel', 'poivre']},
 {'name': 'wrap de poulet et avocat',
  'ingrédients': ['tortilla de blé', 'poitrine de poulet', 'avocat', 'salade', 'tomate', 'mayonnaise allégée']},
 {'name': 'curry de légumes',
  'ingrédients': ['lait de coco', 'curcuma', 'gingembre', 'ail', 'carotte', 'pois chiches', 'épinards', 'brocoli', 'pâte de curry vert', 'coriandre']},
 {'name': 'soupe de tomate',
 

["[{'name': 'salade de quinoa',\n  'ingrédients': ['quinoa', 'tomates cerises', 'concombre', 'poivron rouge', 'oignon rouge', 'coriandre', 'jus de citron', 'huile d’olive', 'sel', 'poivre']},\n {'name': 'smoothie vert',\n  'ingrédients': ['épinards', 'banane', 'pomme', 'jus d’orange', 'graines de chia']},\n {'name': 'bowl de buddha',\n  'ingrédients': ['riz brun', 'patate douce', 'chou kale', 'carottes', 'betterave', 'tofu', 'graines de sésame', 'sauce soja']},\n {'name': 'salade de lentilles',\n  'ingrédients': ['lentilles vertes', 'tomates', 'concombre', 'feta', 'oignon rouge', 'vinaigre balsamique', 'huile d’olive', 'sel', 'poivre']},\n {'name': 'wrap de poulet et avocat',\n  'ingrédients': ['tortilla de blé', 'poitrine de poulet', 'avocat', 'salade', 'tomate', 'mayonnaise allégée']},\n {'name': 'curry de légumes',\n  'ingrédients': ['lait de coco', 'curcuma', 'gingembre', 'ail', 'carotte', 'pois chiches', 'épinards', 'brocoli', 'pâte de curry vert', 'coriandre']},\n {'name': 'soupe

In [493]:
trans_french_add = translate_to_eng(french_add)

In [494]:
trans_french_add

["[{'name': 'quinoa salad',\n  'ingredients': ['quinoa', 'cherry tomatoes', 'cucumber', 'red pepper', 'red onion', 'coriander', 'lemon juice', 'olive oil', 'salt', ' pepper']},\n {'name': 'green smoothie',\n  'ingredients': ['spinach', 'banana', 'apple', 'orange juice', 'chia seeds']},\n {'name': 'buddha bowl',\n  'ingredients': ['brown rice', 'sweet potato', 'kale', 'carrots', 'beetroot', 'tofu', 'sesame seeds', 'soy sauce']},\n {'name': 'lentil salad',\n  'ingredients': ['green lentils', 'tomatoes', 'cucumber', 'feta', 'red onion', 'balsamic vinegar', 'olive oil', 'salt', 'pepper']},\n {'name': 'chicken and avocado wrap',\n  'ingredients': ['wheat tortilla', 'chicken breast', 'avocado', 'salad', 'tomato', 'low-fat mayonnaise']},\n {'name': 'vegetable curry',\n  'ingredients': ['coconut milk', 'turmeric', 'ginger', 'garlic', 'carrot', 'chickpeas', 'spinach', 'broccoli', 'green curry paste', 'coriander' ]},\n {'name': 'tomato soup',\n  'ingredients': ['tomatoes', 'onion', 'garlic', 've

In [497]:
correct_name = {
    149: 'tomato soup',
    167: 'pasta with pesto and broccoli'
}

for idx, name in correct_name.items():
    french_result_df.at[idx, 'name'] = name

In [498]:
correct_ingredients = {
    149: ['tomatoes', 'onion', 'garlic', 'vegetable broth', 'basil', 'cream', 'olive oil'],
    167: ['wholemeal pasta', 'broccoli', 'pesto', 'parmesan', 'pine nuts', 'olive oil']
}

for idx, ingredients in correct_ingredients.items():
    french_result_df.at[idx, 'ingredients'] = ingredients

In [524]:
french_result_df.at[167, 'ingredients']

['wholemeal pasta', 'broccoli', 'pesto', 'parmesan', 'pine nuts', 'olive oil']

In [503]:
french_result_df.to_pickle("../dataset/french_result.pkl")

### Collect German Version

In [171]:
user_prompt_german = generate_user_prompt('prompts/user_prompt_german.txt')
system_prompt_german = generate_system_prompt('prompts/system_prompt_german.txt')

In [29]:
german_resp = call_openai_api(user_prompt_german, system_prompt_german, 30)
german_resp

[{'name': 'Quinoa Salat',
  'Zutaten': ['Quinoa', 'Gurke', 'Kirschtomaten', 'Feta-Käse', 'Olivenöl', 'Zitronensaft', 'Petersilie', 'Salz', 'Pfeffer']},
 {'name': 'Grüner Smoothie',
  'Zutaten': ['Spinat', 'Bananen', 'Apfel', 'Ingwer', 'Wasser', 'Zitronensaft']},
 {'name': 'Linsensuppe',
  'Zutaten': ['Linsen', 'Karotten', 'Zwiebeln', 'Sellerie', 'Knoblauch', 'Thymian', 'Salz', 'Pfeffer', 'Gemüsebrühe']},
 {'name': 'Hummus',
  'Zutaten': ['Kichererbsen', 'Tahini', 'Knoblauch', 'Zitronensaft', 'Olivenöl', 'Paprikapulver', 'Kreuzkümmel', 'Salz']},
 {'name': 'Gemüse Stir Fry',
  'Zutaten': ['Brokkoli', 'Karotten', 'Zucchini', 'Paprika', 'Sojasauce', 'Ingwer', 'Knoblauch', 'Sesamöl']},
 {'name': 'Süßkartoffel-Bowl',
  'Zutaten': ['Süßkartoffeln', 'Quinoa', 'Avocado', 'Spinat', 'Kichererbsen', 'Joghurtsauce', 'Limonensaft', 'Salz']},
 {'name': 'Mediterraner Kichererbsensalat',
  'Zutaten': ['Kichererbsen', 'Kirschtomaten', 'Gurke', 'Rote Zwiebel', 'Feta-Käse', 'Olivenöl', 'Balsamicoessig', '

["[{'name': 'Quinoa Salat',\n  'Zutaten': ['Quinoa', 'Gurke', 'Kirschtomaten', 'Feta-Käse', 'Olivenöl', 'Zitronensaft', 'Petersilie', 'Salz', 'Pfeffer']},\n {'name': 'Grüner Smoothie',\n  'Zutaten': ['Spinat', 'Bananen', 'Apfel', 'Ingwer', 'Wasser', 'Zitronensaft']},\n {'name': 'Linsensuppe',\n  'Zutaten': ['Linsen', 'Karotten', 'Zwiebeln', 'Sellerie', 'Knoblauch', 'Thymian', 'Salz', 'Pfeffer', 'Gemüsebrühe']},\n {'name': 'Hummus',\n  'Zutaten': ['Kichererbsen', 'Tahini', 'Knoblauch', 'Zitronensaft', 'Olivenöl', 'Paprikapulver', 'Kreuzkümmel', 'Salz']},\n {'name': 'Gemüse Stir Fry',\n  'Zutaten': ['Brokkoli', 'Karotten', 'Zucchini', 'Paprika', 'Sojasauce', 'Ingwer', 'Knoblauch', 'Sesamöl']},\n {'name': 'Süßkartoffel-Bowl',\n  'Zutaten': ['Süßkartoffeln', 'Quinoa', 'Avocado', 'Spinat', 'Kichererbsen', 'Joghurtsauce', 'Limonensaft', 'Salz']},\n {'name': 'Mediterraner Kichererbsensalat',\n  'Zutaten': ['Kichererbsen', 'Kirschtomaten', 'Gurke', 'Rote Zwiebel', 'Feta-Käse', 'Olivenöl', 'Bal

In [30]:
# with open('german_resp.pkl', 'wb') as f:
#     pickle.dump(german_resp, f)

In [162]:
with open('responses/german_resp.pkl', 'rb') as f:
    german_resp = pickle.load(f)

In [163]:
german_resp

["[{'name': 'Quinoa Salat',\n  'Zutaten': ['Quinoa', 'Gurke', 'Kirschtomaten', 'Feta-Käse', 'Olivenöl', 'Zitronensaft', 'Petersilie', 'Salz', 'Pfeffer']},\n {'name': 'Grüner Smoothie',\n  'Zutaten': ['Spinat', 'Bananen', 'Apfel', 'Ingwer', 'Wasser', 'Zitronensaft']},\n {'name': 'Linsensuppe',\n  'Zutaten': ['Linsen', 'Karotten', 'Zwiebeln', 'Sellerie', 'Knoblauch', 'Thymian', 'Salz', 'Pfeffer', 'Gemüsebrühe']},\n {'name': 'Hummus',\n  'Zutaten': ['Kichererbsen', 'Tahini', 'Knoblauch', 'Zitronensaft', 'Olivenöl', 'Paprikapulver', 'Kreuzkümmel', 'Salz']},\n {'name': 'Gemüse Stir Fry',\n  'Zutaten': ['Brokkoli', 'Karotten', 'Zucchini', 'Paprika', 'Sojasauce', 'Ingwer', 'Knoblauch', 'Sesamöl']},\n {'name': 'Süßkartoffel-Bowl',\n  'Zutaten': ['Süßkartoffeln', 'Quinoa', 'Avocado', 'Spinat', 'Kichererbsen', 'Joghurtsauce', 'Limonensaft', 'Salz']},\n {'name': 'Mediterraner Kichererbsensalat',\n  'Zutaten': ['Kichererbsen', 'Kirschtomaten', 'Gurke', 'Rote Zwiebel', 'Feta-Käse', 'Olivenöl', 'Bal

In [164]:
trans_german_resp = translate_to_eng(german_resp)

In [165]:
trans_german_resp

["[{'name': 'quinoa salad',\n'ingredients': ['quinoa', 'cucumber', 'cherry tomatoes', 'feta cheese', 'olive oil', 'lemon juice', 'parsley', 'salt', 'pepper']},\n{'name': 'green smoothie',\n'ingredients': ['spinach', 'bananas', 'apple', 'ginger', 'water', 'lemon juice']},\n{'name': 'lentil soup',\n'ingredients': ['lentils', 'carrots', 'onions', 'celery', 'garlic', 'thyme', 'salt', 'pepper', 'vegetable broth']},\n{'name': 'hummus',\n'ingredients': ['chickpeas', 'tahini', 'garlic', 'lemon juice', 'olive oil', 'paprika powder', 'cumin', 'salt']},\n{'name': 'vegetable stir fry',\n'ingredients': ['broccoli', 'carrots', 'zucchini', 'bell pepper', 'soy sauce', 'ginger', 'garlic', 'sesame oil']},\n{'name': 'sweet potato bowl',\n'ingredients': ['sweet potatoes', 'quinoa', 'avocado', 'spinach', 'chickpeas', 'yogurt sauce', 'lime juice', 'salt']},\n{'name': 'mediterranean chickpea salad',\n'ingredients': ['chickpeas', 'cherry tomatoes', 'cucumber', 'red onion', 'feta cheese', 'olive oil', 'balsami

In [166]:
german_df, german_err = create_dataframe_from_responses_2(trans_german_resp)

Error parsing response at index 2: closing parenthesis ']' does not match opening parenthesis '{' on line 11 (<unknown>, line 20)
Error parsing response at index 12: invalid syntax (<unknown>, line 48)
Error parsing response at index 22: unterminated string literal (detected at line 18) (<unknown>, line 18)
Error parsing response at index 25: invalid syntax. Perhaps you forgot a comma? (<unknown>, line 16)
Error parsing response at index 27: unterminated string literal (detected at line 25) (<unknown>, line 25)


In [167]:
german_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cucumber, cherry tomatoes, feta chees..."
1,green smoothie,"[spinach, bananas, apple, ginger, water, lemon..."
2,lentil soup,"[lentils, carrots, onions, celery, garlic, thy..."
3,hummus,"[chickpeas, tahini, garlic, lemon juice, olive..."
4,vegetable stir fry,"[broccoli, carrots, zucchini, bell pepper, soy..."
...,...,...
245,berry yogurt parfait,"[greek yogurt, honey, blueberries, strawberrie..."
246,vegetable stir-fry,"[broccoli, carrots, bell peppers, soy sauce, s..."
247,kale salad,"[kale, apples, walnuts, cranberries, lemon dre..."
248,sweet potato tacos,"[sweet potatoes, black beans, cilantro, lime j..."


In [170]:
german_err

[(2,
  "[{'name': 'quinoa salad',\n'ingredients': ['quinoa', 'cherry tomatoes', 'cucumber', 'lemon juice', 'olive oil', 'feta', 'fresh parsley']},\n{'name': 'chickpea curry',\n'ingredients': ['chickpeas', 'coconut milk', 'curry powder', 'onion', 'garlic', 'fresh ginger', 'spinach']},\n{'name': 'green smoothie',\n'ingredients': ['spinach', 'bananas', 'apple', 'linseed', 'almond milk', 'honey']},\n{'name': 'vegetable soup',\n'ingredients': ['carrots', 'celery', 'onion', 'tomatoes', 'green beans', 'garlic', 'vegetable stock']},\n{'name': 'lentil salad',\n'ingredients': ['lentils', 'red peppers', 'fresh mint', 'lemon juice', 'olive oil', 'feta']},\n{'in': {'name': 'stuffed peppers',\n'ingredients': ['bell peppers', 'quinoa', 'black beans', 'corn', 'onion', 'cheese', 'tomatoes']},\n{'name': 'zucchini noodles',\n'ingredients': ['zucchini', 'cherry tomatoes', 'garlic', 'olive oil', 'parmesan', 'basil']},\n{'name': 'sweet potato toast',\n'ingredients': ['sweet potatoes', 'avocado', 'cherry tom

In [168]:
all_dishes = []

# Regex to find dishes with different 'name' labels
dish_pattern = r"\{\s*'(?:m|the )?name'\s*:\s*'([^']+)',\s*'ingredients'\s*:\s*\[([^]]+)\]\}"

# Process each item in the data list
for _, dishes_str in german_err:
    # Adjust quotes for matching
    adjusted_str = dishes_str.replace('"', "'")
    # Find all matches in the current string
    matches = re.findall(dish_pattern, adjusted_str)
    if not matches:
        print("No matches found in:", dishes_str)
    for match in matches:
        dish_name = match[0]
        # Remove extra quotes and split ingredients into a list
        ingredients_list = [ingredient.strip().strip("'") for ingredient in match[1].split(',')]
        all_dishes.append({'name': dish_name, 'ingredients': ingredients_list})

In [169]:
german_err_df = pd.DataFrame(all_dishes)
german_err_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cherry tomatoes, cucumber, lemon juic..."
1,chickpea curry,"[chickpeas, coconut milk, curry powder, onion,..."
2,green smoothie,"[spinach, bananas, apple, linseed, almond milk..."
3,vegetable soup,"[carrots, celery, onion, tomatoes, green beans..."
4,lentil salad,"[lentils, red peppers, fresh mint, lemon juice..."
5,stuffed peppers,"[bell peppers, quinoa, black beans, corn, onio..."
6,zucchini noodles,"[zucchini, cherry tomatoes, garlic, olive oil,..."
7,sweet potato toast,"[sweet potatoes, avocado, cherry tomatoes, fet..."
8,kale salad,"[kale, apples, walnuts, cranberries, blue chee..."
9,vegetable omelette,"[eggs, spinach, bell pepper, onion, mushrooms,..."


In [173]:
new_data = pd.DataFrame({
    'name': ['sweet potato bowl', 'rice with vegetables', 'morning oatmeal', 'mango coconut chia pudding'],
    'ingredients': [
        ['sweet potatoes', 'quinoa', 'spinach', 'avocado', 'beetroot', 'chickpeas', 'tahini', 'lemon'],
        ['wholegrain rice', 'broccoli', 'peppers', 'onions', 'eggs', 'soy sauce', 'sesame oil'],
        ['oatmeal', 'almond milk', 'blueberries', 'chia seeds', 'honey', 'cinnamon'],
        ['chia seeds', 'coconut milk', 'mango puree', 'grated coconut', 'vanilla extract']
    ]
})

In [None]:
german_result_df = pd.concat([german_df, german_err_df], axis=0)
german_result_df = pd.concat([german_result_df, new_data], axis=0)

In [444]:
german_result_df.reset_index(drop=True, inplace=True)

In [445]:
german_result_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cucumber, cherry tomatoes, feta chees..."
1,green smoothie,"[spinach, bananas, apple, ginger, water, lemon..."
2,lentil soup,"[lentils, carrots, onions, celery, garlic, thy..."
3,hummus,"[chickpeas, tahini, garlic, lemon juice, olive..."
4,vegetable stir fry,"[broccoli, carrots, zucchini, bell pepper, soy..."
...,...,...
295,kale salad with avocado and quinoa,"[kale, quinoa, avocado, lemon juice, olive oil..."
296,sweet potato bowl,"[sweet potatoes, quinoa, spinach, avocado, bee..."
297,rice with vegetables,"[wholegrain rice, broccoli, peppers, onions, e..."
298,morning oatmeal,"[oatmeal, almond milk, blueberries, chia seeds..."


In [504]:
float_rows = german_result_df['ingredients'].apply(lambda x: isinstance(x, float))
rows_with_floats = german_result_df[float_rows]
print("Rows containing floats:")
print(rows_with_floats)

Rows containing floats:
                  name ingredients
43  vegetable stir-fry         NaN
48    spinach omelette         NaN
49        pumpkin soup         NaN


In [505]:
# manually add back the ones that didn't get parsed successfully
# the ingredients exist in the translated text, just didn't get parsed
correct_ingredients = {
    43: ['broccoli', 'carrots', 'zucchini', 'bell pepper', 'soy sauce', 'ginger', 'garlic', 'sesame oil'],
    48: ['eggs', 'spinach', 'tomatoes', 'feta cheese', 'onions'],
    49: ['pumpkin', 'vegetable broth', 'cream', 'onions', 'nutmeg', 'pepper']
}

for idx, ingredients in correct_ingredients.items():
    german_result_df.at[idx, 'ingredients'] = ingredients

In [507]:
german_result_df.at[49, 'ingredients']

['pumpkin', 'vegetable broth', 'cream', 'onions', 'nutmeg', 'pepper']

In [508]:
german_result_df.to_pickle("../dataset/german_result.pkl")

### Collect Greek Version

In [185]:
user_prompt_greek = generate_user_prompt('prompts/user_prompt_greek.txt')
system_prompt_greek = generate_system_prompt('prompts/system_prompt_greek.txt')

In [32]:
greek_resp = call_openai_api(user_prompt_greek, system_prompt_greek, 30)
greek_resp

[{'name': 'σαλάτα με κινόα και φέτα',
  'συστατικά': ['κινόα', 'φέτα', 'ντομάτα', 'αγγούρι', 'κόκκινη πιπεριά', 'κρεμμύδι', 'ελαιόλαδο', 'λεμόνι', 'αλάτι', 'πιπέρι']},
 {'name': 'κρεμώδης σούπα μπρόκολο',
  'συστατικά': ['μπρόκολο', 'κρεμμύδι', 'σκόρδο', 'ζωμός λαχανικών', 'κουταλιά ελαιόλαδο', 'αλάτι', 'πιπέρι', 'νερό']},
 {'name': 'ρυζότο με φασόλια και σπανάκι',
  'συστατικά': ['ρύζι', 'κόκκινα φασόλια', 'σπανάκι', 'πράσο', 'ελαιόλαδο', 'θυμάρι', 'ζωμός λαχανικών', 'αλάτι']},
 {'name': 'ρολάκια αυγού με σπανάκι και τυρί',
  'συστατικά': ['αυγά', 'φέτα', 'σπανάκι', 'κρεμμύδι', 'ελαιόλαδο', 'αλάτι', 'πιπέρι']},
 {'name': 'πρωτεϊνική σαλάτα με ρεβίθια',
  'συστατικά': ['ρεβίθια', 'ντομάτα', 'κρεμμύδι', 'πιπεριά', 'λεμόνι', 'ελαιόλαδο', 'αλάτι', 'πιπέρι']},
 {'name': 'χούμους σπιτικό',
  'συστατικά': ['ρεβίθια', 'ταχίνι', 'σκόρδο', 'λεμόνι', 'κύμινο', 'ελαιόλαδο', 'αλάτι', 'πιπέρι']},
 {',', ']}ινακνη με λαχανικά',
  'συστατικά': ['μελιτζάνα', 'πιπεριά', 'κολοκύθι', 'κρεμμύδι', 'σκόρδο'

["[{'name': 'σαλάτα με κινόα και φέτα',\n  'συστατικά': ['κινόα', 'φέτα', 'ντομάτα', 'αγγούρι', 'κόκκινη πιπεριά', 'κρεμμύδι', 'ελαιόλαδο', 'λεμόνι', 'αλάτι', 'πιπέρι']},\n {'name': 'κρεμώδης σούπα μπρόκολο',\n  'συστατικά': ['μπρόκολο', 'κρεμμύδι', 'σκόρδο', 'ζωμός λαχανικών', 'κουταλιά ελαιόλαδο', 'αλάτι', 'πιπέρι', 'νερό']},\n {'name': 'ρυζότο με φασόλια και σπανάκι',\n  'συστατικά': ['ρύζι', 'κόκκινα φασόλια', 'σπανάκι', 'πράσο', 'ελαιόλαδο', 'θυμάρι', 'ζωμός λαχανικών', 'αλάτι']},\n {'name': 'ρολάκια αυγού με σπανάκι και τυρί',\n  'συστατικά': ['αυγά', 'φέτα', 'σπανάκι', 'κρεμμύδι', 'ελαιόλαδο', 'αλάτι', 'πιπέρι']},\n {'name': 'πρωτεϊνική σαλάτα με ρεβίθια',\n  'συστατικά': ['ρεβίθια', 'ντομάτα', 'κρεμμύδι', 'πιπεριά', 'λεμόνι', 'ελαιόλαδο', 'αλάτι', 'πιπέρι']},\n {'name': 'χούμους σπιτικό',\n  'συστατικά': ['ρεβίθια', 'ταχίνι', 'σκόρδο', 'λεμόνι', 'κύμινο', 'ελαιόλαδο', 'αλάτι', 'πιπέρι']},\n {',', ']}ινακνη με λαχανικά',\n  'συστατικά': ['μελιτζάνα', 'πιπεριά', 'κολοκύθι', 'κρεμ

In [33]:
# with open('greek_resp.pkl', 'wb') as f:
#     pickle.dump(greek_resp, f)

In [176]:
with open('responses/greek_resp.pkl', 'rb') as f:
    greek_resp = pickle.load(f)

In [177]:
greek_resp

["[{'name': 'σαλάτα με κινόα και φέτα',\n  'συστατικά': ['κινόα', 'φέτα', 'ντομάτα', 'αγγούρι', 'κόκκινη πιπεριά', 'κρεμμύδι', 'ελαιόλαδο', 'λεμόνι', 'αλάτι', 'πιπέρι']},\n {'name': 'κρεμώδης σούπα μπρόκολο',\n  'συστατικά': ['μπρόκολο', 'κρεμμύδι', 'σκόρδο', 'ζωμός λαχανικών', 'κουταλιά ελαιόλαδο', 'αλάτι', 'πιπέρι', 'νερό']},\n {'name': 'ρυζότο με φασόλια και σπανάκι',\n  'συστατικά': ['ρύζι', 'κόκκινα φασόλια', 'σπανάκι', 'πράσο', 'ελαιόλαδο', 'θυμάρι', 'ζωμός λαχανικών', 'αλάτι']},\n {'name': 'ρολάκια αυγού με σπανάκι και τυρί',\n  'συστατικά': ['αυγά', 'φέτα', 'σπανάκι', 'κρεμμύδι', 'ελαιόλαδο', 'αλάτι', 'πιπέρι']},\n {'name': 'πρωτεϊνική σαλάτα με ρεβίθια',\n  'συστατικά': ['ρεβίθια', 'ντομάτα', 'κρεμμύδι', 'πιπεριά', 'λεμόνι', 'ελαιόλαδο', 'αλάτι', 'πιπέρι']},\n {'name': 'χούμους σπιτικό',\n  'συστατικά': ['ρεβίθια', 'ταχίνι', 'σκόρδο', 'λεμόνι', 'κύμινο', 'ελαιόλαδο', 'αλάτι', 'πιπέρι']},\n {',', ']}ινακνη με λαχανικά',\n  'συστατικά': ['μελιτζάνα', 'πιπεριά', 'κολοκύθι', 'κρεμ

In [178]:
trans_greek_resp = translate_to_eng(greek_resp)

In [179]:
trans_greek_resp

["[{'name': 'quinoa and feta salad',\n  'ingredients': ['quinoa', 'feta', 'tomato', 'cucumber', 'red pepper', 'onion', 'olive oil', 'lemon', 'salt', 'pepper']},\n {'name': 'creamy broccoli soup',\n  'ingredients': ['broccoli', 'onion', 'garlic', 'vegetable stock', 'spoon olive oil', 'salt', 'pepper', 'water']},\n {'name': 'bean and spinach risotto',\n  'ingredients': ['rice', 'red beans', 'spinach', 'leek', 'olive oil', 'thyme', 'vegetable stock', 'salt']},\n {'name': 'egg rolls with spinach and cheese',\n  'ingredients': ['eggs', 'feta', 'spinach', 'onion', 'olive oil', 'salt', 'pepper']},\n {'name': 'chickpea protein salad',\n  'ingredients': ['chickpeas', 'tomato', 'onion', 'pepper', 'lemon', 'olive oil', 'salt', 'pepper']},\n {'name': 'homemade hummus',\n  'ingredients': ['chickpeas', 'tahini', 'garlic', 'lemon', 'cumin', 'olive oil', 'salt', 'pepper']},\n {',', ']}fiber with vegetables',\n  'ingredients': ['eggplant', 'pepper', 'pumpkin', 'onion', 'garlic', 'tomato', 'olive oil', 

In [180]:
greek_df, greek_err = create_dataframe_from_responses_2(trans_greek_resp)

Error parsing response at index 0: invalid syntax (<unknown>, line 14)
Error parsing response at index 6: unterminated string literal (detected at line 9) (<unknown>, line 9)
Error parsing response at index 15: invalid syntax (<unknown>, line 50)


In [181]:
greek_df

Unnamed: 0,name,ingredients
0,vegetable quinoa salad,"[quinoa, peppers, cucumber, onions, tomatoes, ..."
1,avocado and spinach smoothie,"[avocado, spinach, apple, yogurt, honey, water]"
2,omelette with asparagus and feta,"[eggs, asparagus, feta, onion, pepper, salt, o..."
3,spinach and ricotta tart,"[spinach, ricotta, crust, eggs, nutmeg, salt, ..."
4,grilled chicken with lemon sauce,"[chicken, lemon, garlic, oregano, salt, pepper..."
...,...,...
265,avocado dip,"[avocado, onion, tomato, cilantro, lime juice,..."
266,roasted chicken with thyme and lemon,"[chicken, thyme, lemon, garlic, olive oil, sal..."
267,lentil salad with smoked salmon,"[lentils, smoked salmon, onion, olive oil, mus..."
268,spinach and feta chicken roll,"[chicken, feta, fresh spinach, garlic, olive o..."


In [184]:
greek_err

[(0,
  "[{'name': 'quinoa and feta salad',\n  'ingredients': ['quinoa', 'feta', 'tomato', 'cucumber', 'red pepper', 'onion', 'olive oil', 'lemon', 'salt', 'pepper']},\n {'name': 'creamy broccoli soup',\n  'ingredients': ['broccoli', 'onion', 'garlic', 'vegetable stock', 'spoon olive oil', 'salt', 'pepper', 'water']},\n {'name': 'bean and spinach risotto',\n  'ingredients': ['rice', 'red beans', 'spinach', 'leek', 'olive oil', 'thyme', 'vegetable stock', 'salt']},\n {'name': 'egg rolls with spinach and cheese',\n  'ingredients': ['eggs', 'feta', 'spinach', 'onion', 'olive oil', 'salt', 'pepper']},\n {'name': 'chickpea protein salad',\n  'ingredients': ['chickpeas', 'tomato', 'onion', 'pepper', 'lemon', 'olive oil', 'salt', 'pepper']},\n {'name': 'homemade hummus',\n  'ingredients': ['chickpeas', 'tahini', 'garlic', 'lemon', 'cumin', 'olive oil', 'salt', 'pepper']},\n {',', ']}fiber with vegetables',\n  'ingredients': ['eggplant', 'pepper', 'pumpkin', 'onion', 'garlic', 'tomato', 'olive 

In [182]:
all_dishes = []

# Regex to find dishes with different 'name' labels
dish_pattern = r"\{\s*'(?:m|the )?name'\s*:\s*'([^']+)',\s*'ingredients'\s*:\s*\[([^]]+)\]\}"

# Process each item in the data list
for _, dishes_str in greek_err:
    # Adjust quotes for matching
    adjusted_str = dishes_str.replace('"', "'")
    # Find all matches in the current string
    matches = re.findall(dish_pattern, adjusted_str)
    if not matches:
        print("No matches found in:", dishes_str)
    for match in matches:
        dish_name = match[0]
        # Remove extra quotes and split ingredients into a list
        ingredients_list = [ingredient.strip().strip("'") for ingredient in match[1].split(',')]
        all_dishes.append({'name': dish_name, 'ingredients': ingredients_list})

In [183]:
greek_err_df = pd.DataFrame(all_dishes)
greek_err_df

Unnamed: 0,name,ingredients
0,quinoa and feta salad,"[quinoa, feta, tomato, cucumber, red pepper, o..."
1,creamy broccoli soup,"[broccoli, onion, garlic, vegetable stock, spo..."
2,bean and spinach risotto,"[rice, red beans, spinach, leek, olive oil, th..."
3,egg rolls with spinach and cheese,"[eggs, feta, spinach, onion, olive oil, salt, ..."
4,chickpea protein salad,"[chickpeas, tomato, onion, pepper, lemon, oliv..."
5,homemade hummus,"[chickpeas, tahini, garlic, lemon, cumin, oliv..."
6,mango yogurt smoothie,"[mango, yogurt, honey, ice]"
7,warm salad with lentils and vegetables,"[lentils, carrot, onion, garlic, spinach, sun-..."
8,"tuna, avocado and pomegranate salad","[tuna, avocado, pomegranate, onion, parsley, o..."
9,quinoa salad,"[quinoa, cucumber, tomato, onion, olive oil, l..."


In [186]:
new = call_openai_api(user_prompt_greek, system_prompt_greek)
new

[{'name': 'Smoothie με μπανάνα και σπανάκι',
  'συστατικά': ['μπανάνα', 'σπανάκι', 'αμύγδαλο γάλα', 'σπόροι τσία', 'μέλι']},
 {'name': 'Σαλάτα με Κινόα και ρόδι',
  'συστατικά': ['κινόα', 'ρόδι', 'αγγούρι', 'ντομάτα', 'φέτα', 'ελαιόλαδο']},
 {'name': 'Ψητό σολομό με λεμόνι και θυμάρι',
  'συστατικά': ['φιλέτο σολομού', 'λεμόνι', 'θυμάρι', 'ελαιόλαδο', 'αλάτι', 'πιπέρι']},
 {'name': 'Βραστά φασόλια με πιπεριές',
  'συστατικά': ['φασόλια', 'κόκκινη πιπεριά', 'κίτρινη πιπεριά', 'κρεμμύδι', 'σκόρδο', 'ελαιόλαδο']},
 {'name': 'Αυγά ποσέ με σπαράγγια',
  'συστατικά': ['αυγά', 'σπαράγγια', 'λευκό ξίδι', 'αλάτι']},
 {'name': 'Τσίκεν τίκα μασάλα',
  'συστατικά': ['κοτόπουλο', 'γιαούρτι', 'ντοματάκια', 'μασάλα', 'κάρυ', 'κρεμμύδι']},
 {'name': 'Τυρόπιτα με ολικής άλεσης ζύμη',
  'συστατικά': ['ολικής άλεσης ζύμη', 'φέτα', 'ρίγανη', 'αυγά', 'γάλα']},
 {'name': 'Smoothie με μάνγκο και γιαούρτι',
  'συστατικά': ['μάνγκο', 'γιαούρτι', 'μέλι', 'λάιμ']},
 {'name': 'Βεγανικά tacos με καλαμπόκι και αβοκ

["[{'name': 'Smoothie με μπανάνα και σπανάκι',\n  'συστατικά': ['μπανάνα', 'σπανάκι', 'αμύγδαλο γάλα', 'σπόροι τσία', 'μέλι']},\n {'name': 'Σαλάτα με Κινόα και ρόδι',\n  'συστατικά': ['κινόα', 'ρόδι', 'αγγούρι', 'ντομάτα', 'φέτα', 'ελαιόλαδο']},\n {'name': 'Ψητό σολομό με λεμόνι και θυμάρι',\n  'συστατικά': ['φιλέτο σολομού', 'λεμόνι', 'θυμάρι', 'ελαιόλαδο', 'αλάτι', 'πιπέρι']},\n {'name': 'Βραστά φασόλια με πιπεριές',\n  'συστατικά': ['φασόλια', 'κόκκινη πιπεριά', 'κίτρινη πιπεριά', 'κρεμμύδι', 'σκόρδο', 'ελαιόλαδο']},\n {'name': 'Αυγά ποσέ με σπαράγγια',\n  'συστατικά': ['αυγά', 'σπαράγγια', 'λευκό ξίδι', 'αλάτι']},\n {'name': 'Τσίκεν τίκα μασάλα',\n  'συστατικά': ['κοτόπουλο', 'γιαούρτι', 'ντοματάκια', 'μασάλα', 'κάρυ', 'κρεμμύδι']},\n {'name': 'Τυρόπιτα με ολικής άλεσης ζύμη',\n  'συστατικά': ['ολικής άλεσης ζύμη', 'φέτα', 'ρίγανη', 'αυγά', 'γάλα']},\n {'name': 'Smoothie με μάνγκο και γιαούρτι',\n  'συστατικά': ['μάνγκο', 'γιαούρτι', 'μέλι', 'λάιμ']},\n {'name': 'Βεγανικά tacos με 

In [187]:
new_data = pd.DataFrame({
    'name': ['chicken tikka masala', 'wholewheat cheesecake'],
    'ingredients': [
        ['chicken', 'yogurt', 'tomatoes', 'masala', 'curry', 'onion'],
        ['whole wheat dough', 'feta', 'oregano', 'eggs', 'milk']
    ]
})

In [None]:
greek_result_df = pd.concat([greek_df, greek_err_df], axis=0)
greek_result_df = pd.concat([greek_result_df, new_data], axis=0)

In [447]:
greek_result_df.reset_index(drop=True, inplace=True)

In [448]:
greek_result_df

Unnamed: 0,name,ingredients
0,vegetable quinoa salad,"[quinoa, peppers, cucumber, onions, tomatoes, ..."
1,avocado and spinach smoothie,"[avocado, spinach, apple, yogurt, honey, water]"
2,omelette with asparagus and feta,"[eggs, asparagus, feta, onion, pepper, salt, o..."
3,spinach and ricotta tart,"[spinach, ricotta, crust, eggs, nutmeg, salt, ..."
4,grilled chicken with lemon sauce,"[chicken, lemon, garlic, oregano, salt, pepper..."
...,...,...
295,hearty chickpea salad,"[chickpeas, pepper, corn, onion, black olives,..."
296,quinoa bowls with asian vegetables,"[quinoa, carrot, red pepper, mushrooms, soya s..."
297,spinach and feta omelette,"[eggs, fresh spinach, slice, onion, salt, pepper]"
298,chicken tikka masala,"[chicken, yogurt, tomatoes, masala, curry, onion]"


In [449]:
greek_result_df.to_pickle("../dataset/greek_result.pkl")

### Collect Hawaiian Version

In [191]:
user_prompt_hawa = generate_user_prompt('prompts/user_prompt_hawaiian.txt')
system_prompt_hawa = generate_system_prompt('prompts/system_prompt_hawaiian.txt')

In [192]:
hawa_resp = call_openai_api(user_prompt_hawa, system_prompt_hawa)
hawa_resp

Eia he 10 meaʻai olakino aʻu e hāʻawi aku ai iā ʻoe e hiki ke hoʻomākaukau ma ka hale:

1. **Salakeke Kale me nā ʻAlemona a me nā ʻAlamona Wīwī**
   - Mea hoʻohui:
     - Kale
     - Nā ʻōmato hou
     - Nā ʻalemona (nona)
     - Nā ʻalauwī wīwī
     - ʻAlaʻula oliba
     - ʻO ka waiʻona balsamic
     - Paʻakai a me ka pepa

2. **Smoothie Pūlū Beri**
   - Mea hoʻohui:
     - Nā beri uliuli
     - Nā raspberry
     - Nā strawberry
     - Ke kīʻaha yogurt kūlohelohe
     - Wai

3. **ʻO Quinoa Bowl me nā Mea Kanu**
   - Mea hoʻohui:
     - Quinoa
     - Nā broccoli
     - Nā kale kālika
     - ʻO nā hoʻohui ʻai hummus
     - Ka huaʻai lima
     - Nā walnuts

4. **ʻO ka Hummus Home-made**
   - Mea hoʻohui:
     - Nā ʻāpiki keʻokeʻo, kānanaʻia
     - ʻO ka wai o nā chickpeas
     - Tahini
     - Ka wai līmū
     - Kālika
     - Paʻakai a me ka pepa
     - ʻAlaʻula oliba

5. **ʻO nā ʻOmaʻomaʻo Mea ʻAi Me nā Meaʻai Kūlohelohe**
   - Mea hoʻohui:
     - ʻO nā lau tī
     - ʻO nā ʻōmato, pihaʻi

['Eia he 10 meaʻai olakino aʻu e hāʻawi aku ai iā ʻoe e hiki ke hoʻomākaukau ma ka hale:\n\n1. **Salakeke Kale me nā ʻAlemona a me nā ʻAlamona Wīwī**\n   - Mea hoʻohui:\n     - Kale\n     - Nā ʻōmato hou\n     - Nā ʻalemona (nona)\n     - Nā ʻalauwī wīwī\n     - ʻAlaʻula oliba\n     - ʻO ka waiʻona balsamic\n     - Paʻakai a me ka pepa\n\n2. **Smoothie Pūlū Beri**\n   - Mea hoʻohui:\n     - Nā beri uliuli\n     - Nā raspberry\n     - Nā strawberry\n     - Ke kīʻaha yogurt kūlohelohe\n     - Wai\n\n3. **ʻO Quinoa Bowl me nā Mea Kanu**\n   - Mea hoʻohui:\n     - Quinoa\n     - Nā broccoli\n     - Nā kale kālika\n     - ʻO nā hoʻohui ʻai hummus\n     - Ka huaʻai lima\n     - Nā walnuts\n\n4. **ʻO ka Hummus Home-made**\n   - Mea hoʻohui:\n     - Nā ʻāpiki keʻokeʻo, kānanaʻia\n     - ʻO ka wai o nā chickpeas\n     - Tahini\n     - Ka wai līmū\n     - Kālika\n     - Paʻakai a me ka pepa\n     - ʻAlaʻula oliba\n\n5. **ʻO nā ʻOmaʻomaʻo Mea ʻAi Me nā Meaʻai Kūlohelohe**\n   - Mea hoʻohui:\n    

In [None]:
# with open('hawaiian_resp.pkl', 'wb') as f:
#     pickle.dump(hawa_resp, f)

### Collect Hungarian Version

In [202]:
user_prompt_hung = generate_user_prompt('prompts/user_prompt_hungarian.txt')
system_prompt_hung = generate_system_prompt('prompts/system_prompt_hungarian.txt')

In [42]:
hung_resp = call_openai_api(user_prompt_hung, system_prompt_hung, 30)
hung_resp

[{'name': "Quinoa saláta",
  'összetevők': ['quinoa', 'uborka', 'paradicsom', 'vöröshagyma', 'feta sajt', 'citromlé', 'olívaolaj', 'petrezselyem', 'só', 'bors']},
 {'name': 'Zöldturmix',
  'összetevők': ['kale', 'spenót', 'banán', 'alma', 'chia mag', 'mandulatej']},
 {'name': 'Brokkolis csirke',
  'összetevők': ['csirkemell', 'brokkoli', 'fokhagyma', 'szójaszósz', 'méz', 'szezámmag', 'olívaolaj']},
 {'name': 'Avokádós tojássaláta',
  'összetevők': ['tojás', 'érett avokádó', 'snidling', 'lime lé', 'joghurt', 'mustár', 'só', 'bors']},
 {'name': 'Céklasaláta dióval',
  'összetevők': ['cékla', 'dió', 'feta sajt', 'szárított áfonya', 'olívaolaj', 'balzsamecet']},
 {'name': 'Lencsesaláta',
  'összetevők': ['lencse', 'cékla', 'sárgarépa', 'petrezselyem', 'feta sajt', 'olívaolaj', 'balzsamecet']},
 {'name': 'Kókusztejes csirke curry',
  'összetevők': ['csirkemell', 'kókusztej', 'curry por', 'hagyma', 'fokhagyma', 'gyömbér', 'brokkoli', 'kaliforniai paprika']},
 {'name': 'Rántott cukkini',
  'ö

['[{\'name\': "Quinoa saláta",\n  \'összetevők\': [\'quinoa\', \'uborka\', \'paradicsom\', \'vöröshagyma\', \'feta sajt\', \'citromlé\', \'olívaolaj\', \'petrezselyem\', \'só\', \'bors\']},\n {\'name\': \'Zöldturmix\',\n  \'összetevők\': [\'kale\', \'spenót\', \'banán\', \'alma\', \'chia mag\', \'mandulatej\']},\n {\'name\': \'Brokkolis csirke\',\n  \'összetevők\': [\'csirkemell\', \'brokkoli\', \'fokhagyma\', \'szójaszósz\', \'méz\', \'szezámmag\', \'olívaolaj\']},\n {\'name\': \'Avokádós tojássaláta\',\n  \'összetevők\': [\'tojás\', \'érett avokádó\', \'snidling\', \'lime lé\', \'joghurt\', \'mustár\', \'só\', \'bors\']},\n {\'name\': \'Céklasaláta dióval\',\n  \'összetevők\': [\'cékla\', \'dió\', \'feta sajt\', \'szárított áfonya\', \'olívaolaj\', \'balzsamecet\']},\n {\'name\': \'Lencsesaláta\',\n  \'összetevők\': [\'lencse\', \'cékla\', \'sárgarépa\', \'petrezselyem\', \'feta sajt\', \'olívaolaj\', \'balzsamecet\']},\n {\'name\': \'Kókusztejes csirke curry\',\n  \'összetevők\': [\

In [43]:
# with open('hungarian_resp.pkl', 'wb') as f:
#     pickle.dump(hung_resp, f)

In [193]:
with open('responses/hungarian_resp.pkl', 'rb') as f:
    hungarian_resp = pickle.load(f)

In [194]:
hungarian_resp

['[{\'name\': "Quinoa saláta",\n  \'összetevők\': [\'quinoa\', \'uborka\', \'paradicsom\', \'vöröshagyma\', \'feta sajt\', \'citromlé\', \'olívaolaj\', \'petrezselyem\', \'só\', \'bors\']},\n {\'name\': \'Zöldturmix\',\n  \'összetevők\': [\'kale\', \'spenót\', \'banán\', \'alma\', \'chia mag\', \'mandulatej\']},\n {\'name\': \'Brokkolis csirke\',\n  \'összetevők\': [\'csirkemell\', \'brokkoli\', \'fokhagyma\', \'szójaszósz\', \'méz\', \'szezámmag\', \'olívaolaj\']},\n {\'name\': \'Avokádós tojássaláta\',\n  \'összetevők\': [\'tojás\', \'érett avokádó\', \'snidling\', \'lime lé\', \'joghurt\', \'mustár\', \'só\', \'bors\']},\n {\'name\': \'Céklasaláta dióval\',\n  \'összetevők\': [\'cékla\', \'dió\', \'feta sajt\', \'szárított áfonya\', \'olívaolaj\', \'balzsamecet\']},\n {\'name\': \'Lencsesaláta\',\n  \'összetevők\': [\'lencse\', \'cékla\', \'sárgarépa\', \'petrezselyem\', \'feta sajt\', \'olívaolaj\', \'balzsamecet\']},\n {\'name\': \'Kókusztejes csirke curry\',\n  \'összetevők\': [\

In [195]:
trans_hung_resp = translate_to_eng(hungarian_resp)

In [196]:
trans_hung_resp

['[{\'name\': "quinoa salad",\n  \'ingredients\': [\'quinoa\', \'cucumber\', \'tomato\', \'red onion\', \'feta cheese\', \'lemon juice\', \'olive oil\', \'parsley\', \'salt\', \'pepper\']},\n {\'name\': \'green smoothie\',\n  \'ingredients\': [\'kale\', \'spinach\', \'banana\', \'apple\', \'chia seed\', \'almond milk\']},\n {\'name\': \'broccoli chicken\',\n  \'ingredients\': [\'chicken breast\', \'broccoli\', \'garlic\', \'soy sauce\', \'honey\', \'sesame seeds\', \'olive oil\']},\n {\'name\': \'avocado egg salad\',\n  \'ingredients\': [\'egg\', \'ripe avocado\', \'snidling\', \'lime juice\', \'yogurt\', \'mustard\', \'salt\', \'pepper\']},\n {\'name\': \'beet salad with walnuts\',\n  \'ingredients\': [\'beetroot\', \'nuts\', \'feta cheese\', \'dried cranberries\', \'olive oil\', \'balsamic vinegar\']},\n {\'name\': \'lentil salad\',\n  \'ingredients\': [\'lentils\', \'beetroot\', \'carrot\', \'parsley\', \'feta cheese\', \'olive oil\', \'balsamic vinegar\']},\n {\'name\': \'coconut m

In [197]:
hung_df, hung_err = create_dataframe_from_responses_2(trans_hung_resp)

Error parsing response at index 12: unterminated string literal (detected at line 9) (<unknown>, line 9)
Error parsing response at index 28: closing parenthesis ']' does not match opening parenthesis '{' (<unknown>, line 11)


In [198]:
hung_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cucumber, tomato, red onion, feta che..."
1,green smoothie,"[kale, spinach, banana, apple, chia seed, almo..."
2,broccoli chicken,"[chicken breast, broccoli, garlic, soy sauce, ..."
3,avocado egg salad,"[egg, ripe avocado, snidling, lime juice, yogu..."
4,beet salad with walnuts,"[beetroot, nuts, feta cheese, dried cranberrie..."
...,...,...
275,lentil soup,"[lentils, carrots, starch-free corn, spinach, ..."
276,tofu stir-fry,"[tofu, broccoli, onion, pepper, soy sauce, ses..."
277,fibrous fruit smoothie,"[banana, blackberry, blueberry, basic yogurt, ..."
278,chickpea salad,"[chickpeas, red onion, cherry tomatoes, cucumb..."


In [201]:
hung_err

[(12,
  '[{\'name\': "quinoa salad",\n  \'ingredients\': [\'quinoa\', \'cherry tomatoes\', \'cucumber\', \'red onion\', \'feta cheese\', \'olive oil\', \'lemon juice\', \'parsley\', \'salt\', \'pepper\']},\n {\'name\': "vegetable stir-fry",\n  \'ingredients\': [\'broccoli\', \'carrot\', \'red pepper\', \'green beans\', \'soy sauce\', \'sesame seeds\', \'garlic\', \'ginger\', \'sesame oil\']},\n {\'name\': "avocado chicken wrap",\n  \'ingredients\': [\'chicken breast\', \'avocado\', \'romaine lettuce\', \'tomato\', \'whole wheat tortilla\', \'yogurt\', \'garlic powder\', \'lime juice\', \'salt\', \'pepper\']} ,\n {\'name\': "hummus",\n  \'ingredients\': [\'chickpeas\', \'tahini\', \'olive oil\', \'lemon juice\', \'garlic\', \'salt\', \'paprika\', \'parsley\']},\n {\'so..'),
 (28,
  '[{\'name\': \'broccoli cream soup\',\n  \'ingredients\': [\'broccoli\', \'onion\', \'garlic\', \'celery\', \'salt\', \'white pepper\', \'olive oil\', \'vegetable stock\']},\n {\'name\': \'quinoa salad\',\n  

In [199]:
all_dishes = []

# Regex to find dishes with different 'name' labels
dish_pattern = r"\{\s*'(?:m|the )?name'\s*:\s*'([^']+)',\s*'ingredients'\s*:\s*\[([^]]+)\]\}"

# Process each item in the data list
for _, dishes_str in hung_err:
    # Adjust quotes for matching
    adjusted_str = dishes_str.replace('"', "'")
    # Find all matches in the current string
    matches = re.findall(dish_pattern, adjusted_str)
    if not matches:
        print("No matches found in:", dishes_str)
    for match in matches:
        dish_name = match[0]
        # Remove extra quotes and split ingredients into a list
        ingredients_list = [ingredient.strip().strip("'") for ingredient in match[1].split(',')]
        all_dishes.append({'name': dish_name, 'ingredients': ingredients_list})

In [200]:
hung_err_df = pd.DataFrame(all_dishes)
hung_err_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cherry tomatoes, cucumber, red onion,..."
1,vegetable stir-fry,"[broccoli, carrot, red pepper, green beans, so..."
2,avocado chicken wrap,"[chicken breast, avocado, romaine lettuce, tom..."
3,hummus,"[chickpeas, tahini, olive oil, lemon juice, ga..."
4,broccoli cream soup,"[broccoli, onion, garlic, celery, salt, white ..."
5,quinoa salad,"[quinoa, cherry tomatoes, cucumber, red onion,..."
6,sweet potato soup,"[sweet potato, onion, garlic, ginger, coconut ..."
7,tofu stir-fry,"[tofu, broccoli, carrot, red pepper, soy sauce..."


In [204]:
new = call_openai_api(user_prompt_hung, system_prompt_hung, 2)
new

[{'name': "avokádós csicseriborsó saláta",
  'összetevők': ['avokádó', 'csicseriborsó', 'koktélparadicsom', 'uborka', 'vöröshagyma', 'citromlé', 'olívaolaj', 'só', 'frissen őrölt fekete bors', 'petrezselyem']},
 {'name': "quinoa és fekete bab",
  'összetevők': ['quinoa', 'fekete bab', 'kukorica', 'piros paprika', 'avokádó', 'lime', 'olívaolaj', 'köménymag', 'chili por', 'fokhagymapor', 'só']},
 {'name': 'lazacos salsa',
  'összetevők': ['lazac filé', 'lime', 'mango', 'piros hagyma', 'jalapeno', 'koriander', 'olívaolaj', 'só', 'fekete bors']},
 {'name': 'spenótos omlett',
  'összetevők': ['tojás', 'friss spenót', 'feta sajt', 'tejföl', 'vaj', 'só', 'fekete bors']},
 {'name': 'mandulás energiaszelet',
  'összetevők': ['mandula', 'aszalt áfonya', 'kókuszreszelék', 'méz', 'chia mag']},
 {'name': 'édesburgonya leves',
  'összetevők': ['édesburgonya', 'hagyma', 'fokhagyma', 'gyömbér', 'kókusztej', 'csirkeleves alaplé', 'kurkuma', 'chili pehely', 'koriander']},
 {'name': 'tofu stir-fry',
  'ö

['[{\'name\': "avokádós csicseriborsó saláta",\n  \'összetevők\': [\'avokádó\', \'csicseriborsó\', \'koktélparadicsom\', \'uborka\', \'vöröshagyma\', \'citromlé\', \'olívaolaj\', \'só\', \'frissen őrölt fekete bors\', \'petrezselyem\']},\n {\'name\': "quinoa és fekete bab",\n  \'összetevők\': [\'quinoa\', \'fekete bab\', \'kukorica\', \'piros paprika\', \'avokádó\', \'lime\', \'olívaolaj\', \'köménymag\', \'chili por\', \'fokhagymapor\', \'só\']},\n {\'name\': \'lazacos salsa\',\n  \'összetevők\': [\'lazac filé\', \'lime\', \'mango\', \'piros hagyma\', \'jalapeno\', \'koriander\', \'olívaolaj\', \'só\', \'fekete bors\']},\n {\'name\': \'spenótos omlett\',\n  \'összetevők\': [\'tojás\', \'friss spenót\', \'feta sajt\', \'tejföl\', \'vaj\', \'só\', \'fekete bors\']},\n {\'name\': \'mandulás energiaszelet\',\n  \'összetevők\': [\'mandula\', \'aszalt áfonya\', \'kókuszreszelék\', \'méz\', \'chia mag\']},\n {\'name\': \'édesburgonya leves\',\n  \'összetevők\': [\'édesburgonya\', \'hagyma\',

In [208]:
trans_hung_new = translate_to_eng(new)

In [209]:
hung_new_df, hung_new_err = create_dataframe_from_responses_2(trans_hung_new)

In [210]:
hung_new_df

Unnamed: 0,name,ingredients
0,avocado chickpea salad,"[avocado, chickpeas, cocktail tomatoes, cucumb..."
1,quinoa and black beans,"[quinoa, black beans, corn, red pepper, avocad..."
2,salmon salsa,"[salmon fillet, lime, mango, red onion, jalape..."
3,spinach omelette,"[egg, fresh spinach, feta cheese, sour cream, ..."
4,almond energy bar,"[almonds, dried cranberries, shredded coconut,..."
5,sweet potato soup,"[sweet potato, onion, garlic, ginger, coconut ..."
6,tofu stir-fry,"[tofu, fresh vegetable mixture (broccoli, carr..."
7,greek yogurt stuffed peppers,"[red bell pepper, greek yogurt, snake, garlic,..."
8,coconut milk chia pudding,"[chia seeds, coconut milk, honey, vanilla extr..."
9,broccoli chicken,"[chicken breast, broccoli, soy sauce, garlic, ..."


In [241]:
hung_result_df = pd.concat([hung_df, hung_new_df], axis=0)

In [450]:
hung_result_df.reset_index(drop=True, inplace=True)

In [451]:
hung_result_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cucumber, tomato, red onion, feta che..."
1,green smoothie,"[kale, spinach, banana, apple, chia seed, almo..."
2,broccoli chicken,"[chicken breast, broccoli, garlic, soy sauce, ..."
3,avocado egg salad,"[egg, ripe avocado, snidling, lime juice, yogu..."
4,beet salad with walnuts,"[beetroot, nuts, feta cheese, dried cranberrie..."
...,...,...
295,sweet potato meatloaf,"[sweet potato, black bean, coriander, onion, g..."
296,broccoli cream soup,"[broccoli, onion, garlic, celery, apple, cocon..."
297,chickpea curry,"[chickpeas, coconut milk, curry powder, onion,..."
298,spinach casserole,"[spinach, egg, feta cheese, sour cream, onion,..."


In [452]:
hung_result_df.to_pickle("../dataset/hung_result.pkl")

### Collect Hindi Version

In [219]:
user_prompt_hindi = generate_user_prompt('prompts/user_prompt_hindi.txt')
system_prompt_hindi = generate_system_prompt('prompts/system_prompt_hindi.txt')

In [47]:
hindi_resp = call_openai_api(user_prompt_hindi, system_prompt_hindi, 30)
hindi_resp

[{'नाम': 'क्विनोआ सलाद',
  'सामग्री': ['क्विनोआ', 'खीरा', 'टमाटर', 'लाल प्याज', 'जैतून का तेल', 'नींबू का रस', 'धनिया', 'नमक', 'काली मिर्च']},
 {'नाम': 'ओट्स और ब्लूबेरी स्मूथी',
  'सामग्री': ['ओट्स', 'ब्लूबेरी', 'दही', 'शहद', 'वेनिला एक्सट्रेक्ट', 'दूध']},
 {'नाम': 'चिकन सलाद',
  'सामग्री': ['उबला हुआ चिकन', 'हरा सलाद', 'खीरा', 'टमाटर', 'गाजर', 'लेमन ड्रेसिंग']},
 {'नाम': 'दाल तड़का',
  'सामग्री': ['तूर दाल', 'प्याज', 'लहसुन', 'टमाटर', 'हल्दी', 'जीरा', 'मस्तर्द ऑयल', 'नमक', 'हरी मिर्च', 'धनिया']},
 {'नाम': 'ग्रील्ड वेजिटेबल सैंडविच',
  'सामग्री': ['ब्राउन ब्रेड', 'ज़ुकीनी', 'बेल पेपर', 'प्याज़', 'पनीर', 'पेस्टो सॉस']},
 {'नाम': 'वेजिटेबल सूप',
  'सामग्री': ['गाजर', 'हरी बीन्स', 'मटर', 'शिमला मिर्च', 'प्याज', 'लहसुन', 'वेजिटेबल ब्रोथ', 'बे पत्ती', 'नमक', 'काली मिर्च']},
 {'नाम': 'तुरई की सब्जी',
  'सामग्री': ['तुरई', 'प्याज', 'टमाटर', 'लहसुन', 'जीरा', 'हल्दी', 'मिर्च पाउडर', 'धनिया पाउडर', 'नमक', 'तेल']},
 {'नाम': 'राजमा सलाद',
  'सामग्री': ['राजमा', 'लाल प्याज', 'टमाटर', 'खीरा', 'धनिया

["[{'नाम': 'क्विनोआ सलाद',\n  'सामग्री': ['क्विनोआ', 'खीरा', 'टमाटर', 'लाल प्याज', 'जैतून का तेल', 'नींबू का रस', 'धनिया', 'नमक', 'काली मिर्च']},\n {'नाम': 'ओट्स और ब्लूबेरी स्मूथी',\n  'सामग्री': ['ओट्स', 'ब्लूबेरी', 'दही', 'शहद', 'वेनिला एक्सट्रेक्ट', 'दूध']},\n {'नाम': 'चिकन सलाद',\n  'सामग्री': ['उबला हुआ चिकन', 'हरा सलाद', 'खीरा', 'टमाटर', 'गाजर', 'लेमन ड्रेसिंग']},\n {'नाम': 'दाल तड़का',\n  'सामग्री': ['तूर दाल', 'प्याज', 'लहसुन', 'टमाटर', 'हल्दी', 'जीरा', 'मस्तर्द ऑयल', 'नमक', 'हरी मिर्च', 'धनिया']},\n {'नाम': 'ग्रील्ड वेजिटेबल सैंडविच',\n  'सामग्री': ['ब्राउन ब्रेड', 'ज़ुकीनी', 'बेल पेपर', 'प्याज़', 'पनीर', 'पेस्टो सॉस']},\n {'नाम': 'वेजिटेबल सूप',\n  'सामग्री': ['गाजर', 'हरी बीन्स', 'मटर', 'शिमला मिर्च', 'प्याज', 'लहसुन', 'वेजिटेबल ब्रोथ', 'बे पत्ती', 'नमक', 'काली मिर्च']},\n {'नाम': 'तुरई की सब्जी',\n  'सामग्री': ['तुरई', 'प्याज', 'टमाटर', 'लहसुन', 'जीरा', 'हल्दी', 'मिर्च पाउडर', 'धनिया पाउडर', 'नमक', 'तेल']},\n {'नाम': 'राजमा सलाद',\n  'सामग्री': ['राजमा', 'लाल प्याज', 'टमाटर

In [48]:
# with open('hindi_resp.pkl', 'wb') as f:
#     pickle.dump(hindi_resp, f)

In [214]:
with open('responses/hindi_resp.pkl', 'rb') as f:
    hindi_resp = pickle.load(f)

In [215]:
hindi_resp

["[{'नाम': 'क्विनोआ सलाद',\n  'सामग्री': ['क्विनोआ', 'खीरा', 'टमाटर', 'लाल प्याज', 'जैतून का तेल', 'नींबू का रस', 'धनिया', 'नमक', 'काली मिर्च']},\n {'नाम': 'ओट्स और ब्लूबेरी स्मूथी',\n  'सामग्री': ['ओट्स', 'ब्लूबेरी', 'दही', 'शहद', 'वेनिला एक्सट्रेक्ट', 'दूध']},\n {'नाम': 'चिकन सलाद',\n  'सामग्री': ['उबला हुआ चिकन', 'हरा सलाद', 'खीरा', 'टमाटर', 'गाजर', 'लेमन ड्रेसिंग']},\n {'नाम': 'दाल तड़का',\n  'सामग्री': ['तूर दाल', 'प्याज', 'लहसुन', 'टमाटर', 'हल्दी', 'जीरा', 'मस्तर्द ऑयल', 'नमक', 'हरी मिर्च', 'धनिया']},\n {'नाम': 'ग्रील्ड वेजिटेबल सैंडविच',\n  'सामग्री': ['ब्राउन ब्रेड', 'ज़ुकीनी', 'बेल पेपर', 'प्याज़', 'पनीर', 'पेस्टो सॉस']},\n {'नाम': 'वेजिटेबल सूप',\n  'सामग्री': ['गाजर', 'हरी बीन्स', 'मटर', 'शिमला मिर्च', 'प्याज', 'लहसुन', 'वेजिटेबल ब्रोथ', 'बे पत्ती', 'नमक', 'काली मिर्च']},\n {'नाम': 'तुरई की सब्जी',\n  'सामग्री': ['तुरई', 'प्याज', 'टमाटर', 'लहसुन', 'जीरा', 'हल्दी', 'मिर्च पाउडर', 'धनिया पाउडर', 'नमक', 'तेल']},\n {'नाम': 'राजमा सलाद',\n  'सामग्री': ['राजमा', 'लाल प्याज', 'टमाटर

In [216]:
trans_hindi_resp = translate_to_eng(hindi_resp)

In [217]:
hindi_df, hindi_err = create_dataframe_from_responses_2(trans_hindi_resp)

Error parsing response at index 0: '[' was never closed (<unknown>, line 1)
Error parsing response at index 1: unterminated string literal (detected at line 5) (<unknown>, line 5)
Error parsing response at index 3: unterminated string literal (detected at line 11) (<unknown>, line 11)
Error parsing response at index 4: unterminated string literal (detected at line 18) (<unknown>, line 18)
Error parsing response at index 5: unterminated string literal (detected at line 11) (<unknown>, line 11)
Error parsing response at index 12: unterminated string literal (detected at line 5) (<unknown>, line 5)
Error parsing response at index 13: '[' was never closed (<unknown>, line 1)
Error parsing response at index 14: '[' was never closed (<unknown>, line 1)
Error parsing response at index 19: unterminated string literal (detected at line 17) (<unknown>, line 17)
Error parsing response at index 20: '[' was never closed (<unknown>, line 1)
Error parsing response at index 21: '[' was never closed (<

In [218]:
hindi_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cherry tomatoes, cucumber, red onion,..."
1,oats and chia seed pudding,"[oats, chia seeds, milk or milk alternative, h..."
2,broccoli soup,"[broccoli, onion, garlic, vegetable stock, sal..."
3,turkey and vegetable stir fry,"[turkey breast, red bell pepper, onion, brocco..."
4,smoothie bowl,"[frozen berries, banana, spirulina powder, mil..."
...,...,...
145,vegetable smoothie,"[spinach, banana, yoghurt, chia seeds, water]"
146,tofu salad,"[tofu, mixed salad greens, tomatoes, cucumber,..."
147,broccoli and chicken casserole,"[broccoli, shredded chicken, low fat milk, oni..."
148,fruit yogurt parfait,"[greek yogurt, honey, mixed berries, granola]"


In [220]:
hindi_new = call_openai_api(user_prompt_hindi, system_prompt_hindi, 5)
hindi_new

[{'नाम': 'क्विनोआ सलाद',
  'सामग्री': [
   'क्विनोआ',
   'खीरा',
   'टमाटर',
   'लाल प्याज',
   'लेमन जूस',
   'जैतून का तेल',
   'काली मिर्च',
   'नमक',
   'ताजा धनिया']},
 {'नाम': 'ओटमील',
  'सामग्री': [
   'ओट्स',
   'पानी या दूध',
   'कटा हुआ फल',
   'नट्स',
   'शहद']},
 {'नाम': 'चिकन सूप',
  'सामग्री': [
   'चिकन ब्रेस्ट',
   'गाजर',
   'अजवाइन',
   'प्याज',
   'चिकन स्टॉक',
   'नमक',
   'काली मिर्च',
   'हरी धनिया']},
 {'नाम': 'ग्रिल्ड सैल्मन',
  'सामग्री': [
   'सैल्मन फिललेट्स',
   'लेमन जूस',
   'जैतून का तेल',
   'नमक',
   'काली मिर्च',
   'ताजा अजवायन की पत्ती']},
 {'नाम': 'टफू स्टिर-फ्राई',
  'सामग्री': [
   'टोफू',
   'विविध सब्जियाँ (ब्रॉकली, बेल पेपर, कटी सब्जियां)',
   'सोया सॉस',
   'अदरक',
   'लहसुन',
   'तिल का तेल']},
 {'नाम': 'वेजिटेबल करी',
  'सामग्री': [
   'करी पाउडर',
   'नारियल दूध',
   'विविध सब्जियाँ',
   'प्याज',
   'टमाटर',
   'लहसुन',
   'अदरक',
   'नमक']},
 {'नाम': 'ब्राउन राइस पिलाफ',
  'सामग्री': [
   'ब्राउन राइस',
   'चिकन या सब्जी स्टॉक',
   'प्याज'

["[{'नाम': 'क्विनोआ सलाद',\n  'सामग्री': [\n   'क्विनोआ',\n   'खीरा',\n   'टमाटर',\n   'लाल प्याज',\n   'लेमन जूस',\n   'जैतून का तेल',\n   'काली मिर्च',\n   'नमक',\n   'ताजा धनिया']},\n {'नाम': 'ओटमील',\n  'सामग्री': [\n   'ओट्स',\n   'पानी या दूध',\n   'कटा हुआ फल',\n   'नट्स',\n   'शहद']},\n {'नाम': 'चिकन सूप',\n  'सामग्री': [\n   'चिकन ब्रेस्ट',\n   'गाजर',\n   'अजवाइन',\n   'प्याज',\n   'चिकन स्टॉक',\n   'नमक',\n   'काली मिर्च',\n   'हरी धनिया']},\n {'नाम': 'ग्रिल्ड सैल्मन',\n  'सामग्री': [\n   'सैल्मन फिललेट्स',\n   'लेमन जूस',\n   'जैतून का तेल',\n   'नमक',\n   'काली मिर्च',\n   'ताजा अजवायन की पत्ती']},\n {'नाम': 'टफू स्टिर-फ्राई',\n  'सामग्री': [\n   'टोफू',\n   'विविध सब्जियाँ (ब्रॉकली, बेल पेपर, कटी सब्जियां)',\n   'सोया सॉस',\n   'अदरक',\n   'लहसुन',\n   'तिल का तेल']},\n {'नाम': 'वेजिटेबल करी',\n  'सामग्री': [\n   'करी पाउडर',\n   'नारियल दूध',\n   'विविध सब्जियाँ',\n   'प्याज',\n   'टमाटर',\n   'लहसुन',\n   'अदरक',\n   'नमक']},\n {'नाम': 'ब्राउन राइस पिलाफ',\n  'सामग्री':

In [221]:
trans_hindi_new = translate_to_eng(hindi_new)

In [222]:
hindi_new_df, hindi_new_err = create_dataframe_from_responses_2(trans_hindi_new)

Error parsing response at index 1: unterminated string literal (detected at line 10) (<unknown>, line 10)
Error parsing response at index 2: unterminated string literal (detected at line 17) (<unknown>, line 17)
Error parsing response at index 3: unterminated string literal (detected at line 5) (<unknown>, line 5)


In [223]:
hindi_new_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cucumber, tomatoes, red onion, lemon ..."
1,oatmeal,"[oats, water or milk, chopped fruit, nuts, honey]"
2,chicken soup,"[chicken breast, carrots, celery, onion, chick..."
3,grilled salmon,"[salmon fillets, lemon juice, olive oil, salt,..."
4,tofu stir-fry,"[tofu, miscellaneous vegetables (broccoli, bel..."
5,vegetable curry,"[curry powder, coconut milk, miscellaneous veg..."
6,brown rice pilaf,"[brown rice, chicken or vegetable stock, onion..."
7,vegan chili,"[black beans, kidney beans, tomato paste, onio..."
8,smoothie bowls,"[frozen berries, banana, greek yogurt, honey, ..."
9,greek salad,"[romaine lettuce, cucumber, tomatoes, kalamata..."


In [224]:
hindi_new2 = call_openai_api(user_prompt_hindi, system_prompt_hindi, 3)
hindi_new2

[{'नाम': 'क्विनोआ सलाद',
  'सामग्री': ['क्विनोआ', 'खीरा', 'टमाटर', 'नींबू का रस', 'जैतून का तेल', 'नमक', 'काली मिर्च', 'धनिया']},
 {'नाम': 'ओट्स और चिया सीड पुडिंग',
  'सामग्री': ['ओट्स', 'चिया सीड्स', 'दूध या नारियल का दूध', 'शहद', 'वनीला एक्सट्रैक्ट', 'कटे हुए फल']},
 {'नाम': 'तुर्की और सब्जी सूप',
  'सामग्री': ['तुर्की का मांस', 'गाजर', 'अजवाइन', 'प्याज', 'टमाटर का पेस्ट', 'चिकन शोरबा', 'नमक', 'काली मिर्च', 'थाईम']},
 {'नाम': 'मिश्रित बेरी स्मूदी',
  'सामग्री': ['ब्लूबेरी', 'रसभरी', 'स्ट्रॉबेरी', 'दही', 'जैतून का तेल', 'शहद']},
 {'नाम': 'पालक और केले की स्मूदी',
  'सामग्री': ['पालक', 'केला', 'दूध या सोया मिल्क', 'शहद', 'फ्लेक्स सीड्स']},
 {'नाम': 'चिकन सलाद',
  'सामग्री': ['बोनलेस चिकन ब्रेस्ट', 'मिक्स्ड सलाद ग्रीन्स', 'टमाटर', 'खीरा', 'जैतून का तेल', 'नींबू का रस', 'नमक', 'काली मिर्च', 'धनिया']},
 {'नाम': 'शकरकंदी और पालक की टिक्की',
  'सामग्री': ['शकरकंदी', 'पालक', 'बेसन', 'साबुदाना', 'नमक', 'जीरा पाउडर', 'धनिया']},
 {'नाम': 'राजमा करी',
  'सामग्री': ['राजमा', 'प्याज', 'लहसुन', 'ट

["[{'नाम': 'क्विनोआ सलाद',\n  'सामग्री': ['क्विनोआ', 'खीरा', 'टमाटर', 'नींबू का रस', 'जैतून का तेल', 'नमक', 'काली मिर्च', 'धनिया']},\n {'नाम': 'ओट्स और चिया सीड पुडिंग',\n  'सामग्री': ['ओट्स', 'चिया सीड्स', 'दूध या नारियल का दूध', 'शहद', 'वनीला एक्सट्रैक्ट', 'कटे हुए फल']},\n {'नाम': 'तुर्की और सब्जी सूप',\n  'सामग्री': ['तुर्की का मांस', 'गाजर', 'अजवाइन', 'प्याज', 'टमाटर का पेस्ट', 'चिकन शोरबा', 'नमक', 'काली मिर्च', 'थाईम']},\n {'नाम': 'मिश्रित बेरी स्मूदी',\n  'सामग्री': ['ब्लूबेरी', 'रसभरी', 'स्ट्रॉबेरी', 'दही', 'जैतून का तेल', 'शहद']},\n {'नाम': 'पालक और केले की स्मूदी',\n  'सामग्री': ['पालक', 'केला', 'दूध या सोया मिल्क', 'शहद', 'फ्लेक्स सीड्स']},\n {'नाम': 'चिकन सलाद',\n  'सामग्री': ['बोनलेस चिकन ब्रेस्ट', 'मिक्स्ड सलाद ग्रीन्स', 'टमाटर', 'खीरा', 'जैतून का तेल', 'नींबू का रस', 'नमक', 'काली मिर्च', 'धनिया']},\n {'नाम': 'शकरकंदी और पालक की टिक्की',\n  'सामग्री': ['शकरकंदी', 'पालक', 'बेसन', 'साबुदाना', 'नमक', 'जीरा पाउडर', 'धनिया']},\n {'नाम': 'राजमा करी',\n  'सामग्री': ['राजमा', 'प्

In [225]:
trans_hindi_new2 = translate_to_eng(hindi_new2)

In [226]:
hindi_new_df2, hindi_new_err2 = create_dataframe_from_responses_2(trans_hindi_new2)

Error parsing response at index 1: unterminated string literal (detected at line 17) (<unknown>, line 17)


In [227]:
hindi_new_df2

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cucumber, tomatoes, lemon juice, oliv..."
1,oats and chia seed pudding,"[oats, chia seeds, milk or coconut milk, honey..."
2,turkey and vegetable soup,"[turkey meat, carrots, celery, onion, tomato p..."
3,mixed berry smoothie,"[blueberries, raspberries, strawberries, yogur..."
4,spinach and banana smoothie,"[spinach, banana, milk or soy milk, honey, fla..."
5,chicken salad,"[boneless chicken breast, mixed salad greens, ..."
6,sweet potato and spinach tikki,"[sweet potato, spinach, gram flour, sabudana, ..."
7,rajma curry,"[rajma, onion, garlic, tomatoes, cumin seeds, ..."
8,vegetable stir fry,"[broccoli, carrot, capsicum, onion, ginger, ga..."
9,moong dal khichdi,"[moong dal, rice, turmeric powder, cumin seeds..."


In [232]:
hindi_new3 = call_openai_api(user_prompt_hindi, system_prompt_hindi, 5)
hindi_new3

[{'नाम': 'ओट्स उपमा',
  'सामग्री': ['ओट्स', 'हरी मिर्च', 'प्याज', 'सरसों के बीज', 'उड़द दाल', 'करी पत्ता', 'हल्दी', 'नमक', 'तेल', 'हरा धनिया']},
 {'नाम': 'क्विनोआ सलाद',
  'सामग्री': ['क्विनोआ', 'खीरा', 'टमाटर', 'प्याज', 'हरी मिर्च', 'हरा धनिया', 'नींबू का रस', 'ऑलिव ऑयल', 'काली मिर्च', 'नमक']},
 {'नाम': 'मक्खना स्नैक',
  'सामग्री': ['मक्खना', 'हल्दी', 'लाल मिर्च पाउडर', 'चाट मसाला', 'नमक', 'घी']},
 {'नाम': 'चना सलाद',
  'सामग्री': ['काबुली चना', 'खीरा', 'टमाटर', 'प्याज', 'हरी मिर्च', 'हरा धनिया', 'नींबू का रस', 'चाट मसाला', 'काली मिर्च', 'नमक']},
 {'नाम': 'पालक चिकन',
  'सामग्री': ['चिकन ब्रेस्ट', 'पालक', 'दही', 'अदरक-लहसुन पेस्ट', 'हल्दी', 'धनिया पाउडर', 'गरम मसाला', 'नमक', 'तेल']},
 {'नाम': 'कच्चे पपीते का सलाद',
  'सामग्री': ['कच्चा पपीता', 'हरी मिर्च', 'टमाटर', 'हरा धनिया', 'मूंगफली', 'नींबू का रस', 'चीनी', 'सोया सॉस', 'सेंधा नमक']},
 {'नाम': 'ब्रोकोली सूप',
  'सामग्री': ['ब्रोकोली', 'प्याज', 'लहसुन', 'वेजिटेबल स्टॉक', 'ऑलिव ऑयल', 'नमक', 'काली मिर्च', 'क्रीम']},
 {'नाम': 'स्वीट पो

["[{'नाम': 'ओट्स उपमा',\n  'सामग्री': ['ओट्स', 'हरी मिर्च', 'प्याज', 'सरसों के बीज', 'उड़द दाल', 'करी पत्ता', 'हल्दी', 'नमक', 'तेल', 'हरा धनिया']},\n {'नाम': 'क्विनोआ सलाद',\n  'सामग्री': ['क्विनोआ', 'खीरा', 'टमाटर', 'प्याज', 'हरी मिर्च', 'हरा धनिया', 'नींबू का रस', 'ऑलिव ऑयल', 'काली मिर्च', 'नमक']},\n {'नाम': 'मक्खना स्नैक',\n  'सामग्री': ['मक्खना', 'हल्दी', 'लाल मिर्च पाउडर', 'चाट मसाला', 'नमक', 'घी']},\n {'नाम': 'चना सलाद',\n  'सामग्री': ['काबुली चना', 'खीरा', 'टमाटर', 'प्याज', 'हरी मिर्च', 'हरा धनिया', 'नींबू का रस', 'चाट मसाला', 'काली मिर्च', 'नमक']},\n {'नाम': 'पालक चिकन',\n  'सामग्री': ['चिकन ब्रेस्ट', 'पालक', 'दही', 'अदरक-लहसुन पेस्ट', 'हल्दी', 'धनिया पाउडर', 'गरम मसाला', 'नमक', 'तेल']},\n {'नाम': 'कच्चे पपीते का सलाद',\n  'सामग्री': ['कच्चा पपीता', 'हरी मिर्च', 'टमाटर', 'हरा धनिया', 'मूंगफली', 'नींबू का रस', 'चीनी', 'सोया सॉस', 'सेंधा नमक']},\n {'नाम': 'ब्रोकोली सूप',\n  'सामग्री': ['ब्रोकोली', 'प्याज', 'लहसुन', 'वेजिटेबल स्टॉक', 'ऑलिव ऑयल', 'नमक', 'काली मिर्च', 'क्रीम']},\n {

In [233]:
trans_hindi3 = translate_to_eng(hindi_new3)

In [234]:
hindi3_df, hindi3_err = create_dataframe_from_responses_2(trans_hindi3)

Error parsing response at index 0: unterminated string literal (detected at line 35) (<unknown>, line 35)
Error parsing response at index 2: '[' was never closed (<unknown>, line 1)
Error parsing response at index 3: '[' was never closed (<unknown>, line 1)


In [235]:
hindi3_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cucumber, tomatoes, red onion, olive ..."
1,oats and fruit smoothie,"[oats, banana, berries, almond milk, honey, ci..."
2,greek yogurt parfait,"[greek yogurt, honey, granola, fresh berries, ..."
3,chia seed pudding,"[chia seeds, coconut milk, honey, vanilla extr..."
4,sweet potato toast,"[sweet potato, avocado, lemon juice, salt, pep..."
5,broccoli and chickpea salad,"[broccoli, chickpeas, red bell pepper, onion, ..."
6,ribbon and moong dal soup,"[ridge gourd, moong dal, onion, tomatoes, cumi..."
7,vegetable stir fry,"[broccoli, carrot, bell pepper, soy sauce, gin..."
8,black chickpea chaat,"[black chickpeas, onion, tomato, cucumber, lem..."
9,spinach and corn curry,"[spinach, corn, garlic, onion, oil, salt, red ..."


In [228]:
hindi_new4 = call_openai_api(user_prompt_hindi, system_prompt_hindi, 15)
hindi_new4

[{'नाम': 'क्विनोआ सलाद',
  'सामग्री': ['क्विनोआ', 'खीरा', 'टमाटर', 'लाल शिमला मिर्च', 'प्याज', 'नींबू का रस', 'जैतून का तेल', 'नमक', 'काली मिर्च']},
 {'नाम': 'दाल का सूप',
  'सामग्री': ['मसूर दाल', 'प्याज', 'लहसुन', 'टमाटर', 'हल्दी पाउडर', 'जीरा', 'पानी']},
 {'नाम': 'चिकन सलाद',
  'सामग्री': ['चिकन ब्रेस्ट', 'हरी सलाद पत्ते', 'चेरी टमाटर', 'खीरा', 'जैतून का तेल', 'नमक', 'नींबू का रस']},
 {'नाम': 'टोफू स्टिर-फ्राई',
  'सामग्री': ['टोफू', 'मिक्स वेजिटेबल्स', 'सोया सॉस', 'लहसुन', 'अदरक', 'तिल का तेल']},
 {'नाम': 'व्होल व्हीट पेनकेक्स',
  'सामग्री': ['अखमीरित गेहूं का आटा', 'दूध', 'अंडा', 'वनस्पति तेल', 'बेकिंग पाउडर', 'शहद']},
 {'नाम': 'ओट और ब्लूबेरी स्मूदी',
  'सामग्री': ['ओट्स', 'ब्लूबेरी', 'दही', 'शहद', 'पानी']},
 {'नाम': 'चिया सीड पुडिंग',
  'सामग्री': ['चिया सीड्स', 'दूध', 'मेपल सिरप', 'वेनिला एक्सट्रैक्ट']},
 {'नाम': 'ब्रोकोली और चेडर सूप',
  'सामग्री': ['ब्रोकोली', 'प्याज', 'चेडर चीज', 'चिकन शोरबा', 'क्रीम', 'मक्खन', 'लहसुन']},
 {'नाम': 'स्पाइसी स्वीट पोटैटो फ्राईज',
  'सामग्री': 

["[{'नाम': 'क्विनोआ सलाद',\n  'सामग्री': ['क्विनोआ', 'खीरा', 'टमाटर', 'लाल शिमला मिर्च', 'प्याज', 'नींबू का रस', 'जैतून का तेल', 'नमक', 'काली मिर्च']},\n {'नाम': 'दाल का सूप',\n  'सामग्री': ['मसूर दाल', 'प्याज', 'लहसुन', 'टमाटर', 'हल्दी पाउडर', 'जीरा', 'पानी']},\n {'नाम': 'चिकन सलाद',\n  'सामग्री': ['चिकन ब्रेस्ट', 'हरी सलाद पत्ते', 'चेरी टमाटर', 'खीरा', 'जैतून का तेल', 'नमक', 'नींबू का रस']},\n {'नाम': 'टोफू स्टिर-फ्राई',\n  'सामग्री': ['टोफू', 'मिक्स वेजिटेबल्स', 'सोया सॉस', 'लहसुन', 'अदरक', 'तिल का तेल']},\n {'नाम': 'व्होल व्हीट पेनकेक्स',\n  'सामग्री': ['अखमीरित गेहूं का आटा', 'दूध', 'अंडा', 'वनस्पति तेल', 'बेकिंग पाउडर', 'शहद']},\n {'नाम': 'ओट और ब्लूबेरी स्मूदी',\n  'सामग्री': ['ओट्स', 'ब्लूबेरी', 'दही', 'शहद', 'पानी']},\n {'नाम': 'चिया सीड पुडिंग',\n  'सामग्री': ['चिया सीड्स', 'दूध', 'मेपल सिरप', 'वेनिला एक्सट्रैक्ट']},\n {'नाम': 'ब्रोकोली और चेडर सूप',\n  'सामग्री': ['ब्रोकोली', 'प्याज', 'चेडर चीज', 'चिकन शोरबा', 'क्रीम', 'मक्खन', 'लहसुन']},\n {'नाम': 'स्पाइसी स्वीट पोटैटो फ्रा

In [229]:
trans_hindi4 = translate_to_eng(hindi_new4)

In [230]:
hindi4_df, hindi4_err = create_dataframe_from_responses_2(trans_hindi4)

Error parsing response at index 2: '[' was never closed (<unknown>, line 1)
Error parsing response at index 3: '[' was never closed (<unknown>, line 1)
Error parsing response at index 4: '[' was never closed (<unknown>, line 1)
Error parsing response at index 6: '[' was never closed (<unknown>, line 1)
Error parsing response at index 7: '[' was never closed (<unknown>, line 1)
Error parsing response at index 8: '[' was never closed (<unknown>, line 1)
Error parsing response at index 9: '[' was never closed (<unknown>, line 1)
Error parsing response at index 14: unterminated string literal (detected at line 161) (<unknown>, line 161)


In [231]:
hindi4_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cucumber, tomatoes, red bell pepper, ..."
1,lentil soup,"[lentil, onion, garlic, tomatoes, turmeric pow..."
2,chicken salad,"[chicken breast, green lettuce, cherry tomatoe..."
3,tofu stir-fry,"[tofu, mixed vegetables, soy sauce, garlic, gi..."
4,whole wheat pancakes,"[unleavened wheat flour, milk, egg, vegetable ..."
...,...,...
65,moong dal cheela,"[moong dal, green chillies, ginger, turmeric p..."
66,oats upma,"[oats, mustard seeds, urad dal, chana dal, gre..."
67,greek salad,"[cucumbers, tomatoes, onions, feta cheese, bla..."
68,roasted vegetables,"[broccoli, carrots, sweet potatoes, garlic, th..."


In [236]:
hindi_new5 = call_openai_api(user_prompt_hindi, system_prompt_hindi, 5)
hindi_new5

यहां 10 स्वस्थ व्यंजनों की सूची है, उनकी सामग्री के साथ:

1. {'नाम': 'क्विनोआ सलाद',
   'सामग्री': ['क्विनोआ', 'खीरा', 'टमाटर', 'लाल प्याज', 'ताजा धनिया', 'नींबू का रस', 'जैतून का तेल', 'काली मिर्च', 'नमक']
  }

2. {'नाम': 'मसूर दाल सूप',
   'सामग्री': ['मसूर दाल', 'प्याज', 'गाजर', 'अजवाइन', 'टमाटर', 'लहसुन', 'जीरा', 'तेल', 'पानी', 'नमक', 'काली मिर्च']
  }

3. {'नाम': 'ओट्स और नट्स स्मूथी',
   'सामग्री': ['ओट्स', 'बादाम दूध', 'केला', 'अखरोट', 'चिया सीड्स', 'शहद']
  }

4. {'नाम': 'स्वीट पोटैटो चाट',
   'सामग्री': ['शकरकंद', 'नींबू का रस', 'चाट मसाला', 'हरी मिर्च', 'ताजा धनिया', 'अनारदाना', 'नमक']
  }

5. {'नाम': 'टोफू स्टिर फ्राई',
   'सामग्री': ['टोफू', 'ब्रोकोली', 'गाजर', 'शिमला मिर्च', 'सोया सॉस', 'लहसुन', 'अदरक', 'तिल का तेल', 'तिल']
  }

6. {'नाम': 'चिकन सलाद',
   'सामग्री': ['चिकन ब्रेस्ट', 'लेट्यूस', 'चेरी टमाटर', 'खीरा', 'जैतून', 'अवोकाडो', 'फेटा चीज़', 'जैतून का तेल', 'नींबू का रस']
  }

7. {'नाम': 'स्पिनेच और मशरूम ओमलेट',
   'सामग्री': ['अंडे', 'पालक', 'मशरूम', 'प्याज', 'लहसु

["यहां 10 स्वस्थ व्यंजनों की सूची है, उनकी सामग्री के साथ:\n\n1. {'नाम': 'क्विनोआ सलाद',\n   'सामग्री': ['क्विनोआ', 'खीरा', 'टमाटर', 'लाल प्याज', 'ताजा धनिया', 'नींबू का रस', 'जैतून का तेल', 'काली मिर्च', 'नमक']\n  }\n\n2. {'नाम': 'मसूर दाल सूप',\n   'सामग्री': ['मसूर दाल', 'प्याज', 'गाजर', 'अजवाइन', 'टमाटर', 'लहसुन', 'जीरा', 'तेल', 'पानी', 'नमक', 'काली मिर्च']\n  }\n\n3. {'नाम': 'ओट्स और नट्स स्मूथी',\n   'सामग्री': ['ओट्स', 'बादाम दूध', 'केला', 'अखरोट', 'चिया सीड्स', 'शहद']\n  }\n\n4. {'नाम': 'स्वीट पोटैटो चाट',\n   'सामग्री': ['शकरकंद', 'नींबू का रस', 'चाट मसाला', 'हरी मिर्च', 'ताजा धनिया', 'अनारदाना', 'नमक']\n  }\n\n5. {'नाम': 'टोफू स्टिर फ्राई',\n   'सामग्री': ['टोफू', 'ब्रोकोली', 'गाजर', 'शिमला मिर्च', 'सोया सॉस', 'लहसुन', 'अदरक', 'तिल का तेल', 'तिल']\n  }\n\n6. {'नाम': 'चिकन सलाद',\n   'सामग्री': ['चिकन ब्रेस्ट', 'लेट्यूस', 'चेरी टमाटर', 'खीरा', 'जैतून', 'अवोकाडो', 'फेटा चीज़', 'जैतून का तेल', 'नींबू का रस']\n  }\n\n7. {'नाम': 'स्पिनेच और मशरूम ओमलेट',\n   'सामग्री': ['अंडे', 'प

In [237]:
trans_hindi5 = translate_to_eng(hindi_new5)

In [238]:
hindi5_df, hindi5_err = create_dataframe_from_responses_2(trans_hindi5)

Error parsing response at index 0: unterminated string literal (detected at line 16) (<unknown>, line 16)
Error parsing response at index 3: unterminated string literal (detected at line 11) (<unknown>, line 11)
Error parsing response at index 4: '[' was never closed (<unknown>, line 1)


In [240]:
hindi_result_df = pd.concat([hindi_df, hindi_new_df, hindi_new_df2, hindi3_df, hindi4_df, hindi5_df], axis=0)

In [453]:
hindi_result_df.reset_index(drop=True, inplace=True)

In [454]:
hindi_result_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cherry tomatoes, cucumber, red onion,..."
1,oats and chia seed pudding,"[oats, chia seeds, milk or milk alternative, h..."
2,broccoli soup,"[broccoli, onion, garlic, vegetable stock, sal..."
3,turkey and vegetable stir fry,"[turkey breast, red bell pepper, onion, brocco..."
4,smoothie bowl,"[frozen berries, banana, spirulina powder, mil..."
...,...,...
295,vegetable stir fry,"[broccoli, carrot, red capsicum, ginger, garli..."
296,palak paneer,"[spinach, paneer, onion, tomatoes, ginger-garl..."
297,masoor dal,"[masoor dal, onion, tomatoes, turmeric powder,..."
298,tofu tikka,"[tofu, yoghurt, tandoori masala, garlic paste,..."


In [455]:
hindi_result_df.to_pickle("../dataset/hindi_result.pkl")

### Collect Irish Version

In [245]:
user_prompt_irish = generate_user_prompt('prompts/user_prompt_irish.txt')
system_prompt_irish = generate_system_prompt('prompts/system_prompt_irish.txt')

In [51]:
irish_resp = call_openai_api(user_prompt_irish, system_prompt_irish, 30)
irish_resp

[{'ainm': 'sailéad spinach agus sútha talún',
  'comhábhair': ['spinach úr', 'sútha talún', 'cnónna gallchnó', 'feta', 'olóige', 'fínéagar balsamach']},
 {'ainm': 'smoothie mango agus spionáiste',
  'comhábhair': ['mango', 'spionáiste', 'síolta chia', 'bainne almond', 'mil']},
 {'ainm': 'oideas guacamole',
  'comhábhair': ['avocado', 'trátaí', 'oigheann', 'sú aol', 'piobar cayenne', 'cilantro']},
 {'ainm': 'tacos éisc',
  'comhábhair': ['éisc', 'cabáiste', 'iomlán iógart Gréagach', 'sú aoil', 'tortillas arbhar']},
 {'ainm': 'anraith glasraí',
  'comhábhair': ['curraí dubh', 'cáiréid', 'piobair dearg', 'brocailí', 'uisce', 'luibheanna']},
 {'ainm': 'bowl quinoa',
  'comhábhair': ['quinoa', 'piobair', 'cipíní', 'uaineoil', 'pónairí gorma', 'avocado']},
 {'ainm': 'salad ceithre phónaire',
  'comhábhair': ['pónairí dubha', 'pónairí dearga', 'pónairí bán', 'peasair ghlasa', 'oigheann', 'fínéagar']},
 {'ainm': 'éisc bácáilte le luibheanna',
  'comhábhair': ['éisc', 'peirsil', 'dill', 'thyme'

["[{'ainm': 'sailéad spinach agus sútha talún',\n  'comhábhair': ['spinach úr', 'sútha talún', 'cnónna gallchnó', 'feta', 'olóige', 'fínéagar balsamach']},\n {'ainm': 'smoothie mango agus spionáiste',\n  'comhábhair': ['mango', 'spionáiste', 'síolta chia', 'bainne almond', 'mil']},\n {'ainm': 'oideas guacamole',\n  'comhábhair': ['avocado', 'trátaí', 'oigheann', 'sú aol', 'piobar cayenne', 'cilantro']},\n {'ainm': 'tacos éisc',\n  'comhábhair': ['éisc', 'cabáiste', 'iomlán iógart Gréagach', 'sú aoil', 'tortillas arbhar']},\n {'ainm': 'anraith glasraí',\n  'comhábhair': ['curraí dubh', 'cáiréid', 'piobair dearg', 'brocailí', 'uisce', 'luibheanna']},\n {'ainm': 'bowl quinoa',\n  'comhábhair': ['quinoa', 'piobair', 'cipíní', 'uaineoil', 'pónairí gorma', 'avocado']},\n {'ainm': 'salad ceithre phónaire',\n  'comhábhair': ['pónairí dubha', 'pónairí dearga', 'pónairí bán', 'peasair ghlasa', 'oigheann', 'fínéagar']},\n {'ainm': 'éisc bácáilte le luibheanna',\n  'comhábhair': ['éisc', 'peirsil'

In [52]:
# with open('irish_resp.pkl', 'wb') as f:
#     pickle.dump(irish_resp, f)

In [246]:
with open('responses/irish_resp.pkl', 'rb') as f:
    irish_resp = pickle.load(f)

In [247]:
irish_resp

["[{'ainm': 'sailéad spinach agus sútha talún',\n  'comhábhair': ['spinach úr', 'sútha talún', 'cnónna gallchnó', 'feta', 'olóige', 'fínéagar balsamach']},\n {'ainm': 'smoothie mango agus spionáiste',\n  'comhábhair': ['mango', 'spionáiste', 'síolta chia', 'bainne almond', 'mil']},\n {'ainm': 'oideas guacamole',\n  'comhábhair': ['avocado', 'trátaí', 'oigheann', 'sú aol', 'piobar cayenne', 'cilantro']},\n {'ainm': 'tacos éisc',\n  'comhábhair': ['éisc', 'cabáiste', 'iomlán iógart Gréagach', 'sú aoil', 'tortillas arbhar']},\n {'ainm': 'anraith glasraí',\n  'comhábhair': ['curraí dubh', 'cáiréid', 'piobair dearg', 'brocailí', 'uisce', 'luibheanna']},\n {'ainm': 'bowl quinoa',\n  'comhábhair': ['quinoa', 'piobair', 'cipíní', 'uaineoil', 'pónairí gorma', 'avocado']},\n {'ainm': 'salad ceithre phónaire',\n  'comhábhair': ['pónairí dubha', 'pónairí dearga', 'pónairí bán', 'peasair ghlasa', 'oigheann', 'fínéagar']},\n {'ainm': 'éisc bácáilte le luibheanna',\n  'comhábhair': ['éisc', 'peirsil'

In [248]:
trans_irish_resp = translate_to_eng(irish_resp)

In [510]:
trans_irish_resp 

["[{'name': 'spinach and strawberry salad',\n  'ingredients': ['fresh spinach', 'strawberries', 'walnuts', 'feta', 'olive', 'balsamic vinegar']},\n {'name': 'mango and spinach smoothie',\n  'ingredients': ['mango', 'spinach', 'chia seeds', 'almond milk', 'honey']},\n {'name': 'guacamole recipe',\n  'ingredients': ['avocado', 'tomatoes', 'oven', 'lime juice', 'cayenne pepper', 'cilantro']},\n {'name': 'fish tacos',\n  'ingredients': ['fish', 'cabbage', 'whole greek yogurt', 'lime juice', 'corn tortillas']},\n {'name': 'vegetable soup',\n  'ingredients': ['black curry', 'carrots', 'red peppers', 'broccoli', 'water', 'herbs']},\n {'name': 'quinoa bowl',\n  'ingredients': ['quinoa', 'peppers', 'chickens', 'lamb', 'blue beans', 'avocado']},\n {'name': 'four bean salad',\n  'ingredients': ['black beans', 'red beans', 'white beans', 'green peas', 'oven', 'vinegar']},\n {'name': 'herb baked fish',\n  'ingredients': ['fish', 'parsley', 'dill', 'thyme', 'olive', 'lemon']},\n {'name': 'bowl poke 

In [249]:
irish_df, irish_err = create_dataframe_from_responses_2(trans_irish_resp)

In [250]:
irish_df

Unnamed: 0,name,ingredients
0,spinach and strawberry salad,"[fresh spinach, strawberries, walnuts, feta, o..."
1,mango and spinach smoothie,"[mango, spinach, chia seeds, almond milk, honey]"
2,guacamole recipe,"[avocado, tomatoes, oven, lime juice, cayenne ..."
3,fish tacos,"[fish, cabbage, whole greek yogurt, lime juice..."
4,vegetable soup,"[black curry, carrots, red peppers, broccoli, ..."
...,...,...
295,turkey wraps,"[cooked turkey, cabbage leaves, avocado, tomat..."
296,fried no pain dish,"[brown rice, black beans, red peppers, zucchin..."
297,grilled tofu salad,"[tofu, mixed greens, red peppers, yellow peppe..."
298,carrot pata and carrot,"[carrot, parsley, walnuts, olive oil, cumin, s..."


In [509]:
float_rows = irish_df['ingredients'].apply(lambda x: isinstance(x, float))
rows_with_floats = irish_df[float_rows]
print("Rows containing floats:")
print(rows_with_floats)

Rows containing floats:
                                     name ingredients
73                           quinoa salad         NaN
74                    whole salmon pellet         NaN
134                                   NaN         NaN
135              fruit salad with yoghurt         NaN
138  homemade hummus with roasted peppers         NaN
165                             acai bowl         NaN


In [518]:
correct_name = {
    134: 'grilled chicken with vegetables'
}

for idx, name in correct_name.items():
    irish_df.at[idx, 'name'] = name

In [521]:
correct_ingredients = {
    73: ['quinoa', 'red peppers', 'stir-fry vegetables', 'feta', 'parsley', 'lemon', 'olive oil'],
    74: ['whole salmon', 'butter', 'sliced garlic', 'dill', 'salt', 'pepper'],
    134: ['chicken', 'red pepper', 'asparagus', 'onions', 'lime juice', 'olive oil', 'herbs'],
    135: ['raspberries', 'strawberries', 'kiwi', 'green apple', 'greek yogurt', 'natural honey', 'coconut'],
    138: ['chickpeas', 'roasted red pepper', 'tahini', 'garlic', 'lemon juice', 'sesame oil'],
    165: ['frozen acai berries', 'banana', 'orange juice', 'muesli', 'cherries', 'coconut' 'nuts']
}

for idx, ingredients in correct_ingredients.items():
    irish_df.at[idx, 'ingredients'] = ingredients

In [522]:
irish_df.to_pickle("../dataset/irish_result.pkl")

### Collect Italian Version

In [252]:
user_prompt_italian = generate_user_prompt('prompts/user_prompt_italian.txt')
system_prompt_italian = generate_system_prompt('prompts/system_prompt_italian.txt')

In [55]:
italian_resp = call_openai_api(user_prompt_italian, system_prompt_italian, 30)
italian_resp

[{'nome': 'insalata di quinoa e verdure',
  'ingredienti': ['quinoa', 'pomodorini', 'pepino', 'peperoni', 'limone', 'olio d'oliva', 'sale', 'pepe']},
 {'nome': 'smoothie verde',
  'ingredienti': ['spinaci', 'banana', 'latte di mandorla', 'semi di chia', 'miele']},
 {'nome': 'buddha bowl',
  'ingredienti': ['riso integrale', 'chickpeas', 'carote', 'cavolo rosso', 'avocado', 'semi di sesamo', 'salsa tahini']},
 {'nome': 'zuppa di lenticchie',
  'ingredienti': ['lenticchie', 'carote', 'cipolla', 'aglio', 'brodo vegetale', 'curcuma', 'pepe nero']},
 {'nome': 'insalata di ceci',
  'ingredienti': ['ceci', 'pomodorini', 'rucola', 'cipolla rossa', 'olio d'oliva', 'aceto balsamico']},
 {'nome': 'hummus fatto in casa',
  'ingredienti': ['ceci', 'tahini', 'limone', 'aglio', 'olio d'oliva', 'paprika']},
 {'nome': 'tacos vegetariani',
  'ingredienti': ['tortillas di mais', 'fagioli neri', 'avocado', 'cipolla', 'coriandolo', 'limone']},
 {'nome': 'insalata di barbabietola e arancia',
  'ingredienti'

["[{'nome': 'insalata di quinoa e verdure',\n  'ingredienti': ['quinoa', 'pomodorini', 'pepino', 'peperoni', 'limone', 'olio d'oliva', 'sale', 'pepe']},\n {'nome': 'smoothie verde',\n  'ingredienti': ['spinaci', 'banana', 'latte di mandorla', 'semi di chia', 'miele']},\n {'nome': 'buddha bowl',\n  'ingredienti': ['riso integrale', 'chickpeas', 'carote', 'cavolo rosso', 'avocado', 'semi di sesamo', 'salsa tahini']},\n {'nome': 'zuppa di lenticchie',\n  'ingredienti': ['lenticchie', 'carote', 'cipolla', 'aglio', 'brodo vegetale', 'curcuma', 'pepe nero']},\n {'nome': 'insalata di ceci',\n  'ingredienti': ['ceci', 'pomodorini', 'rucola', 'cipolla rossa', 'olio d'oliva', 'aceto balsamico']},\n {'nome': 'hummus fatto in casa',\n  'ingredienti': ['ceci', 'tahini', 'limone', 'aglio', 'olio d'oliva', 'paprika']},\n {'nome': 'tacos vegetariani',\n  'ingredienti': ['tortillas di mais', 'fagioli neri', 'avocado', 'cipolla', 'coriandolo', 'limone']},\n {'nome': 'insalata di barbabietola e arancia',

In [56]:
# with open('italian_resp.pkl', 'wb') as f:
#     pickle.dump(italian_resp, f)

In [253]:
with open('responses/italian_resp.pkl', 'rb') as f:
    italian_resp = pickle.load(f)

In [254]:
trans_italian_resp = translate_to_eng(italian_resp)

In [526]:
trans_italian_resp

["[{'name': 'quinoa and vegetable salad',\n  'ingredients': ['quinoa', 'cherry tomatoes', 'pepino', 'peppers', 'lemon', 'olive oil', 'salt', 'pepper']},\n {'name': 'green smoothie',\n  'ingredients': ['spinach', 'banana', 'almond milk', 'chia seeds', 'honey']},\n {'name': 'buddha bowl',\n  'ingredients': ['brown rice', 'chickpeas', 'carrots', 'red cabbage', 'avocado', 'sesame seeds', 'tahini sauce']},\n {'name': 'lentil soup',\n  'ingredients': ['lentils', 'carrots', 'onion', 'garlic', 'vegetable broth', 'turmeric', 'black pepper']},\n {'name': 'chickpea salad',\n  'ingredients': ['chickpeas', 'cherry tomatoes', 'arugula', 'red onion', 'olive oil', 'balsamic vinegar']},\n {'name': 'homemade hummus',\n  'ingredients': ['chickpeas', 'tahini', 'lemon', 'garlic', 'olive oil', 'paprika']},\n {'name': 'vegetarian tacos',\n  'ingredients': ['corn tortillas', 'black beans', 'avocado', 'onion', 'cilantro', 'lemon']},\n {'name': 'beetroot and orange salad',\n  'ingredients': ['beets', 'oranges',

In [255]:
italian_df, italian_err = create_dataframe_from_responses_2(trans_italian_resp)

Error parsing response at index 7: closing parenthesis ']' does not match opening parenthesis '{' on line 32 (<unknown>, line 39)
Error parsing response at index 16: unterminated string literal (detected at line 5) (<unknown>, line 5)
Error parsing response at index 25: unterminated string literal (detected at line 20) (<unknown>, line 20)


In [256]:
italian_df

Unnamed: 0,name,ingredients
0,quinoa and vegetable salad,"[quinoa, cherry tomatoes, pepino, peppers, lem..."
1,green smoothie,"[spinach, banana, almond milk, chia seeds, honey]"
2,buddha bowl,"[brown rice, chickpeas, carrots, red cabbage, ..."
3,lentil soup,"[lentils, carrots, onion, garlic, vegetable br..."
4,chickpea salad,"[chickpeas, cherry tomatoes, arugula, red onio..."
...,...,...
265,chicken avocado wrap,"[chicken breast, avocado, tomato, lettuce, who..."
266,baked salmon,"[salmon fillet, lemon, thyme, extra virgin oli..."
267,grilled tofu,"[tofu, soy sauce, sesame oil, garlic, ginger, ..."
268,zoodles (courgette spaghetti),"[courgettes, pesto, cherry tomatoes, pine nuts..."


In [257]:
all_dishes = []

# Regex to find dishes with different 'name' labels
dish_pattern = r"\{\s*'(?:m|the )?name'\s*:\s*'([^']+)',\s*'ingredients'\s*:\s*\[([^]]+)\]\}"

# Process each item in the data list
for _, dishes_str in italian_err:
    # Adjust quotes for matching
    adjusted_str = dishes_str.replace('"', "'")
    # Find all matches in the current string
    matches = re.findall(dish_pattern, adjusted_str)
    if not matches:
        print("No matches found in:", dishes_str)
    for match in matches:
        dish_name = match[0]
        # Remove extra quotes and split ingredients into a list
        ingredients_list = [ingredient.strip().strip("'") for ingredient in match[1].split(',')]
        all_dishes.append({'name': dish_name, 'ingredients': ingredients_list})

In [259]:
italian_err

[(7,
  "[{'name': 'quinoa salad',\n  'ingredients': ['quinoa',\n   'cucumbers',\n   'cherry tomatoes',\n   'peppers',\n   'olives',\n   'feta',\n   'lemon',\n   'olive oil']},\n {'name': 'green smoothie',\n  'ingredients': ['spinach',\n   'green apple',\n   'banana',\n   'almond milk',\n   'chia seeds']},\n {'name': 'lentil soup',\n  'ingredients': ['lentils',\n   'carrots',\n   'onion',\n   'garlic',\n   'vegetable broth',\n   'tomatoes',\n   'turmeric',\n   'pepper']},\n {'name': 'acai bowl',\n  'ingredients': ['acai pulp',\n   'bananas',\n   'blueberries',\n   'granola',\n   'honey',\n   'coconut milk']},\n {'name': 'chickpea meatballs',\n  'ingredients': ['chickpeas',\n   'garlic',\n   'parsley',\n   'coriander',\n   'cumin',\n   'bread crumbs',\n   'oil']],\n {'name': 'berry smoothie',\n  'ingredients': ['blueberries',\n   'raspberries',\n   'strawberries',\n   'greek yogurt',\n   'honey',\n   'coconut milk']},\n {'name': 'kale salad',\n  'ingredients': ['kale',\n   'almonds',\n  

In [258]:
italian_err_df = pd.DataFrame(all_dishes)
italian_err_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cucumbers, cherry tomatoes, peppers, ..."
1,green smoothie,"[spinach, green apple, banana, almond milk, ch..."
2,lentil soup,"[lentils, carrots, onion, garlic, vegetable br..."
3,acai bowl,"[acai pulp, bananas, blueberries, granola, hon..."
4,berry smoothie,"[blueberries, raspberries, strawberries, greek..."
5,kale salad,"[kale, almonds, orange, olive oil, balsamic vi..."
6,risotto with pumpkin,"[arborio rice, pumpkin, vegetable broth, parme..."
7,hummus and vegetable wrap,"[hummus, peppers, carrots, lettuce, wholemeal ..."
8,baked salmon with asparagus,"[salmon, asparagus, lemon, olive oil, pepper, ..."
9,quinoa salad,"[quinoa, cherry tomatoes, cucumbers, parsley, ..."


In [262]:
new_data = pd.DataFrame({
    'name': ['chickpea meatballs', 'buddha bowl'],
    'ingredients': [
        ['chickpeas', 'garlic', 'parsley', 'coriander', 'cumin', 'bread crumbs', 'oil'],
        ['brown rice', 'avocado', 'red cabbage', 'carrots', 'sesame seeds', 'soy sauce', 'sesame oil']
    ]
})

In [None]:
italian_result_df = pd.concat([italian_df, italian_err_df], axis=0)
italian_result_df = pd.concat([italian_result_df, new_data], axis=0)

In [456]:
italian_result_df.reset_index(drop=True, inplace=True)

In [457]:
italian_result_df

Unnamed: 0,name,ingredients
0,quinoa and vegetable salad,"[quinoa, cherry tomatoes, pepino, peppers, lem..."
1,green smoothie,"[spinach, banana, almond milk, chia seeds, honey]"
2,buddha bowl,"[brown rice, chickpeas, carrots, red cabbage, ..."
3,lentil soup,"[lentils, carrots, onion, garlic, vegetable br..."
4,chickpea salad,"[chickpeas, cherry tomatoes, arugula, red onio..."
...,...,...
295,courgette spaghetti with pesto,"[courgettes, basil, pine nuts, garlic, olive o..."
296,spelt risotto with mushrooms,"[spelt, mushrooms, vegetable broth, onion, par..."
297,beetroot and goat cheese salad,"[beets, goat's cheese, walnuts, radishes, sala..."
298,chickpea meatballs,"[chickpeas, garlic, parsley, coriander, cumin,..."


In [525]:
float_rows = italian_result_df['ingredients'].apply(lambda x: isinstance(x, float))
rows_with_floats = italian_result_df[float_rows]
print("Rows containing floats:")
print(rows_with_floats)

Rows containing floats:
          name ingredients
94  fish tacos         NaN


In [527]:
correct_ingredients = {
    94: ['cod fillets', 'wholewheat flour', 'cabbage', 'avocado', 'taco seasoning', 'lime', 'coriander']
}

for idx, ingredients in correct_ingredients.items():
    italian_result_df.at[idx, 'ingredients'] = ingredients

In [528]:
italian_result_df.to_pickle("../dataset/italian_result.pkl")

### Collect Japanese Version

In [268]:
user_prompt_jap = generate_user_prompt('prompts/user_prompt_jap.txt')
system_prompt_jap = generate_system_prompt('prompts/system_prompt_jap.txt')

In [60]:
jap_resp = call_openai_api(user_prompt_jap, system_prompt_jap, 30)
jap_resp

[{'name': 'アボカドトースト',
  '材料': ['アボカド', 'レモン汁', '全粒粉ブレッド', '塩', 'こしょう', 'フレッシュハーブ']},
 {'name': 'キヌアサラダ',
  '材料': ['キヌア', 'キュウリ', 'トマト', '赤玉ねぎ', 'パセリ', 'レモン塩ドレッシング']},
 {'name': 'スムージーボウル',
  '材料': ['冷凍ベリー', 'バナナ', 'アーモンドミルク', 'スピルリナパウダー', 'フレッシュフルーツ', 'グラノーラ']},
 {'name': 'ベジタリアンチリ',
  '材料': ['玉ねぎ', 'にんにく', '赤ピーマン', 'ひよこ豆', 'トマト缶', 'クミン', 'チリパウダー']},
 {'name': 'ローストサーモンと野菜',
  '材料': ['サーモンフィレ', 'ブロッコリー', '人参', 'オリーブオイル', 'レモン', '塩', 'こしょう']},
 {'name': 'ギリシャヨーグルトパルフェ',
  '材料': ['ギリシャヨーグルト', 'ミックスベリー', '蜂蜜', 'アーモンドスライス']},
 {'name': 'スパイシー豆乳ラーメン',
  '材料': ['豆乳', '玉ねぎ', 'にんじん', 'しいたけ', 'ネギ', '煮卵', '唐辛子']},
 {'name': '野菜と豆のタコス',
  '材料': ['全粒粉トルティーヤ', '黒豆', 'アボカド', 'トマト', 'レッドキャベツ', 'ライム']},
 {'name': '豆腐と野菜のスティック',
  '材料': ['豆腐', 'ズッキーニ', 'ピーマン', '人参', '醤油', '生姜']},
 {'name': 'バナナオートミールクッキー',
  '材料': ['オートミール', 'バナナ', 'アーモンドバター', 'ダークチョコレートチップ', 'シナモン']}]

[{'name': 'キヌアサラダ',
  '材料': ['キヌア', '赤ピーマン', '黄ピーマン', 'きゅうり', 'レモンジュース', 'オリーブオイル', '塩', 'こしょう']},
 {'name': '豆腐と野菜の stir fry',
  '材料'

["[{'name': 'アボカドトースト',\n  '材料': ['アボカド', 'レモン汁', '全粒粉ブレッド', '塩', 'こしょう', 'フレッシュハーブ']},\n {'name': 'キヌアサラダ',\n  '材料': ['キヌア', 'キュウリ', 'トマト', '赤玉ねぎ', 'パセリ', 'レモン塩ドレッシング']},\n {'name': 'スムージーボウル',\n  '材料': ['冷凍ベリー', 'バナナ', 'アーモンドミルク', 'スピルリナパウダー', 'フレッシュフルーツ', 'グラノーラ']},\n {'name': 'ベジタリアンチリ',\n  '材料': ['玉ねぎ', 'にんにく', '赤ピーマン', 'ひよこ豆', 'トマト缶', 'クミン', 'チリパウダー']},\n {'name': 'ローストサーモンと野菜',\n  '材料': ['サーモンフィレ', 'ブロッコリー', '人参', 'オリーブオイル', 'レモン', '塩', 'こしょう']},\n {'name': 'ギリシャヨーグルトパルフェ',\n  '材料': ['ギリシャヨーグルト', 'ミックスベリー', '蜂蜜', 'アーモンドスライス']},\n {'name': 'スパイシー豆乳ラーメン',\n  '材料': ['豆乳', '玉ねぎ', 'にんじん', 'しいたけ', 'ネギ', '煮卵', '唐辛子']},\n {'name': '野菜と豆のタコス',\n  '材料': ['全粒粉トルティーヤ', '黒豆', 'アボカド', 'トマト', 'レッドキャベツ', 'ライム']},\n {'name': '豆腐と野菜のスティック',\n  '材料': ['豆腐', 'ズッキーニ', 'ピーマン', '人参', '醤油', '生姜']},\n {'name': 'バナナオートミールクッキー',\n  '材料': ['オートミール', 'バナナ', 'アーモンドバター', 'ダークチョコレートチップ', 'シナモン']}]\n",
 "[{'name': 'キヌアサラダ',\n  '材料': ['キヌア', '赤ピーマン', '黄ピーマン', 'きゅうり', 'レモンジュース', 'オリーブオイル', '塩', 'こしょう']},\n {'name

In [61]:
# with open('jap_resp.pkl', 'wb') as f:
#     pickle.dump(jap_resp, f)

In [265]:
with open('responses/jap_resp.pkl', 'rb') as f:
    jap_resp = pickle.load(f)

In [266]:
trans_jap_resp = translate_to_eng(jap_resp)

In [267]:
jap_df, jap_err = create_dataframe_from_responses_2(trans_jap_resp)

Error parsing response at index 2: invalid syntax (<unknown>, line 1)
Error parsing response at index 8: invalid syntax (<unknown>, line 22)
Error parsing response at index 13: invalid syntax (<unknown>, line 1)
Error parsing response at index 14: invalid syntax (<unknown>, line 1)
Error parsing response at index 17: invalid syntax (<unknown>, line 1)
Error parsing response at index 19: invalid syntax (<unknown>, line 1)


In [273]:
jap_df

Unnamed: 0,name,ingredients
0,avocado toast,"[avocado, lemon juice, whole wheat bread, salt..."
1,quinoa salad,"[quinoa, cucumber, tomato, red onion, parsley,..."
2,smoothie bowl,"[frozen berries, banana, almond milk, spirulin..."
3,vegetarian chili,"[onion, garlic, red pepper, chickpeas, canned ..."
4,roasted salmon and vegetables,"[salmon fillet, broccoli, carrot, olive oil, l..."
...,...,...
235,oatmeal,"[oats, almond milk, cinnamon, apples, raisins,..."
236,turkey ham burrito,"[whole wheat tortilla, turkey ham, lettuce, to..."
237,pumpkin soup,"[pumpkin, onion, garlic, chicken broth, cream,..."
238,lentil salad,"[lentils, tomato, cucumber, red onion, parsley..."


In [269]:
jap_resp2 = call_openai_api(user_prompt_jap, system_prompt_jap, 10)
jap_resp2

[{'name': 'キノコとほうれん草のクリチリト',
  '材料': [
   'オリーブオイル',
   '玉ねぎ',
   'にんにく',
   'キノコ',
   'ほうれん草',
   'パプリカ',
   '塩',
   '黒胡椒'
  ]},
 {'name': 'トマトとバジルのスパゲッティ',
  '材料': [
   '全粒粉スパゲッティ',
   'トマト',
   'バジル',
   'エクストラバージンオリーブオイル',
   'にんにく',
   '塩',
   '黒胡椒'
  ]},
 {'name': 'キヌアと野菜のサラダ',
  '材料': [
   'キヌア',
   '赤ピーマン',
   'きゅうり',
   '紫キャベツ',
   'ミント',
   'レモン汁',
   'オリーブオイル',
   '塩',
   '黒胡椒'
  ]},
 {'name': 'アボカドとトマトのサンドイッチ',
  '材料': [
   '全粒粉パン',
   'アボカド',
   'トマト',
   'レタス',
   'マヨネーズ',
   '塩',
   '黒胡椒'
  ]},
 {'name': '鶏胸肉のグリル',
  '材料': [
   '鶏胸肉',
   'オリーブオイル',
   'レモン汁',
   'ガーリックパウダー',
   '塩',
   '黒胡椒',
   'ローズマリー'
  ]},
 {'name': 'ブロッコリーとニンジンの蒸し物',
  '材料': [
   'ブロッコリー',
   'ニンジン',
   'オリーブオイル',
   '塩',
   '黒胡椒'
  ]},
 {'name': 'ひよこ豆のカレー',
  '材料': [
   'ひよこ豆',
   'オニオン',
   'トマト',
   'にんにく',
   'カレーパウダー',
   'クミン',
   'ココナッツミルク',
   '塩',
   'コリアンダー'
  ]},
 {'name': 'ビーツとオレンジのサラダ',
  '材料': [
   'ビーツ',
   'オレンジ',
   '赤オニオン',
   'ウォルナッツ',
   'フェタチーズ',
   'オリーブオイル',
   'バルサミコ酢'
  ]},


["[{'name': 'キノコとほうれん草のクリチリト',\n  '材料': [\n   'オリーブオイル',\n   '玉ねぎ',\n   'にんにく',\n   'キノコ',\n   'ほうれん草',\n   'パプリカ',\n   '塩',\n   '黒胡椒'\n  ]},\n {'name': 'トマトとバジルのスパゲッティ',\n  '材料': [\n   '全粒粉スパゲッティ',\n   'トマト',\n   'バジル',\n   'エクストラバージンオリーブオイル',\n   'にんにく',\n   '塩',\n   '黒胡椒'\n  ]},\n {'name': 'キヌアと野菜のサラダ',\n  '材料': [\n   'キヌア',\n   '赤ピーマン',\n   'きゅうり',\n   '紫キャベツ',\n   'ミント',\n   'レモン汁',\n   'オリーブオイル',\n   '塩',\n   '黒胡椒'\n  ]},\n {'name': 'アボカドとトマトのサンドイッチ',\n  '材料': [\n   '全粒粉パン',\n   'アボカド',\n   'トマト',\n   'レタス',\n   'マヨネーズ',\n   '塩',\n   '黒胡椒'\n  ]},\n {'name': '鶏胸肉のグリル',\n  '材料': [\n   '鶏胸肉',\n   'オリーブオイル',\n   'レモン汁',\n   'ガーリックパウダー',\n   '塩',\n   '黒胡椒',\n   'ローズマリー'\n  ]},\n {'name': 'ブロッコリーとニンジンの蒸し物',\n  '材料': [\n   'ブロッコリー',\n   'ニンジン',\n   'オリーブオイル',\n   '塩',\n   '黒胡椒'\n  ]},\n {'name': 'ひよこ豆のカレー',\n  '材料': [\n   'ひよこ豆',\n   'オニオン',\n   'トマト',\n   'にんにく',\n   'カレーパウダー',\n   'クミン',\n   'ココナッツミルク',\n   '塩',\n   'コリアンダー'\n  ]},\n {'name': 'ビーツとオレンジのサラダ',\n  '材料': [\n   'ビーツ',\n   

In [270]:
trans_jap_resp2 = translate_to_eng(jap_resp2)

In [271]:
jap_df2, jap_err2 = create_dataframe_from_responses_2(trans_jap_resp2)

Error parsing response at index 0: invalid syntax. Perhaps you forgot a comma? (<unknown>, line 92)


In [272]:
jap_df2

Unnamed: 0,name,ingredients
0,tomato and basil pasta,"[whole wheat pasta, fresh basil, cherry tomato..."
1,quince salad,"[quince, cucumber, cherry tomatoes, red onion,..."
2,broccoli soup,"[broccoli, onion, garlic, chicken broth, olive..."
3,sweet potato hash,"[sweet potato, red pepper, onion, spinach, ext..."
4,green smoothie,"[spinach, banana, apple, kale, almond milk, ch..."
...,...,...
85,broccoli and cheddar soup,"[broccoli, onion, unsalted butter, all-purpose..."
86,baked salmon trout,"[salmon trout fillet, lemon slices, garlic, di..."
87,avocado and shrimp salad,"[avocado, boiled shrimp, lime juice, olive oil..."
88,chicken breast with herb marinade,"[chicken breast, olive oil, lemon juice, garli..."


In [None]:
jap_result_df = pd.concat([jap_df, jap_df2[:60]], axis=0)

In [459]:
jap_result_df.reset_index(drop=True, inplace=True)

In [460]:
jap_result_df

Unnamed: 0,name,ingredients
0,avocado toast,"[avocado, lemon juice, whole wheat bread, salt..."
1,quinoa salad,"[quinoa, cucumber, tomato, red onion, parsley,..."
2,smoothie bowl,"[frozen berries, banana, almond milk, spirulin..."
3,vegetarian chili,"[onion, garlic, red pepper, chickpeas, canned ..."
4,roasted salmon and vegetables,"[salmon fillet, broccoli, carrot, olive oil, l..."
...,...,...
295,avocado and egg salad,"[avocado, boiled egg, lettuce, tomato, cucumbe..."
296,sweet potato and cumin soup,"[sweet potato, cumin, onion, garlic, chicken s..."
297,pumpkin risotto,"[pumpkin, rice, onion, garlic, chicken stock, ..."
298,oatmeal and banana pancakes,"[oatmeal, banana, egg, baking powder, cinnamon..."


In [461]:
jap_result_df.to_pickle("../dataset/jap_result.pkl")

### Collect Arabic Version

In [279]:
user_prompt_arab = generate_user_prompt('prompts/user_prompt_arabic.txt')
system_prompt_arab = generate_system_prompt('prompts/system_prompt_arabic.txt')

In [64]:
arab_resp = call_openai_api(user_prompt_arab, system_prompt_arab, 30)
arab_resp

[{'الاسم': 'سلطة الكينوا والخضار',
  'المكونات': ['كينوا', 'فلفل حلو', 'خيار', 'جزر', 'زيت زيتون', 'ليمون', 'ملح', 'فلفل']},
 {'الاسم': 'دجاج مشوي بالأعشاب',
  'المكونات': ['دجاج', 'ثوم', 'إكليل الجبل', 'زعتر', 'ليمون', 'زيت زيتون', 'ملح', 'فلفل']},
 {'الاسم': 'حساء العدس',
  'المكونات': ['عدس', 'طماطم', 'جزر', 'بصل', 'كرفس', 'ثوم', 'مرق الخضار', 'كمون', 'كزبرة']},
 {'الاسم': 'تبولة الكينوا',
  'المكونات': ['كينوا', 'بقدونس', 'نعناع', 'طماطم', 'خيار', 'بصل أخضر', 'ليمون', 'زيت زيتون', 'ملح']},
 {'الاسم': 'سمك السلمون المشوي',
  'المكونات': ['سلمون', 'ثوم', 'ليمون', 'زيت زيتون', 'إكليل الجبل', 'ملح', 'فلفل']},
 {'الاسم': 'عصير الخضروات الطازج',
  'المكونات': ['جزر', 'كرفس', 'طماطم', 'خيار', 'شمندر', 'ليمون', 'زنجبيل']},
 {'الاسم': 'صينية البطاطس والخضروات',
  'المكونات': ['بطاطس', 'كوسا', 'فلفل حلو', 'بصل', 'ثوم', 'زعتر', 'زيت زيتون', 'ملح', 'فلفل']},
 {'الاسم': 'أومليت الخضار',
  'المكونات': ['بيض', 'فلفل حلو', 'بصل', 'طماطم', 'جبنة قليلة الدسم', 'ملح', 'فلفل']},
 {'الاسم': 'شوربة البر

["[{'الاسم': 'سلطة الكينوا والخضار',\n  'المكونات': ['كينوا', 'فلفل حلو', 'خيار', 'جزر', 'زيت زيتون', 'ليمون', 'ملح', 'فلفل']},\n {'الاسم': 'دجاج مشوي بالأعشاب',\n  'المكونات': ['دجاج', 'ثوم', 'إكليل الجبل', 'زعتر', 'ليمون', 'زيت زيتون', 'ملح', 'فلفل']},\n {'الاسم': 'حساء العدس',\n  'المكونات': ['عدس', 'طماطم', 'جزر', 'بصل', 'كرفس', 'ثوم', 'مرق الخضار', 'كمون', 'كزبرة']},\n {'الاسم': 'تبولة الكينوا',\n  'المكونات': ['كينوا', 'بقدونس', 'نعناع', 'طماطم', 'خيار', 'بصل أخضر', 'ليمون', 'زيت زيتون', 'ملح']},\n {'الاسم': 'سمك السلمون المشوي',\n  'المكونات': ['سلمون', 'ثوم', 'ليمون', 'زيت زيتون', 'إكليل الجبل', 'ملح', 'فلفل']},\n {'الاسم': 'عصير الخضروات الطازج',\n  'المكونات': ['جزر', 'كرفس', 'طماطم', 'خيار', 'شمندر', 'ليمون', 'زنجبيل']},\n {'الاسم': 'صينية البطاطس والخضروات',\n  'المكونات': ['بطاطس', 'كوسا', 'فلفل حلو', 'بصل', 'ثوم', 'زعتر', 'زيت زيتون', 'ملح', 'فلفل']},\n {'الاسم': 'أومليت الخضار',\n  'المكونات': ['بيض', 'فلفل حلو', 'بصل', 'طماطم', 'جبنة قليلة الدسم', 'ملح', 'فلفل']},\n {'ا

In [65]:
# with open('arab_resp.pkl', 'wb') as f:
#     pickle.dump(arab_resp, f)

In [276]:
with open('responses/arab_resp.pkl', 'rb') as f:
    arab_resp = pickle.load(f)

In [277]:
trans_arab_resp = translate_to_eng(arab_resp)

In [530]:
trans_arab_resp

["[{'name': 'quinoa and vegetable salad',\n  'ingredients': ['quinoa', 'sweet pepper', 'cucumber', 'carrot', 'olive oil', 'lemon', 'salt', 'pepper']},\n {'name': 'herb roasted chicken',\n  'ingredients': ['chicken', 'garlic', 'rosemary', 'thyme', 'lemon', 'olive oil', 'salt', 'pepper']},\n {'name': 'lentil soup',\n  'ingredients': ['lentils', 'tomatoes', 'carrots', 'onions', 'celery', 'garlic', 'vegetable stock', 'cumin', 'coriander']},\n {'name': 'quinoa tabbouleh',\n  'ingredients': ['quinoa', 'parsley', 'mint', 'tomato', 'cucumber', 'green onion', 'lemon', 'olive oil', 'salt']},\n {'name': 'grilled salmon',\n  'ingredients': ['salmon', 'garlic', 'lemon', 'olive oil', 'rosemary', 'salt', 'pepper']},\n {'name': 'fresh vegetable juice',\n  'ingredients': ['carrot', 'celery', 'tomato', 'cucumber', 'beet', 'lemon', 'ginger']},\n {'name': 'potato and vegetable tray',\n  'ingredients': ['potatoes', 'zucchini', 'sweet pepper', 'onion', 'garlic', 'thyme', 'olive oil', 'salt', 'pepper']},\n {

In [278]:
arab_df, arab_err = create_dataframe_from_responses_2(trans_arab_resp)

Error parsing response at index 1: unterminated string literal (detected at line 30) (<unknown>, line 30)
Error parsing response at index 6: unterminated string literal (detected at line 18) (<unknown>, line 18)
Error parsing response at index 17: closing parenthesis ')' does not match opening parenthesis '[' (<unknown>, line 13)
Error parsing response at index 21: invalid syntax (<unknown>, line 1)
Error parsing response at index 23: invalid syntax (<unknown>, line 1)
Error parsing response at index 28: unterminated string literal (detected at line 9) (<unknown>, line 9)


In [284]:
arab_df

Unnamed: 0,name,ingredients
0,quinoa and vegetable salad,"[quinoa, sweet pepper, cucumber, carrot, olive..."
1,herb roasted chicken,"[chicken, garlic, rosemary, thyme, lemon, oliv..."
2,lentil soup,"[lentils, tomatoes, carrots, onions, celery, g..."
3,quinoa tabbouleh,"[quinoa, parsley, mint, tomato, cucumber, gree..."
4,grilled salmon,"[salmon, garlic, lemon, olive oil, rosemary, s..."
...,...,...
235,date and almond energy balls,"[dates, almonds, grated coconut, cocoa]"
236,hummus and spinach salad,"[chickpeas, spinach, tomatoes, onions, vinegar..."
237,grilled eggplant slices,"[eggplant, olive oil, garlic, basil, parmesan ..."
238,sweet potato pancakes,"[sweet potatoes, flour, baking powder, vanilla..."


In [280]:
arab_resp2 = call_openai_api(user_prompt_arab, system_prompt_arab, 8)
arab_resp2

[
{'الاسم': 'سلطة الكينوا',
 'المكونات': ['كينوا'، 'طماطم'، 'خيار'، 'فلفل أحمر'، 'بصل أخضر'، 'ليمون'، 'زيت زيتون'، 'ملح'، 'فلفل']},
{'الاسم': 'عصير الكركم والزنجبيل',
 'المكونات': ['كركم'، 'زنجبيل'، 'تفاح'، 'جزر'، 'ليمون']},
{'الاسم': 'سمك السلمون المشوي',
 'المكونات': ['سلمون'، 'ثوم'، 'ليمون'، 'زيت زيتون'، 'ملح'، 'فلفل']},
{'الاسم': 'شوربة الخضار',
 'المكونات': ['خضروات مشكلة'، 'ثوم'، 'مرق خضار'، 'بصل'، 'زيت زيتون']},
{'الاسم': 'تبولة',
 'المكونات': ['برغل'، 'طماطم'، 'بقدونس'، 'نعناع'، 'ليمون'، 'بصل أخضر'، 'زيت زيتون']},
{'الاسم': 'فاهيتا الدجاج',
 'المكونات': ['دجاج'، 'فلفل ألوان'، 'بصل'، 'توابل'، 'زيت زيتون']},
{'الاسم': 'سلطة الفواكه',
 'المكونات': ['تفاح'، 'موز'، 'كيوي'، 'عصير برتقال']},
{'الاسم': 'شرائح الديك الرومي المشوية',
 'المكونات': ['ديك رومي'، 'ثوم'، 'روزماري'، 'ليمون'، 'زيت زيتون']},
{'الاسم': 'ذرة مشوية بالأعشاب',
 'المكونات': ['ذرة'، 'زبدة'، 'ثوم'، 'أعشاب مختلطة']},
{'الاسم': 'كرات البقوليات',
 'المكونات': ['بقوليات مشكلة'، 'بصل'، 'ثوم'، 'كزبرة'، 'كمون'، 'بيض']}
]
[{'ا

["[\n{'الاسم': 'سلطة الكينوا',\n 'المكونات': ['كينوا'، 'طماطم'، 'خيار'، 'فلفل أحمر'، 'بصل أخضر'، 'ليمون'، 'زيت زيتون'، 'ملح'، 'فلفل']},\n{'الاسم': 'عصير الكركم والزنجبيل',\n 'المكونات': ['كركم'، 'زنجبيل'، 'تفاح'، 'جزر'، 'ليمون']},\n{'الاسم': 'سمك السلمون المشوي',\n 'المكونات': ['سلمون'، 'ثوم'، 'ليمون'، 'زيت زيتون'، 'ملح'، 'فلفل']},\n{'الاسم': 'شوربة الخضار',\n 'المكونات': ['خضروات مشكلة'، 'ثوم'، 'مرق خضار'، 'بصل'، 'زيت زيتون']},\n{'الاسم': 'تبولة',\n 'المكونات': ['برغل'، 'طماطم'، 'بقدونس'، 'نعناع'، 'ليمون'، 'بصل أخضر'، 'زيت زيتون']},\n{'الاسم': 'فاهيتا الدجاج',\n 'المكونات': ['دجاج'، 'فلفل ألوان'، 'بصل'، 'توابل'، 'زيت زيتون']},\n{'الاسم': 'سلطة الفواكه',\n 'المكونات': ['تفاح'، 'موز'، 'كيوي'، 'عصير برتقال']},\n{'الاسم': 'شرائح الديك الرومي المشوية',\n 'المكونات': ['ديك رومي'، 'ثوم'، 'روزماري'، 'ليمون'، 'زيت زيتون']},\n{'الاسم': 'ذرة مشوية بالأعشاب',\n 'المكونات': ['ذرة'، 'زبدة'، 'ثوم'، 'أعشاب مختلطة']},\n{'الاسم': 'كرات البقوليات',\n 'المكونات': ['بقوليات مشكلة'، 'بصل'، 'ثوم'، 'كزبرة'، 

In [281]:
trans_arab_resp2 = translate_to_eng(arab_resp2)

In [282]:
arab_df2, arab_err2 = create_dataframe_from_responses_2(trans_arab_resp2)

In [283]:
arab_df2

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, tomatoes, cucumber, red pepper, green..."
1,turmeric ginger juice,"[turmeric, ginger, apple, carrot, lemon]"
2,grilled salmon,"[salmon, garlic, lemon, olive oil, salt, pepper]"
3,vegetable soup,"[mixed vegetables, garlic, vegetable broth, on..."
4,tabbouleh,"[bulgur, tomatoes, parsley, mint, lemon, green..."
...,...,...
75,chickpea and tomato salad,"[chickpeas, tomatoes, red onions, balsamic vin..."
76,avocado and egg breakfast,"[avocado, eggs, pepper, salt]"
77,grilled salmon,"[salmon, lemon, garlic, olive oil, salt, pepper]"
78,quinoa tabbouleh,"[quinoa, parsley, tomato, onion, mint, lemon, ..."


In [None]:
arab_result_df = pd.concat([arab_df, arab_df[:60]], axis=0)

In [462]:
arab_result_df.reset_index(drop=True, inplace=True)

In [463]:
arab_result_df

Unnamed: 0,name,ingredients
0,quinoa and vegetable salad,"[quinoa, sweet pepper, cucumber, carrot, olive..."
1,herb roasted chicken,"[chicken, garlic, rosemary, thyme, lemon, oliv..."
2,lentil soup,"[lentils, tomatoes, carrots, onions, celery, g..."
3,quinoa tabbouleh,"[quinoa, parsley, mint, tomato, cucumber, gree..."
4,grilled salmon,"[salmon, garlic, lemon, olive oil, rosemary, s..."
...,...,...
295,chickpea and avocado salad,"[hummus, avocado, cherry tomatoes, red onion, ..."
296,raspberry chia pudding,"[chia seeds, almond milk, honey, fresh berries..."
297,energy balls with dates and nuts,"[dates, almonds, grated coconut, raw cocoa, va..."
298,military squash spaghetti,"[spaghetti pasta, military percussion, garlic,..."


In [529]:
float_rows = arab_result_df['ingredients'].apply(lambda x: isinstance(x, float))
rows_with_floats = arab_result_df[float_rows]
print("Rows containing floats:")
print(rows_with_floats)

Rows containing floats:
                             name ingredients
117         quinoa and beet salad         NaN
220    quinoa and vegetable salad         NaN
221                   lentil soup         NaN
222          herb roasted chicken         NaN
223  spinach and strawberry salad         NaN
224                   green juice         NaN
225     mixed vegetable casserole         NaN
226         sweet potato pancakes         NaN
227       grilled fish with herbs         NaN
228              quinoa tabbouleh         NaN
229      pasta with avocado sauce         NaN


In [531]:
correct_ingredients = {
    117: ['quinoa', 'roasted beets', 'walnuts', 'arugula', 'feta', 'grape vinegar'],
    220: ['quinoa', 'tomatoes', 'color pepers', 'red onion', 'lemon juice', 'olive oil', 'salt', 'black pepper'],
    221: ['lentil', 'carrot', 'potato', 'onion', 'garlic', 'broth', 'cumin', 'turmeric', 'salt', 'olive oil'],
    222: ['chicken breasts', 'garlic', 'lemon', 'rosemary', 'zaatar', 'salt', 'black pepper', 'olive oil'],
    223: ['spinach', 'strawberry', 'walnut', 'olive oil', 'balsamic vinegar', 'salt', 'black pepper'],
    224: ['spinach', 'celery', 'green apple', 'lemon', 'ginger'],
    225: ['potatoes', 'zucchini', 'carrot', 'pepper colors', 'onion', 'garlic', 'tomatoes', 'olive oil', 'cumin', 'salt'],
    226: ['sweet potato', 'flour', 'baking powder', 'milk', 'egg', 'oil', 'honey', 'vanilla'],
    227: ['fillet-fish', 'lemon', 'garlic', 'olive oil', 'rosemary', 'zaatar', 'salt', 'black pepper'],
    228: ['quinoa', 'parsley', 'tomatoes', 'option', 'green onion', 'mint', 'lemon', 'olive oil', 'salt'],
    229: ['macaroni', 'avocado', 'garlic', 'lemon', 'olive oil', 'parsley', 'black pepper', 'salt']
}

for idx, ingredients in correct_ingredients.items():
    arab_result_df.at[idx, 'ingredients'] = ingredients

In [532]:
arab_result_df.at[229, 'ingredients']

['macaroni',
 'avocado',
 'garlic',
 'lemon',
 'olive oil',
 'parsley',
 'black pepper',
 'salt']

In [533]:
arab_result_df.to_pickle("../dataset/arab_result.pkl")

### Collect Swedish Version

In [290]:
user_prompt_swed = generate_user_prompt('prompts/user_prompt_swedish.txt')
system_prompt_swed = generate_system_prompt('prompts/system_prompt_swedish.txt')

In [68]:
swed_resp = call_openai_api(user_prompt_swed, system_prompt_swed, 30)
swed_resp

[{'name': 'quinoa sallad',
  'ingredienser': ['quinoa', 'gurka', 'körsbärstomater', 'fetaost', 'olivolja', 'citronsaft', 'färsk basilika']},
 {'name': 'grön smoothie',
  'ingredienser': ['spenat', 'banan', 'päron', 'avokado', 'vatten', 'citronsaft', 'ingefära']},
 {'name': 'lax i folie',
  'ingredienser': ['laxfilé', 'citronskivor', 'smör', 'dill', 'salt', 'svartpeppar']},
 {'name': 'kikärtscurry',
  'ingredienser': ['kikärter', 'kokosmjölk', 'currypasta', 'spenat', 'tomater', 'lök', 'vitlök']},
 {'name': 'grillade grönsaker',
  'ingredienser': ['zucchini', 'paprika', 'rödlök', 'olivolja', 'balsamvinäger', 'salt', 'peppar']},
 {'name': 'kyckling och broccoli wok',
  'ingredienser': ['kycklingbröst', 'broccoli', 'sojasås', 'sesamolja', 'vitlök', 'ingefära', 'chili']},
 {'name': 'tomat och basilika soppa',
  'ingredienser': ['tomater', 'grönsaksbuljong', 'basilika', 'grädde', 'salt', 'peppar']},
 {'name': 'avokadosallad',
  'ingredienser': ['avokado', 'ärter', 'spenat', 'rödlök', 'olivol

["[{'name': 'quinoa sallad',\n  'ingredienser': ['quinoa', 'gurka', 'körsbärstomater', 'fetaost', 'olivolja', 'citronsaft', 'färsk basilika']},\n {'name': 'grön smoothie',\n  'ingredienser': ['spenat', 'banan', 'päron', 'avokado', 'vatten', 'citronsaft', 'ingefära']},\n {'name': 'lax i folie',\n  'ingredienser': ['laxfilé', 'citronskivor', 'smör', 'dill', 'salt', 'svartpeppar']},\n {'name': 'kikärtscurry',\n  'ingredienser': ['kikärter', 'kokosmjölk', 'currypasta', 'spenat', 'tomater', 'lök', 'vitlök']},\n {'name': 'grillade grönsaker',\n  'ingredienser': ['zucchini', 'paprika', 'rödlök', 'olivolja', 'balsamvinäger', 'salt', 'peppar']},\n {'name': 'kyckling och broccoli wok',\n  'ingredienser': ['kycklingbröst', 'broccoli', 'sojasås', 'sesamolja', 'vitlök', 'ingefära', 'chili']},\n {'name': 'tomat och basilika soppa',\n  'ingredienser': ['tomater', 'grönsaksbuljong', 'basilika', 'grädde', 'salt', 'peppar']},\n {'name': 'avokadosallad',\n  'ingredienser': ['avokado', 'ärter', 'spenat', 

In [70]:
# with open('swed_resp.pkl', 'wb') as f:
#     pickle.dump(swed_resp, f)

In [287]:
with open('responses/swed_resp.pkl', 'rb') as f:
    swed_resp = pickle.load(f)

In [288]:
trans_swed_resp = translate_to_eng(swed_resp)

In [289]:
swed_df, swed_err = create_dataframe_from_responses_2(trans_swed_resp)

Error parsing response at index 1: unterminated string literal (detected at line 19) (<unknown>, line 19)
Error parsing response at index 4: invalid syntax (<unknown>, line 10)
Error parsing response at index 5: invalid syntax (<unknown>, line 16)


In [291]:
swed_resp2 = call_openai_api(user_prompt_swed, system_prompt_swed, 4)
swed_resp2

[{'name': 'quinoasallad',
  'ingredienser': ['quinoa', 'körsbärstomater', 'gurka', 'fetaost', 'olivolja', 'citronjuice', 'persilja', 'salt', 'peppar']},
 {'name': 'grönsakssoppa',
  'ingredienser': ['morötter', 'potatis', 'lök', 'selleri', 'zucchini', 'krossade tomater', 'vatten', 'salt', 'peppar', 'olivolja']},
 {'name': 'lax i ugn',
  'ingredienser': ['laxfilé', 'citron', 'olivolja', 'vitlök', 'dill', 'salt', 'peppar']},
 {'name': 'kikärtscurry',
  'ingredienser': ['kikärtor', 'kokosmjölk', 'currypasta', 'spenat', 'lök', 'vitlök', 'ingefära', 'olivolja', 'citronjuice']},
 {'name': 'hemmagjord granola',
  'ingredienser': ['havregryn', 'honung', 'mandlar', 'valnötter', 'pumpafrön', 'solrosfrön', 'kokosolja', 'torkade tranbär', 'kanel']},
 {'name': 'kyckling och broccoli bowl',
  'ingredienser': ['kycklingbröst', 'broccoli', 'brunt ris', 'sesamfrön', 'sojasås', 'vitlök', 'olja']},
 {'name': 'kål och äppelsallad',
  'ingredienser': ['kål', 'äpple', 'valnötter', 'morötter', 'majonnäs', 'ä

["[{'name': 'quinoasallad',\n  'ingredienser': ['quinoa', 'körsbärstomater', 'gurka', 'fetaost', 'olivolja', 'citronjuice', 'persilja', 'salt', 'peppar']},\n {'name': 'grönsakssoppa',\n  'ingredienser': ['morötter', 'potatis', 'lök', 'selleri', 'zucchini', 'krossade tomater', 'vatten', 'salt', 'peppar', 'olivolja']},\n {'name': 'lax i ugn',\n  'ingredienser': ['laxfilé', 'citron', 'olivolja', 'vitlök', 'dill', 'salt', 'peppar']},\n {'name': 'kikärtscurry',\n  'ingredienser': ['kikärtor', 'kokosmjölk', 'currypasta', 'spenat', 'lök', 'vitlök', 'ingefära', 'olivolja', 'citronjuice']},\n {'name': 'hemmagjord granola',\n  'ingredienser': ['havregryn', 'honung', 'mandlar', 'valnötter', 'pumpafrön', 'solrosfrön', 'kokosolja', 'torkade tranbär', 'kanel']},\n {'name': 'kyckling och broccoli bowl',\n  'ingredienser': ['kycklingbröst', 'broccoli', 'brunt ris', 'sesamfrön', 'sojasås', 'vitlök', 'olja']},\n {'name': 'kål och äppelsallad',\n  'ingredienser': ['kål', 'äpple', 'valnötter', 'morötter',

In [292]:
trans_swed_resp2 = translate_to_eng(swed_resp2)

In [293]:
swed_df2, swed_err2 = create_dataframe_from_responses_2(trans_swed_resp2)

In [None]:
swed_result_df = pd.concat([swed_df, swed_df2[:30]], axis=0)

In [465]:
swed_result_df.reset_index(drop=True, inplace=True)

In [466]:
swed_result_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cucumber, cherry tomatoes, feta chees..."
1,green smoothie,"[spinach, banana, pear, avocado, water, lemon ..."
2,salmon in foil,"[salmon fillet, lemon slices, butter, dill, sa..."
3,chickpea curry,"[chickpeas, coconut milk, curry paste, spinach..."
4,grilled vegetables,"[zucchini, pepper, red onion, olive oil, balsa..."
...,...,...
295,chickpea curry,"[chickpeas, coconut milk, curry powder, turmer..."
296,whole wheat pancakes,"[whole wheat flour, baking powder, salt, eggs,..."
297,ratatouille,"[eggplant, zucchini, red pepper, yellow pepper..."
298,omelette with spinach and feta cheese,"[egg, spinach, feta cheese, milk, salt, black ..."


In [467]:
swed_result_df.to_pickle("../dataset/swed_result.pkl")

### Collect Thai Version

In [307]:
user_prompt_thai = generate_user_prompt('prompts/user_prompt_thai.txt')
system_prompt_thai = generate_system_prompt('prompts/system_prompt_thai.txt')

In [73]:
thai_resp = call_openai_api(user_prompt_thai, system_prompt_thai, 30)
thai_resp

[{'ชื่อ': 'สลัดผัก',
  'ส่วนผสม': ['ผักสลัดหลากชนิด', 'มะเขือเทศ', 'แครอท', 'หอมแดง', 'โยเกิร์ต', 'มัสตาร์ด', 'น้ำมะนาว', 'น้ำมันมะกอก']},
 {'ชื่อ': 'สมูทตี้เบอร์รี่',
  'ส่วนผสม': ['สตรอเบอร์รี่', 'บลูเบอร์รี่', 'ราสเบอร์รี่', 'กล้วย', 'นมถั่วเหลือง', 'น้ำผึ้ง']},
 {'ชื่อ': 'โยเกิร์ตพาร์เฟต์',
  'ส่วนผสม': ['โยเกิร์ตกรีก', 'น้ำผึ้ง', 'แกรนโนล่า', 'สตรอเบอร์รี่', 'บลูเบอร์รี่']},
 {'ชื่อ': 'อโวคาโดทูน่า',
  'ส่วนผสม': ['อโวคาโด', 'ทูน่ากระป๋อง', 'มะเขือเทศหั่นสี่เหลี่ยม', 'มะนาว', 'พริกสด', 'ผักชี', 'เกลือ', 'พริกไทย']},
 {'ชื่อ': 'ข้าวโอ๊ตพร้อมผลไม้',
  'ส่วนผสม': ['ข้าวโอ๊ต', 'นม', 'แอปเปิ้ลหั่นเต๋า', 'องุ่น', 'น้ำผึ้ง', 'เกลือเล็กน้อย']},
 {'ชื่อ': 'ซุปเห็ด',
  'ส่วนผสม': ['เห็ดหลากชนิด', 'น้ำซุปไก่', 'ครีม', 'หัวหอม', 'เนย', 'เกลือ', 'พริกไทย']},
 {'ชื่อ': 'แซนด์วิชไก่งวง',
  'ส่วนผสม': ['ขนมปังโฮลวีท', 'ไก่งวงปรุงสุก', 'แตงกวา', 'มะเขือเทศ', 'ผักกาดหอม', 'มัสตาร์ด']},
 {'ชื่อ': 'เต้าหู้ผัดพริกขิง',
  'ส่วนผสม': ['เต้าหู้', 'พริกสด', 'ขิง', 'ซอสถั่วเหลือง', 'หอมใหญ่', 'น้ำตาลปี๊บ']

["[{'ชื่อ': 'สลัดผัก',\n  'ส่วนผสม': ['ผักสลัดหลากชนิด', 'มะเขือเทศ', 'แครอท', 'หอมแดง', 'โยเกิร์ต', 'มัสตาร์ด', 'น้ำมะนาว', 'น้ำมันมะกอก']},\n {'ชื่อ': 'สมูทตี้เบอร์รี่',\n  'ส่วนผสม': ['สตรอเบอร์รี่', 'บลูเบอร์รี่', 'ราสเบอร์รี่', 'กล้วย', 'นมถั่วเหลือง', 'น้ำผึ้ง']},\n {'ชื่อ': 'โยเกิร์ตพาร์เฟต์',\n  'ส่วนผสม': ['โยเกิร์ตกรีก', 'น้ำผึ้ง', 'แกรนโนล่า', 'สตรอเบอร์รี่', 'บลูเบอร์รี่']},\n {'ชื่อ': 'อโวคาโดทูน่า',\n  'ส่วนผสม': ['อโวคาโด', 'ทูน่ากระป๋อง', 'มะเขือเทศหั่นสี่เหลี่ยม', 'มะนาว', 'พริกสด', 'ผักชี', 'เกลือ', 'พริกไทย']},\n {'ชื่อ': 'ข้าวโอ๊ตพร้อมผลไม้',\n  'ส่วนผสม': ['ข้าวโอ๊ต', 'นม', 'แอปเปิ้ลหั่นเต๋า', 'องุ่น', 'น้ำผึ้ง', 'เกลือเล็กน้อย']},\n {'ชื่อ': 'ซุปเห็ด',\n  'ส่วนผสม': ['เห็ดหลากชนิด', 'น้ำซุปไก่', 'ครีม', 'หัวหอม', 'เนย', 'เกลือ', 'พริกไทย']},\n {'ชื่อ': 'แซนด์วิชไก่งวง',\n  'ส่วนผสม': ['ขนมปังโฮลวีท', 'ไก่งวงปรุงสุก', 'แตงกวา', 'มะเขือเทศ', 'ผักกาดหอม', 'มัสตาร์ด']},\n {'ชื่อ': 'เต้าหู้ผัดพริกขิง',\n  'ส่วนผสม': ['เต้าหู้', 'พริกสด', 'ขิง', 'ซอสถั่วเหลือง', 'หอมใหญ

In [74]:
# with open('thai_resp.pkl', 'wb') as f:
#     pickle.dump(thai_resp, f)

In [296]:
with open('responses/thai_resp.pkl', 'rb') as f:
    thai_resp = pickle.load(f)

In [297]:
trans_thai_resp = translate_to_eng(thai_resp)

In [535]:
trans_thai_resp

["[{'name': 'vegetable salad',\n  'ingredients': ['various salad vegetables', 'tomatoes', 'carrots', 'red onions', 'yogurt', 'mustard', 'lemon juice', 'olive oil']},\n {'name': 'berry smoothie',\n  'ingredients': ['strawberry', 'blueberry', 'raspberry', 'banana', 'soy milk', 'honey']},\n {'name': 'yogurt parfait',\n  'ingredients': ['greek yogurt', 'honey', 'granola', 'strawberry', 'blueberry']},\n {'name': 'avocado tuna',\n  'ingredients': ['avocado', 'canned tuna', 'diced tomatoes', 'lime', 'fresh chilli', 'cilantro', 'salt', 'pepper']},\n {'name': 'oatmeal with fruit',\n  'ingredients': ['oats', 'milk', 'diced apple', 'grapes', 'honey', 'a pinch of salt']},\n {'name': 'mushroom soup',\n  'ingredients': ['assorted mushrooms', 'chicken broth', 'cream', 'onion', 'butter', 'salt', 'pepper']},\n {'name': 'turkey sandwich',\n  'ingredients': ['whole wheat bread', 'cooked turkey', 'cucumber', 'tomato', 'lettuce', 'mustard']},\n {'name': 'stir-fried tofu with ginger chili',\n  'ingredients'

In [298]:
thai_df, thai_err = create_dataframe_from_responses_2(trans_thai_resp)

Error parsing response at index 1: unterminated string literal (detected at line 28) (<unknown>, line 28)
Error parsing response at index 4: invalid syntax (<unknown>, line 1)
Error parsing response at index 8: invalid syntax (<unknown>, line 1)
Error parsing response at index 9: invalid syntax (<unknown>, line 1)
Error parsing response at index 11: invalid syntax (<unknown>, line 1)
Error parsing response at index 14: invalid syntax (<unknown>, line 1)
Error parsing response at index 16: invalid syntax (<unknown>, line 1)
Error parsing response at index 20: invalid syntax (<unknown>, line 1)
Error parsing response at index 21: invalid syntax (<unknown>, line 1)
Error parsing response at index 24: invalid syntax (<unknown>, line 1)
Error parsing response at index 26: invalid syntax (<unknown>, line 1)
Error parsing response at index 28: invalid syntax (<unknown>, line 1)
Error parsing response at index 29: invalid syntax (<unknown>, line 1)


In [299]:
thai_df

Unnamed: 0,name,ingredients
0,vegetable salad,"[various salad vegetables, tomatoes, carrots, ..."
1,berry smoothie,"[strawberry, blueberry, raspberry, banana, soy..."
2,yogurt parfait,"[greek yogurt, honey, granola, strawberry, blu..."
3,avocado tuna,"[avocado, canned tuna, diced tomatoes, lime, f..."
4,oatmeal with fruit,"[oats, milk, diced apple, grapes, honey, a pin..."
...,...,...
165,stir-fried broccoli and mushrooms,
166,winged bean salad,
167,oatmeal and banana pancakes,
168,eggplant stew with red beans,


In [303]:
thai_err

[(1,
  "here are 10 healthy recipes you might like:\n\n1. {'name': 'grilled chicken salad',\n  'ingredients': ['grilled chicken', 'lettuce', 'tomato', 'cucumber', 'olive oil', 'balsamic vinegar']}\n\n2. {'name': 'fruit smoothie',\n  'ingredients': ['banana', 'frozen fruit of choice', 'coconut water', 'greek yogurt', 'honey']}\n\n3. {'name': 'bowlgrainolas',\n  'ingredients': ['corn', 'coarse rice', 'brown rice', 'avocado', 'red onion', 'diced red pepper', 'boiled egg', 'olive oil ', 'lemonade']}\n\n4. {'name': 'stir-fried tofu with vegetables',\n  'ingredients': ['tofu', 'spinach', 'mushroom', 'garlic', 'sesame oil', 'soy sauce', 'sugar', 'cayenne pepper']}\n\n5. {'name': 'chicken green curry',\n  'ingredients': ['chicken meat', 'green chilies', 'young coconut', 'kaffir lime leaves', 'fish sauce', 'palm sugar']}\n\n6. {'name': 'tomato soup',\n  'ingredients': ['fresh tomatoes', 'onion', 'garlic', 'olive oil', 'chicken broth', 'salt', 'pepper']}\n\n7. {'name': 'plowlets rice',\n  'ingre

In [300]:
all_dishes = []

# Regex to find dishes with different 'name' labels
dish_pattern = r"\{\s*'(?:m|the )?name'\s*:\s*'([^']+)',\s*'ingredients'\s*:\s*\[([^]]+)\]\}"

# Process each item in the data list
for _, dishes_str in thai_err:
    # Adjust quotes for matching
    adjusted_str = dishes_str.replace('"', "'")
    # Find all matches in the current string
    matches = re.findall(dish_pattern, adjusted_str)
    if not matches:
        print("No matches found in:", dishes_str)
    for match in matches:
        dish_name = match[0]
        # Remove extra quotes and split ingredients into a list
        ingredients_list = [ingredient.strip().strip("'") for ingredient in match[1].split(',')]
        all_dishes.append({'name': dish_name, 'ingredients': ingredients_list})

No matches found in: sure, here are 10 healthy recipes you requested:

1. **spinach salad**

- spinach
- avocado
- lemon juice
- olive oil
- salt
- pepper

2. **fruit yogurt smoothie**
- plain yogurt
- strawberries
- bananas
- honey
- almonds

3. **brown rice stir fry**
- brown rice
- bell pepper
- carrots
- onions
- sesame oil

4. **grilled salmon with lemon**
- salmon
- lemon
- olive oil
- salt
- pepper

5. **cinnamon mushroom soup**
- cinnamon mushrooms
- onions
- garlic
- stock
- thyme

6. **steamed chicken with tofu**
- lean chicken
- tofu
- ginger
- spring onions
- seafood sauce

7. **stir-fried kale with garlic**
- kale
- garlic
- olive oil
- salt
- pepper

8. **tom kha gai**
- chicken
- galangal
- fish balls
- tomato
- lemongrass

9. **stir-fried tofu with curry paste**
- tofu with curry paste
- green curry paste
- lime leaves
- olive oil
- fresh vegetables such as bell peppers

10. **oatmeal apple pancake**
- ground oatmeal
- apple
- eggs
- milk
- cinnamon powder


In [305]:
thai_err_df = pd.DataFrame(all_dishes)
thai_err_df

Unnamed: 0,name,ingredients
0,grilled chicken salad,"[grilled chicken, lettuce, tomato, cucumber, o..."
1,fruit smoothie,"[banana, frozen fruit of choice, coconut water..."
2,bowlgrainolas,"[corn, coarse rice, brown rice, avocado, red o..."
3,stir-fried tofu with vegetables,"[tofu, spinach, mushroom, garlic, sesame oil, ..."
4,chicken green curry,"[chicken meat, green chilies, young coconut, k..."
...,...,...
114,salmon steak,"[salmon, salt, pepper, olive oil]"
115,tomato soup,"[tomato, onion, garlic, basil, broth]"
116,stir-fried tofu with vegetables,"[tofu, various vegetables, soy sauce, pepper]"
117,vermicelli soup,"[vermicelli, shrimp, vegetables, shiitake mush..."


In [302]:
missed_recipes = [
    {'name': 'spinach salad', 'ingredients': ['spinach', 'avocado', 'lemon juice', 'olive oil', 'salt', 'pepper']},
    {'name': 'fruit yogurt smoothie', 'ingredients': ['plain yogurt', 'strawberries', 'bananas', 'honey', 'almonds']},
    {'name': 'brown rice stir fry', 'ingredients': ['brown rice', 'bell pepper', 'carrots', 'onions', 'sesame oil']},
    {'name': 'grilled salmon with lemon', 'ingredients': ['salmon', 'lemon', 'olive oil', 'salt', 'pepper']},
    {'name': 'cinnamon mushroom soup', 'ingredients': ['cinnamon mushrooms', 'onions', 'garlic', 'stock', 'thyme']},
    {'name': 'steamed chicken with tofu', 'ingredients': ['lean chicken', 'tofu', 'ginger', 'spring onions', 'seafood sauce']},
    {'name': 'stir-fried kale with garlic', 'ingredients': ['kale', 'garlic', 'olive oil', 'salt', 'pepper']},
    {'name': 'tom kha gai', 'ingredients': ['chicken', 'galangal', 'fish balls', 'tomato', 'lemongrass']},
    {'name': 'stir-fried tofu with curry paste', 'ingredients': ['tofu with curry paste', 'green curry paste', 'lime leaves', 'olive oil', 'fresh vegetables such as bell peppers']},
    {'name': 'oatmeal apple pancake', 'ingredients': ['ground oatmeal', 'apple', 'eggs', 'milk', 'cinnamon powder']}
]

missed_recipes_df = pd.DataFrame(missed_recipes)

In [306]:
missed_recipes_df

Unnamed: 0,name,ingredients
0,spinach salad,"[spinach, avocado, lemon juice, olive oil, sal..."
1,fruit yogurt smoothie,"[plain yogurt, strawberries, bananas, honey, a..."
2,brown rice stir fry,"[brown rice, bell pepper, carrots, onions, ses..."
3,grilled salmon with lemon,"[salmon, lemon, olive oil, salt, pepper]"
4,cinnamon mushroom soup,"[cinnamon mushrooms, onions, garlic, stock, th..."
5,steamed chicken with tofu,"[lean chicken, tofu, ginger, spring onions, se..."
6,stir-fried kale with garlic,"[kale, garlic, olive oil, salt, pepper]"
7,tom kha gai,"[chicken, galangal, fish balls, tomato, lemong..."
8,stir-fried tofu with curry paste,"[tofu with curry paste, green curry paste, lim..."
9,oatmeal apple pancake,"[ground oatmeal, apple, eggs, milk, cinnamon p..."


In [308]:
thai_resp2 = call_openai_api(user_prompt_thai, system_prompt_thai)
thai_resp2

[{'ชื่อ': 'สลัดผักสด',
  'ส่วนผสม': ['ผักกาดหอม', 'ผักสลัด', 'มะเขือเทศ', 'แตงกวา', 'มะกอก', 'น้ำสลัด']},
 {'ชื่อ': 'สเต็กปลาแซลมอน',
  'ส่วนผสม': ['ปลาแซลมอน', 'มะนาว', 'พริกไทย', 'เกลือ', 'โรสแมรี่']},
 {'ชื่อ': 'ข้าวโพดอบเนย',
  'ส่วนผสม': ['ข้าวโพด', 'เนย', 'เกลือ', 'พริกไทย']},
 {'ชื่อ': 'ซุปผัก',
  'ส่วนผสม': ['ผักคะน้า', 'แครอท', 'มันฝรั่ง', 'น้ำสต็อกผัก', 'หัวหอม', 'เกลือ', 'พริกไทย']},
 {'ชื่อ': 'ยำถั่วงอก',
  'ส่วนผสม': ['ถั่วงอก', 'ต้นหอม', 'มะนาว', 'พริกป่น', 'น้ำปลา', 'น้ำตาล']},
 {'ชื่อ': 'ไก่อบซอสมะขาม',
  'ส่วนผสม': ['ไก่', 'มะขามเปียก', 'น้ำตาลปี๊บ', 'น้ำปลา', 'หอมใหญ่', 'กระเทียม']},
 {'ชื่อ': 'สเต็กโทฟู',
  'ส่วนผสม': ['โทฟู', 'ซีอิ๊วขาว', 'ซอสถั่วเหลือง', 'น้ำมันงา', 'ต้นหอม']},
 {'ชื่อ': 'สมูทตี้เบอร์รี่',
  'ส่วนผสม': ['บลูเบอร์รี่', 'สตรอว์เบอร์รี่', 'ราสเบอร์รี่', 'โยเกิร์ต', 'น้ำผึ้ง']},
 {'ชื่อ': 'แกงเห็ดสด',
  'ส่วนผสม': ['เห็ดหูหนูสีขาว', 'เห็ดชิเมจิ', 'น้ำกะทิ', 'พริกแกง', 'กะปิ', 'ตะไคร้', 'ใบมะกรูด']},
 {'ชื่อ': 'เต้าหู้ทรงเครื่อง',
  'ส่วนผสม': ['เต้าหู้

["[{'ชื่อ': 'สลัดผักสด',\n  'ส่วนผสม': ['ผักกาดหอม', 'ผักสลัด', 'มะเขือเทศ', 'แตงกวา', 'มะกอก', 'น้ำสลัด']},\n {'ชื่อ': 'สเต็กปลาแซลมอน',\n  'ส่วนผสม': ['ปลาแซลมอน', 'มะนาว', 'พริกไทย', 'เกลือ', 'โรสแมรี่']},\n {'ชื่อ': 'ข้าวโพดอบเนย',\n  'ส่วนผสม': ['ข้าวโพด', 'เนย', 'เกลือ', 'พริกไทย']},\n {'ชื่อ': 'ซุปผัก',\n  'ส่วนผสม': ['ผักคะน้า', 'แครอท', 'มันฝรั่ง', 'น้ำสต็อกผัก', 'หัวหอม', 'เกลือ', 'พริกไทย']},\n {'ชื่อ': 'ยำถั่วงอก',\n  'ส่วนผสม': ['ถั่วงอก', 'ต้นหอม', 'มะนาว', 'พริกป่น', 'น้ำปลา', 'น้ำตาล']},\n {'ชื่อ': 'ไก่อบซอสมะขาม',\n  'ส่วนผสม': ['ไก่', 'มะขามเปียก', 'น้ำตาลปี๊บ', 'น้ำปลา', 'หอมใหญ่', 'กระเทียม']},\n {'ชื่อ': 'สเต็กโทฟู',\n  'ส่วนผสม': ['โทฟู', 'ซีอิ๊วขาว', 'ซอสถั่วเหลือง', 'น้ำมันงา', 'ต้นหอม']},\n {'ชื่อ': 'สมูทตี้เบอร์รี่',\n  'ส่วนผสม': ['บลูเบอร์รี่', 'สตรอว์เบอร์รี่', 'ราสเบอร์รี่', 'โยเกิร์ต', 'น้ำผึ้ง']},\n {'ชื่อ': 'แกงเห็ดสด',\n  'ส่วนผสม': ['เห็ดหูหนูสีขาว', 'เห็ดชิเมจิ', 'น้ำกะทิ', 'พริกแกง', 'กะปิ', 'ตะไคร้', 'ใบมะกรูด']},\n {'ชื่อ': 'เต้าหู้ทรงเครื่อง',\n 

In [309]:
trans_thai_resp2 = translate_to_eng(thai_resp2)

In [310]:
thai_df2, thai_err2 = create_dataframe_from_responses_2(trans_thai_resp2)

In [311]:
thai_result_df = pd.concat([thai_df, thai_err_df, missed_recipes_df, thai_df2[:1]])

In [468]:
thai_result_df.reset_index(drop=True, inplace=True)

In [469]:
thai_result_df

Unnamed: 0,name,ingredients
0,vegetable salad,"[various salad vegetables, tomatoes, carrots, ..."
1,berry smoothie,"[strawberry, blueberry, raspberry, banana, soy..."
2,yogurt parfait,"[greek yogurt, honey, granola, strawberry, blu..."
3,avocado tuna,"[avocado, canned tuna, diced tomatoes, lime, f..."
4,oatmeal with fruit,"[oats, milk, diced apple, grapes, honey, a pin..."
...,...,...
295,stir-fried kale with garlic,"[kale, garlic, olive oil, salt, pepper]"
296,tom kha gai,"[chicken, galangal, fish balls, tomato, lemong..."
297,stir-fried tofu with curry paste,"[tofu with curry paste, green curry paste, lim..."
298,oatmeal apple pancake,"[ground oatmeal, apple, eggs, milk, cinnamon p..."


In [534]:
float_rows = thai_result_df['ingredients'].apply(lambda x: isinstance(x, float))
rows_with_floats = thai_result_df[float_rows]
print("Rows containing floats:")
print(rows_with_floats)

Rows containing floats:
                                   name ingredients
160                       avocado salad         NaN
161                khao wok omelet roll         NaN
162        scrambled eggs with tomatoes         NaN
163  salmon steak with quinoa tricholes         NaN
164                          tofu curry         NaN
165   stir-fried broccoli and mushrooms         NaN
166                   winged bean salad         NaN
167         oatmeal and banana pancakes         NaN
168        eggplant stew with red beans         NaN
169              carrot and ginger soup         NaN


In [536]:
correct_ingredients = {
    160: ['avocado', 'salad vegetables', 'olive oil', 'lemonade', 'salt', 'black pepper'],
    161: ['cooked rice', 'egg', 'spring onion', 'sugar', 'fish sauce', 'white soy sauce'],
    162: ['egg', 'tomato', 'spring onion', 'salt', 'black pepper'],
    163: ['salmon', 'quinoa', 'pumpkin', 'broccoli', 'cherry tomato'],
    164: ['hard tofu', 'red curry paste', 'coconut milk', 'kaffir lime leaves', 'dried chilli'],
    165: ['broccoli', 'white fungus', 'garlic', 'soy sauce', 'sesame oil'],
    166: ['fresh winged beans', 'cherry tomato', 'shallot', 'lemonade', 'bitter', 'salt', 'palm sugar'],
    167: ['oatmeal', 'banana', 'egg', 'milk', 'coconut sugar', 'baking powder'],
    168: ['eggplant brinjal', 'red beans', 'onion', 'garlic', 'dried chili', 'coconut milk'],
    169: ['carrot', 'ginger', 'big onion', 'stock water', 'cream', 'salt']
}

for idx, ingredients in correct_ingredients.items():
    thai_result_df.at[idx, 'ingredients'] = ingredients

In [537]:
thai_result_df.to_pickle("../dataset/thai_result.pkl")

### Collect Filipino Version

In [327]:
user_prompt_fili = generate_user_prompt('prompts/user_prompt_filipino.txt')
system_prompt_fili = generate_system_prompt('prompts/system_prompt_filipino.txt')

In [77]:
fili_resp = call_openai_api(user_prompt_fili, system_prompt_fili, 30)
fili_resp

[{'name': 'quinoa salad',
  'mga sangkap': ['quinoa', 'cherry tomatoes', 'cucumber', 'red onion', 'feta cheese', 'olive oil', 'lemon juice', 'parsley', 'mint', 'salt', 'pepper']},
 {'name': 'chickpea curry',
  'mga sangkap': ['chickpeas', 'onion', 'garlic', 'ginger', 'tomato puree', 'coconut milk', 'curry powder', 'turmeric', 'cumin', 'coriander', 'spinach']},
 {'name': 'broccoli soup',
  'mga sangkap': ['broccoli', 'onion', 'garlic', 'chicken or vegetable broth', 'olive oil', 'salt', 'pepper']},
 {'name': 'grilled salmon',
  'mga sangkap': ['salmon fillets', 'lemon juice', 'olive oil', 'garlic', 'dill', 'salt', 'pepper']},
 {'name': 'vegetable stir fry',
  'mga sangkap': ['broccoli', 'carrot', 'bell pepper', 'snap peas', 'ginger', 'garlic', 'soy sauce', 'olive oil', 'sesame seeds']},
 {'name': 'turkey chili',
  'mga sangkap': ['ground turkey', 'onion', 'garlic', 'diced tomatoes', 'kidney beans', 'chili powder', 'cumin', 'paprika', 'salt', 'olive oil']},
 {'name': 'spinach and mushroom

["[{'name': 'quinoa salad',\n  'mga sangkap': ['quinoa', 'cherry tomatoes', 'cucumber', 'red onion', 'feta cheese', 'olive oil', 'lemon juice', 'parsley', 'mint', 'salt', 'pepper']},\n {'name': 'chickpea curry',\n  'mga sangkap': ['chickpeas', 'onion', 'garlic', 'ginger', 'tomato puree', 'coconut milk', 'curry powder', 'turmeric', 'cumin', 'coriander', 'spinach']},\n {'name': 'broccoli soup',\n  'mga sangkap': ['broccoli', 'onion', 'garlic', 'chicken or vegetable broth', 'olive oil', 'salt', 'pepper']},\n {'name': 'grilled salmon',\n  'mga sangkap': ['salmon fillets', 'lemon juice', 'olive oil', 'garlic', 'dill', 'salt', 'pepper']},\n {'name': 'vegetable stir fry',\n  'mga sangkap': ['broccoli', 'carrot', 'bell pepper', 'snap peas', 'ginger', 'garlic', 'soy sauce', 'olive oil', 'sesame seeds']},\n {'name': 'turkey chili',\n  'mga sangkap': ['ground turkey', 'onion', 'garlic', 'diced tomatoes', 'kidney beans', 'chili powder', 'cumin', 'paprika', 'salt', 'olive oil']},\n {'name': 'spinac

In [78]:
# with open('fili_resp.pkl', 'wb') as f:
#     pickle.dump(fili_resp, f)

In [314]:
with open('responses/fili_resp.pkl', 'rb') as f:
    fili_resp = pickle.load(f)

In [315]:
trans_fili_resp = translate_to_eng(fili_resp)

In [319]:
trans_fili_resp

["[{'name': 'quinoa salad',\n  'mga sangkap': ['quinoa', 'cherry tomatoes', 'cucumber', 'red onion', 'feta cheese', 'olive oil', 'lemon juice', 'parsley', 'mint', 'salt', 'pepper']},\n {'name': 'chickpea curry',\n  'mga sangkap': ['chickpeas', 'onion', 'garlic', 'ginger', 'tomato puree', 'coconut milk', 'curry powder', 'turmeric', 'cumin', 'coriander', 'spinach']},\n {'name': 'broccoli soup',\n  'mga sangkap': ['broccoli', 'onion', 'garlic', 'chicken or vegetable broth', 'olive oil', 'salt', 'pepper']},\n {'name': 'grilled salmon',\n  'mga sangkap': ['salmon fillets', 'lemon juice', 'olive oil', 'garlic', 'dill', 'salt', 'pepper']},\n {'name': 'vegetable stir fry',\n  'mga sangkap': ['broccoli', 'carrot', 'bell pepper', 'snap peas', 'ginger', 'garlic', 'soy sauce', 'olive oil', 'sesame seeds']},\n {'name': 'turkey chili',\n  'mga sangkap': ['ground turkey', 'onion', 'garlic', 'diced tomatoes', 'kidney beans', 'chili powder', 'cumin', 'paprika', 'salt', 'olive oil']},\n {'name': 'spinac

In [539]:
trans_fili_resp

["[{'name': 'quinoa salad',\n  'mga sangkap': ['quinoa', 'cherry tomatoes', 'cucumber', 'red onion', 'feta cheese', 'olive oil', 'lemon juice', 'parsley', 'mint', 'salt', 'pepper']},\n {'name': 'chickpea curry',\n  'mga sangkap': ['chickpeas', 'onion', 'garlic', 'ginger', 'tomato puree', 'coconut milk', 'curry powder', 'turmeric', 'cumin', 'coriander', 'spinach']},\n {'name': 'broccoli soup',\n  'mga sangkap': ['broccoli', 'onion', 'garlic', 'chicken or vegetable broth', 'olive oil', 'salt', 'pepper']},\n {'name': 'grilled salmon',\n  'mga sangkap': ['salmon fillets', 'lemon juice', 'olive oil', 'garlic', 'dill', 'salt', 'pepper']},\n {'name': 'vegetable stir fry',\n  'mga sangkap': ['broccoli', 'carrot', 'bell pepper', 'snap peas', 'ginger', 'garlic', 'soy sauce', 'olive oil', 'sesame seeds']},\n {'name': 'turkey chili',\n  'mga sangkap': ['ground turkey', 'onion', 'garlic', 'diced tomatoes', 'kidney beans', 'chili powder', 'cumin', 'paprika', 'salt', 'olive oil']},\n {'name': 'spinac

In [322]:
def preprocess_responses(responses):
    processed_responses = []
    for response in responses:
        try:
            # Attempt to parse the response into a list of dictionaries
            parsed = ast.literal_eval(response)
            # Check if all elements are dictionaries
            if all(isinstance(item, dict) for item in parsed):
                processed_responses.append(response)
            else:
                print(f"Non-dictionary data found and will be excluded: {parsed}")
        except Exception as e:
            print(f"Error processing response: {e}")
    return processed_responses

In [323]:
cleaned_fili_resp = preprocess_responses(trans_fili_resp)

Non-dictionary data found and will be excluded: [{'name': 'quinoa salad', 'mga sangkap': ['quinoa', 'cherry tomatoes', 'cucumber', 'feta cheese', 'olive oil', 'lemon juice', 'mint', 'parsley']}, {'something else'}, {'something more'}, {'and more'}, {'another one'}, {'yet another one'}, {'still more'}, {'more and more'}, {'keep going'}, {'last one'}]


In [326]:
cleaned_fili_resp

["[{'name': 'quinoa salad',\n  'mga sangkap': ['quinoa', 'cherry tomatoes', 'cucumber', 'red onion', 'feta cheese', 'olive oil', 'lemon juice', 'parsley', 'mint', 'salt', 'pepper']},\n {'name': 'chickpea curry',\n  'mga sangkap': ['chickpeas', 'onion', 'garlic', 'ginger', 'tomato puree', 'coconut milk', 'curry powder', 'turmeric', 'cumin', 'coriander', 'spinach']},\n {'name': 'broccoli soup',\n  'mga sangkap': ['broccoli', 'onion', 'garlic', 'chicken or vegetable broth', 'olive oil', 'salt', 'pepper']},\n {'name': 'grilled salmon',\n  'mga sangkap': ['salmon fillets', 'lemon juice', 'olive oil', 'garlic', 'dill', 'salt', 'pepper']},\n {'name': 'vegetable stir fry',\n  'mga sangkap': ['broccoli', 'carrot', 'bell pepper', 'snap peas', 'ginger', 'garlic', 'soy sauce', 'olive oil', 'sesame seeds']},\n {'name': 'turkey chili',\n  'mga sangkap': ['ground turkey', 'onion', 'garlic', 'diced tomatoes', 'kidney beans', 'chili powder', 'cumin', 'paprika', 'salt', 'olive oil']},\n {'name': 'spinac

In [330]:
fili_df, fili_err = create_dataframe_from_responses_2(cleaned_fili_resp)

In [331]:
fili_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cherry tomatoes, cucumber, red onion,..."
1,chickpea curry,"[chickpeas, onion, garlic, ginger, tomato pure..."
2,broccoli soup,"[broccoli, onion, garlic, chicken or vegetable..."
3,grilled salmon,"[salmon fillets, lemon juice, olive oil, garli..."
4,vegetable stir fry,"[broccoli, carrot, bell pepper, snap peas, gin..."
...,...,...
285,chickpea curry,"[chickpeas, coconut milk, onion, tomatoes, gar..."
286,spinach and tomato pasta,"[whole wheat pasta, spinach, cherry tomatoes, ..."
287,vegetable stir fry,"[broccoli, bell pepper, carrot, snow peas, soy..."
288,oatmeal with fruits,"[rolled oats, milk, cinnamon, honey, fresh ber..."


In [332]:
fili_resp2 = call_openai_api(user_prompt_fili, system_prompt_fili)
fili_resp2

[{'name': 'quinoa salad',
  'mga sangkap': ['quinoa', 'pipino', 'kamatis', 'red onion', 'feta cheese', 'lemon juice', 'olive oil', 'parsley']},
 {'name': 'turkey chili',
  'mga sangkap': ['ground turkey', 'onion', 'garlic', 'bell peppers', 'diced tomatoes', 'kidney beans', 'chili powder', 'cumin', 'chicken broth']},
 {'name': 'vegetable stir-fry',
  'mga sangkap': ['broccoli', 'carrot', 'bell pepper', 'snap peas', 'garlic', 'soy sauce', 'sesame oil', 'tofu']},
 {'name': 'chicken quinoa bowl',
  'mga sangkap': ['chicken breast', 'quinoa', 'avocado', 'corn', 'black beans', 'lime', 'cilantro']},
 {'name': 'salmon with asparagus',
  'mga sangkap': ['salmon fillets', 'asparagus', 'lemon', 'olive oil', 'garlic', 'dill']},
 {'name': 'kale smoothie',
  'mga sangkap': ['kale', 'banana', 'apple', 'chia seeds', 'almond milk', 'honey']},
 {'name': 'lentil soup',
  'mga sangkap': ['lentils', 'carrot', 'celery', 'onion', 'garlic', 'tomatoes', 'thyme', 'vegetable broth']},
 {'name': 'cauliflower taco

["[{'name': 'quinoa salad',\n  'mga sangkap': ['quinoa', 'pipino', 'kamatis', 'red onion', 'feta cheese', 'lemon juice', 'olive oil', 'parsley']},\n {'name': 'turkey chili',\n  'mga sangkap': ['ground turkey', 'onion', 'garlic', 'bell peppers', 'diced tomatoes', 'kidney beans', 'chili powder', 'cumin', 'chicken broth']},\n {'name': 'vegetable stir-fry',\n  'mga sangkap': ['broccoli', 'carrot', 'bell pepper', 'snap peas', 'garlic', 'soy sauce', 'sesame oil', 'tofu']},\n {'name': 'chicken quinoa bowl',\n  'mga sangkap': ['chicken breast', 'quinoa', 'avocado', 'corn', 'black beans', 'lime', 'cilantro']},\n {'name': 'salmon with asparagus',\n  'mga sangkap': ['salmon fillets', 'asparagus', 'lemon', 'olive oil', 'garlic', 'dill']},\n {'name': 'kale smoothie',\n  'mga sangkap': ['kale', 'banana', 'apple', 'chia seeds', 'almond milk', 'honey']},\n {'name': 'lentil soup',\n  'mga sangkap': ['lentils', 'carrot', 'celery', 'onion', 'garlic', 'tomatoes', 'thyme', 'vegetable broth']},\n {'name': '

In [333]:
fili_df2, fili_err2 = create_dataframe_from_responses_2(fili_resp2)

In [334]:
fili_result_df = pd.concat([fili_df, fili_df2], axis=0)

In [471]:
fili_result_df.reset_index(drop=True, inplace=True)

In [472]:
fili_result_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cherry tomatoes, cucumber, red onion,..."
1,chickpea curry,"[chickpeas, onion, garlic, ginger, tomato pure..."
2,broccoli soup,"[broccoli, onion, garlic, chicken or vegetable..."
3,grilled salmon,"[salmon fillets, lemon juice, olive oil, garli..."
4,vegetable stir fry,"[broccoli, carrot, bell pepper, snap peas, gin..."
...,...,...
295,kale smoothie,"[kale, banana, apple, chia seeds, almond milk,..."
296,lentil soup,"[lentils, carrot, celery, onion, garlic, tomat..."
297,cauliflower tacos,"[cauliflower, chili powder, cumin, garlic powd..."
298,oatmeal with berries,"[oats, milk, honey, blueberries, strawberries,..."


In [538]:
float_rows = fili_result_df['ingredients'].apply(lambda x: isinstance(x, float))
rows_with_floats = fili_result_df[float_rows]
print("Rows containing floats:")
print(rows_with_floats)

Rows containing floats:
                            name ingredients
109  kale and quinoa power salad         NaN
174                          NaN         NaN
175                          NaN         NaN
176                          NaN         NaN
177                          NaN         NaN
178                          NaN         NaN
179                          NaN         NaN
208      carrot and ginger juice         NaN


In [540]:
fili_result_df.at[173, 'name']

'vegetarian chili'

In [545]:
correct_name = {
    174: 'oven-baked salmon',
    175: 'kale and apple salad',
    176: 'chickpea curry',
    177: 'vegetable stir-fry',
    178: 'lentil soup',
    179: 'oatmeal pancakes'
}

for idx, name in correct_name.items():
    fili_result_df.at[idx, 'name'] = name

In [546]:
correct_ingredients = {
    109: ['kale', 'quinoa', 'dried cranberries', 'slivered almonds', 'feta cheese', 'olive oil', 'lemon juice'],
    174: ['salmon fillets', 'lemon juice', 'olive oil', 'garlic', 'salt', 'pepper', 'dill'],
    175: ['kale', 'apple', 'walnuts', 'red onion', 'olive oil', 'apple cider vinegar', 'mustard', 'honey', 'salt', 'pepper'],
    176: ['chickpeas', 'coconut milk', 'diced tomatoes', 'onion', 'garlic', 'ginger', 'curry powder', 'cumin', 'cilantro', 'coconut oil'],
    177: ['mixed vegetables', 'tofu', 'soy sauce', 'sesame oil', 'gingaff', 'garlic', 'brown rice'],
    178: ['lentils', 'carrots', 'celery', 'onion', 'diced tomatoes', 'garlic', 'thyme', 'bay leaves', 'vegetable stock', 'olive oil'],
    179: ['rolled oats', 'whole wheat flour', 'baking powder', 'cinnamon', 'unsweetened almond milk', 'egg', 'banana', 'honey'],
    208: ['carrots', 'ginger', 'apple', 'lemon']
}

for idx, ingredients in correct_ingredients.items():
    fili_result_df.at[idx, 'ingredients'] = ingredients

In [547]:
fili_result_df.to_pickle("../dataset/fili_result.pkl")

### Collect Vietnamese Version

In [344]:
user_prompt_viet = generate_user_prompt('prompts/user_prompt_viet.txt')
system_prompt_viet = generate_system_prompt('prompts/system_prompt_viet.txt')

In [81]:
viet_resp = call_openai_api(user_prompt_viet, system_prompt_viet, 30)
viet_resp

 [{'name': 'Salad rau củ quả',
  'thành phần': ['rau diếp', 'cà chua', 'dưa leo', 'hành tây', 'dầu oliu', 'giấm balsamic']},
 {'name': 'Smoothie trái cây',
  'thành phần': ['chuối', 'dâu tây', 'sữa hạnh nhân', 'mật ong', 'bột protein']},
 {'name': 'Cá hấp gừng',
  'thành phần': ['cá fillet', 'gừng tươi', 'hành lá', 'nước tương', 'dầu mè']},
 {'name': 'Gà nướng thảo mộc',
  'thành phần': ['ức gà', 'tỏi', 'rosemary', 'thyme', 'dầu oliu', 'muối', 'tiêu đen']},
 {'name': 'Cháo yến mạch',
  'thành phần': ['yến mạch', 'nước', 'muối', 'quả mọng', 'hạt chia', 'sữa hạnh nhân']},
 {'name': 'Soup khoai lang',
  'thành phần': ['khoai lang', 'hành tây', 'tỏi', 'nước dùng rau củ', 'kem dừa', 'muối', 'tiêu']},
 {'name': 'Tofu xào rau củ',
  'thành phần': ['tofu', 'bông cải xanh', 'cà rốt', 'ớt chuông', 'sốt xì dầu', 'dầu mè', 'hành tây']},
 {'name': 'Quinoa salad',
  'thành phần': ['quinoa', 'cà chua bi', 'dưa chuột', 'hành tây tím', 'feta cheese', 'dầu oliu', 'chanh']},
 {'name': 'Cơm chiên trứng và

[" [{'name': 'Salad rau củ quả',\n  'thành phần': ['rau diếp', 'cà chua', 'dưa leo', 'hành tây', 'dầu oliu', 'giấm balsamic']},\n {'name': 'Smoothie trái cây',\n  'thành phần': ['chuối', 'dâu tây', 'sữa hạnh nhân', 'mật ong', 'bột protein']},\n {'name': 'Cá hấp gừng',\n  'thành phần': ['cá fillet', 'gừng tươi', 'hành lá', 'nước tương', 'dầu mè']},\n {'name': 'Gà nướng thảo mộc',\n  'thành phần': ['ức gà', 'tỏi', 'rosemary', 'thyme', 'dầu oliu', 'muối', 'tiêu đen']},\n {'name': 'Cháo yến mạch',\n  'thành phần': ['yến mạch', 'nước', 'muối', 'quả mọng', 'hạt chia', 'sữa hạnh nhân']},\n {'name': 'Soup khoai lang',\n  'thành phần': ['khoai lang', 'hành tây', 'tỏi', 'nước dùng rau củ', 'kem dừa', 'muối', 'tiêu']},\n {'name': 'Tofu xào rau củ',\n  'thành phần': ['tofu', 'bông cải xanh', 'cà rốt', 'ớt chuông', 'sốt xì dầu', 'dầu mè', 'hành tây']},\n {'name': 'Quinoa salad',\n  'thành phần': ['quinoa', 'cà chua bi', 'dưa chuột', 'hành tây tím', 'feta cheese', 'dầu oliu', 'chanh']},\n {'name': '

In [82]:
# with open('viet_resp.pkl', 'wb') as f:
#     pickle.dump(viet_resp, f)

In [337]:
with open('responses/viet_resp.pkl', 'rb') as f:
    viet_resp = pickle.load(f)

In [338]:
trans_viet_resp = translate_to_eng(viet_resp)

In [341]:
trans_viet_resp

["[{'name': 'vegetable salad',\n  'ingredients': ['lettuce', 'tomato', 'cucumber', 'onion', 'olive oil', 'balsamic vinegar']},\n {'name': 'fruit smoothie',\n  'ingredients': ['banana', 'strawberries', 'almond milk', 'honey', 'protein powder']},\n {'name': 'steamed fish with ginger',\n  'ingredients': ['fish fillet', 'fresh ginger', 'scallions', 'soy sauce', 'sesame oil']},\n {'name': 'herb grilled chicken',\n  'ingredients': ['chicken breast', 'garlic', 'rosemary', 'thyme', 'olive oil', 'salt', 'black pepper']},\n {'name': 'oatmeal',\n  'ingredients': ['oats', 'water', 'salt', 'berries', 'chia seeds', 'almond milk']},\n {'name': 'sweet potato soup',\n  'ingredients': ['sweet potato', 'onion', 'garlic', 'vegetable broth', 'coconut cream', 'salt', 'pepper']},\n {'name': 'stir-fried tofu with vegetables',\n  'ingredients': ['tofu', 'broccoli', 'carrot', 'bell pepper', 'pesky sauce', 'sesame oil', 'onion']},\n {'name': 'quinoa salad',\n  'ingredients': ['quinoa', 'cherry tomatoes', 'cucumb

In [339]:
viet_df, viet_err = create_dataframe_from_responses_2(trans_viet_resp)

Error parsing response at index 1: invalid syntax (<unknown>, line 1)
Error parsing response at index 2: unterminated string literal (detected at line 33) (<unknown>, line 33)
Error parsing response at index 3: unterminated string literal (detected at line 33) (<unknown>, line 33)
Error parsing response at index 4: invalid syntax (<unknown>, line 1)
Error parsing response at index 6: invalid syntax (<unknown>, line 1)
Error parsing response at index 7: invalid syntax (<unknown>, line 1)
Error parsing response at index 8: unterminated string literal (detected at line 11) (<unknown>, line 11)
Error parsing response at index 9: invalid syntax (<unknown>, line 1)
Error parsing response at index 10: unterminated string literal (detected at line 24) (<unknown>, line 24)
Error parsing response at index 11: invalid syntax (<unknown>, line 1)
Error parsing response at index 12: invalid syntax (<unknown>, line 1)
Error parsing response at index 13: unterminated string literal (detected at line 2

In [340]:
viet_df

Unnamed: 0,name,ingredients
0,vegetable salad,"[lettuce, tomato, cucumber, onion, olive oil, ..."
1,fruit smoothie,"[banana, strawberries, almond milk, honey, pro..."
2,steamed fish with ginger,"[fish fillet, fresh ginger, scallions, soy sau..."
3,herb grilled chicken,"[chicken breast, garlic, rosemary, thyme, oliv..."
4,oatmeal,"[oats, water, salt, berries, chia seeds, almon..."
5,sweet potato soup,"[sweet potato, onion, garlic, vegetable broth,..."
6,stir-fried tofu with vegetables,"[tofu, broccoli, carrot, bell pepper, pesky sa..."
7,quinoa salad,"[quinoa, cherry tomatoes, cucumber, red onion,..."
8,egg and vegetable fried rice,"[brown rice, eggs, peas, carrots, low sodium s..."
9,baked potato cake,"[potatoes, scallions, parmesan cheese, salt, p..."


In [342]:
all_dishes = []

# Regex to find dishes with different 'name' labels
dish_pattern = r"\{\s*'(?:m|the )?name'\s*:\s*'([^']+)',\s*'ingredients'\s*:\s*\[([^]]+)\]\}"

# Process each item in the data list
for _, dishes_str in viet_err:
    # Adjust quotes for matching
    adjusted_str = dishes_str.replace('"', "'")
    # Find all matches in the current string
    matches = re.findall(dish_pattern, adjusted_str)
    if not matches:
        print("No matches found in:", dishes_str)
    for match in matches:
        dish_name = match[0]
        # Remove extra quotes and split ingredients into a list
        ingredients_list = [ingredient.strip().strip("'") for ingredient in match[1].split(',')]
        all_dishes.append({'name': dish_name, 'ingredients': ingredients_list})

No matches found in: here are 10 healthy recipes with ingredients for each dish:

[{'name': 'vegetable salad',
  'ingredients': ['lettuce', 'tomato', 'cucumber', 'carrot', 'corn', 'lemon juice', 'olive oil', 'salt', 'black pepper'] },
 {'attribute':'grilled salmon salad',
  'ingredients': ['salmon', 'garlic', 'olive oil', 'lemon', 'salt', 'black pepper', 'chicken', 'rucola']},
 {'attribute':'refreshing mango smoothie',
  'ingredients': ['mango', 'banana', 'almond milk', 'honey', 'unsweetened yogurt']},
 {'attribute':'pineapple chicken dish',
  'ingredients': ['chicken breast', 'pineapple', 'red bell pepper', 'red onion', 'garlic', 'low salt soy sauce', 'sesame oil', 'honey', 'black pepper ']},
 {'attribute':'stir-fried shrimp with vegetables',
  'ingredients': ['shrimp', 'broccoli', 'carrot', 'bell pepper', 'garlic', 'olive oil', 'soy sauce', 'pepper']},
 {'attribute':'broccoli soup',
  'ingredients': ['broccoli soup', 'onion', 'garlic', 'olive oil', 'almond milk', 'salt', 'pepper']},


In [343]:
viet_err_df = pd.DataFrame(all_dishes)
viet_err_df

Unnamed: 0,name,ingredients
0,boiled chicken with vegetables,"[chicken breast, carrots, potatoes, broccoli, ..."
1,tuna salad,"[canned tuna, lettuce, cherry tomatoes, cucumb..."
2,grilled broccoli,"[broccoli, olive oil, minced garlic, salt, bla..."
3,vegetable sandwich,"[whole grain bread, avocado, bell pepper, lett..."
4,oatmeal,"[oats, water, honey, fresh berries]"
...,...,...
222,grilled salmon,"[salmon, lemon, olive oil, garlic, cilantro, s..."
223,broccoli soup,"[broccoli, potato, onion, garlic, vegetable br..."
224,brown rice,"[brown rice, water, salt]"
225,stir-fried shrimp with spinach,"[shrimp, spinach, garlic, olive oil, lemon, sa..."


In [345]:
viet_resp2 = call_openai_api(user_prompt_viet, system_prompt_viet, 8)
viet_resp2

Dưới đây là 10 công thức nấu ăn lành mạnh gồm đầy đủ các thành phần:

[{'name': 'Salad gà nướng',
  'thành phần': ['ức gà nướng', 'lá rau baby', 'cà chua bi', 'dưa chuột', 'hạt điều', 'dầu ô liu', 'giấm balsamic', 'mật ong', 'muối', 'tiêu đen']},
 {'name': 'Cá hồi nướng',
  'thành phần': ['cá hồi', 'dầu ô liu', 'chanh', 'muối', 'tiêu đen', 'hành tây', 'thì là']},
 {'name': 'Smoothie xoài và bơ',
  'thành phần': ['xoài', 'bơ', 'sữa hạnh nhân', 'mật ong', 'chia seeds']},
 {'name': 'Cháo yến mạch',
  'thành phần': ['yến mạch', 'nước', 'sữa', 'muối', 'hạnh nhân', 'mật ong', 'quả mọng']},
 {'name': 'Bánh pancake khoai lang',
  'thành phần': ['khoai lang', 'bột mì đa dụng', 'sữa', 'đường', 'bột nở', 'muối', 'dầu thực vật']},
 {'name': 'Soup lơ xanh',
  'thành phần': ['lơ xanh', 'hành tây', 'tỏi', 'nước dùng thực vật', 'kem tươi', 'muối', 'tiêu đen']},
 {'All': 'name': 'Mỳ spaghetti trộn rau',
  'composition': ['mỳ spaghetti', 'cà chua', 'húng quế', 'tỏi', 'dầu ô liu', 'pho mát Parmesan']},
 

["Dưới đây là 10 công thức nấu ăn lành mạnh gồm đầy đủ các thành phần:\n\n[{'name': 'Salad gà nướng',\n  'thành phần': ['ức gà nướng', 'lá rau baby', 'cà chua bi', 'dưa chuột', 'hạt điều', 'dầu ô liu', 'giấm balsamic', 'mật ong', 'muối', 'tiêu đen']},\n {'name': 'Cá hồi nướng',\n  'thành phần': ['cá hồi', 'dầu ô liu', 'chanh', 'muối', 'tiêu đen', 'hành tây', 'thì là']},\n {'name': 'Smoothie xoài và bơ',\n  'thành phần': ['xoài', 'bơ', 'sữa hạnh nhân', 'mật ong', 'chia seeds']},\n {'name': 'Cháo yến mạch',\n  'thành phần': ['yến mạch', 'nước', 'sữa', 'muối', 'hạnh nhân', 'mật ong', 'quả mọng']},\n {'name': 'Bánh pancake khoai lang',\n  'thành phần': ['khoai lang', 'bột mì đa dụng', 'sữa', 'đường', 'bột nở', 'muối', 'dầu thực vật']},\n {'name': 'Soup lơ xanh',\n  'thành phần': ['lơ xanh', 'hành tây', 'tỏi', 'nước dùng thực vật', 'kem tươi', 'muối', 'tiêu đen']},\n {'All': 'name': 'Mỳ spaghetti trộn rau',\n  'composition': ['mỳ spaghetti', 'cà chua', 'húng quế', 'tỏi', 'dầu ô liu', 'pho m

In [346]:
trans_viet_resp2 = translate_to_eng(viet_resp2)

In [347]:
viet_df2, viet_err2 = create_dataframe_from_responses_2(trans_viet_resp2)

Error parsing response at index 0: invalid syntax (<unknown>, line 1)
Error parsing response at index 1: invalid syntax (<unknown>, line 1)
Error parsing response at index 2: invalid syntax (<unknown>, line 1)
Error parsing response at index 3: invalid syntax (<unknown>, line 1)
Error parsing response at index 4: invalid syntax (<unknown>, line 1)
Error parsing response at index 5: invalid syntax (<unknown>, line 1)
Error parsing response at index 6: invalid syntax (<unknown>, line 1)


In [350]:
viet_df2

Unnamed: 0,name,ingredients
0,mixed vegetable salad,"[lettuce, tomato, cucumber, red onion, olive o..."
1,herb grilled chicken,"[skinless boneless chicken breast, minced garl..."
2,grilled salmon,"[salmon, olive oil, lemon juice, pepper, salt,..."
3,steamed broccoli,"[broccoli, water, salt, pepper]"
4,stir-fried tofu with vegetables,"[tofu, sesame oil, broccoli, carrot, bell pepp..."
5,oatmeal,"[oats, water, salt, honey, seasonal fruit]"
6,fruit smoothie,"[banana, strawberries, almond milk, honey, ice]"
7,quinoa mixed with vegetables,"[quinoa, olive oil, lemon juice, cherry tomato..."
8,grilled turmeric tofu,"[tofu, turmeric powder, coconut oil, soy sauce..."
9,roasted sweet potatoes,"[sweet potato, olive oil, salt, pepper]"


In [348]:
all_dishes = []

# Regex to find dishes with different 'name' labels
dish_pattern = r"\{\s*'(?:m|the )?name'\s*:\s*'([^']+)',\s*'ingredients'\s*:\s*\[([^]]+)\]\}"

# Process each item in the data list
for _, dishes_str in viet_err2:
    # Adjust quotes for matching
    adjusted_str = dishes_str.replace('"', "'")
    # Find all matches in the current string
    matches = re.findall(dish_pattern, adjusted_str)
    if not matches:
        print("No matches found in:", dishes_str)
    for match in matches:
        dish_name = match[0]
        # Remove extra quotes and split ingredients into a list
        ingredients_list = [ingredient.strip().strip("'") for ingredient in match[1].split(',')]
        all_dishes.append({'name': dish_name, 'ingredients': ingredients_list})

In [349]:
viet_err_df2 = pd.DataFrame(all_dishes)
viet_err_df2

Unnamed: 0,name,ingredients
0,grilled chicken salad,"[grilled chicken breast, baby leaves, cherry t..."
1,grilled salmon,"[salmon, olive oil, lemon, salt, black pepper,..."
2,mango and avocado smoothie,"[mango, avocado, almond milk, honey, chia seeds]"
3,oatmeal,"[oats, water, milk, salt, almonds, honey, berr..."
4,sweet potato pancake,"[sweet potato, all-purpose flour, milk, sugar,..."
...,...,...
61,oatmeal,"[oats, milk, honey, berries, chia seeds]"
62,tofu in tomato sauce,"[tofu, tomato, garlic, onion, olive oil, basil..."
63,broccoli soup,"[broccoli, onion, vegetable broth, vegetable c..."
64,roasted sweet potatoes,"[sweet potatoes, olive oil, salt, herbs]"


In [351]:
viet_result_df = pd.concat([viet_err_df, viet_df2, viet_err_df2[:63]], axis=0)

In [474]:
viet_result_df.reset_index(drop=True, inplace=True)

In [475]:
viet_result_df

Unnamed: 0,name,ingredients
0,boiled chicken with vegetables,"[chicken breast, carrots, potatoes, broccoli, ..."
1,tuna salad,"[canned tuna, lettuce, cherry tomatoes, cucumb..."
2,grilled broccoli,"[broccoli, olive oil, minced garlic, salt, bla..."
3,vegetable sandwich,"[whole grain bread, avocado, bell pepper, lett..."
4,oatmeal,"[oats, water, honey, fresh berries]"
...,...,...
295,fruit smoothie,"[banana, strawberries, raspberries, yogurt, ho..."
296,steamed fish with ginger,"[fish fillet, ginger, scallion, soy sauce, ses..."
297,boiled chicken with cabbage,"[chicken breast, cabbage, onion, pepper, salt]"
298,oatmeal,"[oats, milk, honey, berries, chia seeds]"


In [476]:
viet_result_df.to_pickle("../dataset/viet_result.pkl")

### Collect Korean Version

In [83]:
user_prompt_korean = generate_user_prompt('user_prompt_korean.txt')
system_prompt_korean = generate_system_prompt('system_prompt_korean.txt')

In [84]:
korean_resp = call_openai_api(user_prompt_korean, system_prompt_korean, 30)
korean_resp

[{'이름': '퀴노아 샐러드',
  '성분': ['퀴노아', '토마토', '오이', '레몬즙', '올리브오일', '후추', '소금', '파슬리']},
 {'이름': '브로콜리 스프',
  '성분': ['브로콜리', '양파', '마늘', '치킨스톡', '올리브오일', '소금', '후추']},
 {'이름': '닭가슴살 구이',
  '성분': ['닭가슴살', '파프리카', '마늘가루', '올리브오일', '레몬즙', '허브믹스']},
 {'이름': '귀리와 견과류 요거트',
  '성분': ['플레인 요거트', '생귀리', '꿀', '아몬드', '호두', '블루베리']},
 {'이름': '스피룰리나 스무디',
  '성분': ['스피룰리나 파우더', '바나나', '시금치', '아몬드밀크', '차이아씨드', '꿀']},
 {'이름': '토마토 바질 파스타',
  '성분': ['통밀 파스타', '체리 토마토', '바질', '마늘', '올리브오일', '파르메산 치즈']},
 {'이름': '아보카도 샐러드',
  '성분': ['아보카도', '적상추', '레모네이드', '올리브오일', '크랜베리', '호두']},
 {'이름': '강황 쌀',
  '성분': ['현미', '강황가루', '코코넛오일', '소금', '겨자씨', '커리잎']},
 {'이름': '버섯과 채소의 볶음',
  '성분': ['양송이버섯', '브로콜리', '당근', '간장', '참기름', '마늘']},
 {'이름': '가지 구이',
  '성분': ['가지', '올리브오일', '마늘가루', '파프리카', '소금', '후추']}]

[{'이름': '두부와 야채 스티어 프라이',
  '성분': ['두부', '브로콜리', '당근', '피망', '양파', '간장', '참기름', '마늘', '생강']},
 {'이름': '현미 잡곡밥',
  '성분': ['현미', '보리', '흑미', '수수', '옥수수']},
 {'이름': '닭가슴살 샐러드',
  '성분': ['닭가슴살', '스피나치', '아보카도', '토마토', '오이'

["[{'이름': '퀴노아 샐러드',\n  '성분': ['퀴노아', '토마토', '오이', '레몬즙', '올리브오일', '후추', '소금', '파슬리']},\n {'이름': '브로콜리 스프',\n  '성분': ['브로콜리', '양파', '마늘', '치킨스톡', '올리브오일', '소금', '후추']},\n {'이름': '닭가슴살 구이',\n  '성분': ['닭가슴살', '파프리카', '마늘가루', '올리브오일', '레몬즙', '허브믹스']},\n {'이름': '귀리와 견과류 요거트',\n  '성분': ['플레인 요거트', '생귀리', '꿀', '아몬드', '호두', '블루베리']},\n {'이름': '스피룰리나 스무디',\n  '성분': ['스피룰리나 파우더', '바나나', '시금치', '아몬드밀크', '차이아씨드', '꿀']},\n {'이름': '토마토 바질 파스타',\n  '성분': ['통밀 파스타', '체리 토마토', '바질', '마늘', '올리브오일', '파르메산 치즈']},\n {'이름': '아보카도 샐러드',\n  '성분': ['아보카도', '적상추', '레모네이드', '올리브오일', '크랜베리', '호두']},\n {'이름': '강황 쌀',\n  '성분': ['현미', '강황가루', '코코넛오일', '소금', '겨자씨', '커리잎']},\n {'이름': '버섯과 채소의 볶음',\n  '성분': ['양송이버섯', '브로콜리', '당근', '간장', '참기름', '마늘']},\n {'이름': '가지 구이',\n  '성분': ['가지', '올리브오일', '마늘가루', '파프리카', '소금', '후추']}]\n",
 "[{'이름': '두부와 야채 스티어 프라이',\n  '성분': ['두부', '브로콜리', '당근', '피망', '양파', '간장', '참기름', '마늘', '생강']},\n {'이름': '현미 잡곡밥',\n  '성분': ['현미', '보리', '흑미', '수수', '옥수수']},\n {'이름': '닭가슴살 샐러드',\n  '성분': ['닭가슴

In [85]:
# with open('korean_resp.pkl', 'wb') as f:
#     pickle.dump(korean_resp, f)

In [354]:
with open('responses/korean_resp.pkl', 'rb') as f:
    korean_resp = pickle.load(f)

In [355]:
trans_kor_resp = translate_to_eng(korean_resp)

In [356]:
kor_df, kor_err = create_dataframe_from_responses_2(trans_kor_resp)

Error parsing response at index 2: unterminated string literal (detected at line 20) (<unknown>, line 20)
Error parsing response at index 7: unterminated string literal (detected at line 18) (<unknown>, line 18)
Error parsing response at index 17: closing parenthesis ')' does not match opening parenthesis '[' (<unknown>, line 12)
Error parsing response at index 21: closing parenthesis ']' does not match opening parenthesis '{' on line 11 (<unknown>, line 12)
Error parsing response at index 24: unterminated string literal (detected at line 17) (<unknown>, line 17)


In [357]:
kor_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, tomato, cucumber, lemon juice, olive ..."
1,broccoli soup,"[broccoli, onion, garlic, chicken stock, olive..."
2,grilled chicken breast,"[chicken breast, paprika, garlic powder, olive..."
3,oats and nuts yogurt,"[plain yogurt, raw oats, honey, almonds, walnu..."
4,spirulina smoothie,"[spirulina powder, banana, spinach, almond mil..."
...,...,...
245,tomato basil pasta,"[spaghetti, fresh tomatoes, fresh basil, garli..."
246,omelet,"[eggs, green pepper, onion, tomato, cheese, ol..."
247,salmon grill,"[salmon fillet, lemon, garlic, olive oil, herbs]"
248,yogurt and fruit bowl,"[plain yogurt, fresh fruits (strawberries, blu..."


In [360]:
kor_err

[(2,
  "[{'name': 'quinoa salad',\n  'ingredients': ['quinoa', 'cucumber', 'tomato', 'red bell pepper', 'lemon juice', 'olive oil', 'salt', 'black pepper']},\n {'name': 'scrambled eggs',\n  'ingredients': ['eggs', 'spinach', 'green pepper', 'onion', 'olive oil', 'salt', 'black pepper']},\n {'name': 'broccoli soup',\n  'ingredients': ['broccoli', 'onion', 'garlic', 'chicken broth', 'olive oil', 'salt', 'black pepper']},\n {'name': 'grilled chicken breast',\n  'ingredients': ['chicken breast', 'lemon juice', 'garlic', 'rosemary', 'olive oil', 'salt', 'black pepper']},\n {'name': 'fruit yogurt parfait',\n  'ingredients': ['plain yogurt', 'fresh berries', 'honey', 'granola']},\n {'name': 'tomato basil pasta',\n  'ingredients': ['whole wheat pasta', 'fresh tomatoes', 'basil', 'garlic', 'olive oil', 'parmesan cheese', 'salt', 'black pepper']},\n {'name': 'avocado toast',\n  'ingredients': ['whole wheat bread', 'avocado', 'lemon juice', 'cheongyang pepper', 'salt', 'black pepper']},\n {'name'

In [358]:
all_dishes = []

# Regex to find dishes with different 'name' labels
dish_pattern = r"\{\s*'(?:m|the )?name'\s*:\s*'([^']+)',\s*'ingredients'\s*:\s*\[([^]]+)\]\}"

# Process each item in the data list
for _, dishes_str in kor_err:
    # Adjust quotes for matching
    adjusted_str = dishes_str.replace('"', "'")
    # Find all matches in the current string
    matches = re.findall(dish_pattern, adjusted_str)
    if not matches:
        print("No matches found in:", dishes_str)
    for match in matches:
        dish_name = match[0]
        # Remove extra quotes and split ingredients into a list
        ingredients_list = [ingredient.strip().strip("'") for ingredient in match[1].split(',')]
        all_dishes.append({'name': dish_name, 'ingredients': ingredients_list})

In [359]:
kor_err_df = pd.DataFrame(all_dishes)
kor_err_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, cucumber, tomato, red bell pepper, le..."
1,scrambled eggs,"[eggs, spinach, green pepper, onion, olive oil..."
2,broccoli soup,"[broccoli, onion, garlic, chicken broth, olive..."
3,grilled chicken breast,"[chicken breast, lemon juice, garlic, rosemary..."
4,fruit yogurt parfait,"[plain yogurt, fresh berries, honey, granola]"
5,tomato basil pasta,"[whole wheat pasta, fresh tomatoes, basil, gar..."
6,avocado toast,"[whole wheat bread, avocado, lemon juice, cheo..."
7,pumpkin smoothie,"[pumpkin, banana, almond milk, cinnamon, honey]"
8,vegan chocolate mousse,"[avocado, cocoa powder, honey, vanilla extract..."
9,greek salad,"[fresh tomato, cucumber, black olive, feta che..."


In [362]:
new_data = pd.DataFrame({
    'name': ['simple greek salad', 'young lettuce salad'],
    'ingredients': [
        ['tomato', 'cucumber', 'red onion', 'olive', 'feta cheese', 'olive oil', 'lemon juice', 'salt', 'pepper'],
        ['young lettuces', 'avocado', 'tomato', 'cucumber', 'olive oil', 'balsamic vinegar']
    ]
})

In [None]:
kor_result_df = pd.concat([kor_df, kor_err_df], axis=0)
kor_result_df = pd.concat([kor_result_df, new_data], axis=0)

In [477]:
kor_result_df.reset_index(drop=True, inplace=True)

In [478]:
kor_result_df

Unnamed: 0,name,ingredients
0,quinoa salad,"[quinoa, tomato, cucumber, lemon juice, olive ..."
1,broccoli soup,"[broccoli, onion, garlic, chicken stock, olive..."
2,grilled chicken breast,"[chicken breast, paprika, garlic powder, olive..."
3,oats and nuts yogurt,"[plain yogurt, raw oats, honey, almonds, walnu..."
4,spirulina smoothie,"[spirulina powder, banana, spinach, almond mil..."
...,...,...
295,butternut pumpkin soup,"[butternut pumpkin, onion, garlic, vegetable s..."
296,tomato couscous,"[couscous, cherry tomato, cucumber, lemon juic..."
297,lentil salad,"[lentils, cucumber, tomato peas, onion, lemon ..."
298,simple greek salad,"[tomato, cucumber, red onion, olive, feta chee..."


In [479]:
kor_result_df.to_pickle("../dataset/kor_result.pkl")

### Collect Russian Version

In [371]:
user_prompt_russian = generate_user_prompt('prompts/user_prompt_russian.txt')
system_prompt_russian = generate_system_prompt('prompts/system_prompt_russian.txt')

In [87]:
russian_resp = call_openai_api(user_prompt_russian, system_prompt_russian, 30)
russian_resp

[{'name': 'овощной салат с киноа',
  'ингредиенты': ['киноа', 'огурцы', 'помидоры черри', 'авокадо', 'лимонный сок', 'оливковое масло', 'петрушка']},
 {'name': 'овсяная каша с ягодами',
  'ингредиенты': ['овсяные хлопья', 'молоко или вода', 'мед', 'смесь ягод', 'корица']},
 {'name': 'смузи с зеленью',
  'ингредиенты': ['шпинат', 'банан', 'греческий йогурт', 'мед', 'молоко', 'лед']},
 {'name': 'тушеные брокколи с чесноком',
  'ингредиенты': ['брокколи', 'чеснок', 'оливковое масло', 'соль', 'перец']},
 {'name': 'лосось на гриле с травами',
  'ингредиенты': ['лосось', 'лимонный сок', 'оливковое масло', 'укроп', 'чеснок', 'соль', 'перец']},
 {'name': 'цыпленок в медово-горчичном соусе',
  'ингредиенты': ['куриная грудка', 'мед', 'горчица', 'оливковое масло', 'лимонный сок', 'паприка', 'соль']},
 {'name': 'салат из чечевицы с овощами',
  'ингредиенты': ['чечевица', 'морковь', 'красный лук', 'огурцы', 'оливковое масло', 'лимонный сок', 'петрушка']},
 {'nbsp:name': 'тушеный тыквенный суп',
  

["[{'name': 'овощной салат с киноа',\n  'ингредиенты': ['киноа', 'огурцы', 'помидоры черри', 'авокадо', 'лимонный сок', 'оливковое масло', 'петрушка']},\n {'name': 'овсяная каша с ягодами',\n  'ингредиенты': ['овсяные хлопья', 'молоко или вода', 'мед', 'смесь ягод', 'корица']},\n {'name': 'смузи с зеленью',\n  'ингредиенты': ['шпинат', 'банан', 'греческий йогурт', 'мед', 'молоко', 'лед']},\n {'name': 'тушеные брокколи с чесноком',\n  'ингредиенты': ['брокколи', 'чеснок', 'оливковое масло', 'соль', 'перец']},\n {'name': 'лосось на гриле с травами',\n  'ингредиенты': ['лосось', 'лимонный сок', 'оливковое масло', 'укроп', 'чеснок', 'соль', 'перец']},\n {'name': 'цыпленок в медово-горчичном соусе',\n  'ингредиенты': ['куриная грудка', 'мед', 'горчица', 'оливковое масло', 'лимонный сок', 'паприка', 'соль']},\n {'name': 'салат из чечевицы с овощами',\n  'ингредиенты': ['чечевица', 'морковь', 'красный лук', 'огурцы', 'оливковое масло', 'лимонный сок', 'петрушка']},\n {'nbsp:name': 'тушеный ты

In [88]:
# with open('russian_resp.pkl', 'wb') as f:
#     pickle.dump(russian_resp, f)

In [365]:
with open('responses/russian_resp.pkl', 'rb') as f:
    russian_resp = pickle.load(f)

In [366]:
trans_russian_resp = translate_to_eng(russian_resp)

In [550]:
trans_russian_resp

["[{'name': 'vegetable salad with quinoa',\n  'ingredients': ['quinoa', 'cucumbers', 'cherry tomatoes', 'avocado', 'lemon juice', 'olive oil', 'parsley']},\n {'name': 'oatmeal with berries',\n  'ingredients': ['oatmeal', 'milk or water', 'honey', 'mixed berries', 'cinnamon']},\n {'name': 'green smoothie',\n  'ingredients': ['spinach', 'banana', 'greek yogurt', 'honey', 'milk', 'ice']},\n {'name': 'stewed broccoli with garlic',\n  'ingredients': ['broccoli', 'garlic', 'olive oil', 'salt', 'pepper']},\n {'name': 'herb grilled salmon',\n  'ingredients': ['salmon', 'lemon juice', 'olive oil', 'dill', 'garlic', 'salt', 'pepper']},\n {'name': 'chicken in honey mustard sauce',\n  'ingredients': ['chicken breast', 'honey', 'mustard', 'olive oil', 'lemon juice', 'paprika', 'salt']},\n {'name': 'lentil salad with vegetables',\n  'ingredients': ['lentils', 'carrots', 'red onions', 'cucumbers', 'olive oil', 'lemon juice', 'parsley']},\n {'nbsp:name': 'pumpkin stew',\n  'ingredients': ['butternut s

In [367]:
russian_df, russian_err = create_dataframe_from_responses_2(trans_russian_resp)

Error parsing response at index 6: unterminated string literal (detected at line 17) (<unknown>, line 17)
Error parsing response at index 8: unterminated string literal (detected at line 11) (<unknown>, line 11)
Error parsing response at index 9: closing parenthesis ']' does not match opening parenthesis '{' on line 9 (<unknown>, line 10)
Error parsing response at index 10: unterminated string literal (detected at line 19) (<unknown>, line 19)
Error parsing response at index 15: unterminated string literal (detected at line 6) (<unknown>, line 6)
Error parsing response at index 16: unterminated string literal (detected at line 24) (<unknown>, line 24)


In [368]:
russian_df

Unnamed: 0,name,ingredients
0,vegetable salad with quinoa,"[quinoa, cucumbers, cherry tomatoes, avocado, ..."
1,oatmeal with berries,"[oatmeal, milk or water, honey, mixed berries,..."
2,green smoothie,"[spinach, banana, greek yogurt, honey, milk, ice]"
3,stewed broccoli with garlic,"[broccoli, garlic, olive oil, salt, pepper]"
4,herb grilled salmon,"[salmon, lemon juice, olive oil, dill, garlic,..."
...,...,...
235,chicken cutlets with vegetables,"[chicken breast fillet, carrot, corn, green on..."
236,fish baked with vegetables,"[white fish fillet, tomatoes, bell pepper, oni..."
237,beet and walnut salad,"[beet, walnuts, green salad, goat cheese, oliv..."
238,summer fruit salad,"[strawberry, kiwi, mango, bananas, orange juic..."


In [369]:
all_dishes = []

# Regex to find dishes with different 'name' labels
dish_pattern = r"\{\s*'(?:m|the )?name'\s*:\s*'([^']+)',\s*'ingredients'\s*:\s*\[([^]]+)\]\}"

# Process each item in the data list
for _, dishes_str in russian_err:
    # Adjust quotes for matching
    adjusted_str = dishes_str.replace('"', "'")
    # Find all matches in the current string
    matches = re.findall(dish_pattern, adjusted_str)
    if not matches:
        print("No matches found in:", dishes_str)
    for match in matches:
        dish_name = match[0]
        # Remove extra quotes and split ingredients into a list
        ingredients_list = [ingredient.strip().strip("'") for ingredient in match[1].split(',')]
        all_dishes.append({'name': dish_name, 'ingredients': ingredients_list})

In [370]:
russian_err_df = pd.DataFrame(all_dishes)
russian_err_df

Unnamed: 0,name,ingredients
0,quinoa salad with avocado,"[quinoa, avocado, cherry tomatoes, cucumber, r..."
1,water oatmeal,"[oatmeal, water, salt, honey, berries, nuts]"
2,buckwheat porridge with mushrooms,"[buckwheat, mushrooms, onions, carrots, vegeta..."
3,spinach and banana smoothie,"[spinach, banana, diamond milk, honey, flaxseed]"
4,vegetable lentil soup,"[lentils, potatoes, carrots, tomatoes, onions,..."
5,stewed vegetables,"[zucchini, eggplant, bell pepper, tomatoes, on..."
6,grilled chicken breast,"[chicken breast, lemon juice, olive oil, garli..."
7,caesar salad with shrimp,"[romaine lettuce, shrimp, cherry tomatoes, par..."
8,mango coconut milk smoothie,"[mango, coconut milk, yogurt, honey, hazelnut ..."
9,quinoa and black bean salad,"[quinoa, black beans, tomatoes, lime, avocado,..."


In [372]:
russian_resp2 = call_openai_api(user_prompt_russian, system_prompt_russian, 2)
russian_resp2

[{'name': 'салат из киноа и овощей',
  'ингредиенты': ['киноа', 'огурцы', 'помидоры черри', 'лимонный сок', 'оливковое масло', 'петрушка', 'морская соль', 'черный перец']},
 {'name': 'овсяная каша с ягодами',
  'ингредиенты': ['овсяные хлопья', 'альмондовое молоко', 'мед', 'голубика', 'малина']},
 {'name': 'смузи с зеленью',
  'ингредиенты': ['шпинат', 'банан', 'мороженое манго', 'греческий йогурт', 'мед', 'лимонный сок']},
 {'name': 'овощной салат с авокадо',
  'ингредиенты': ['авокадо', 'весенний лук', 'аругула', 'черри помидоры', 'оливковое масло', 'лимонный сок', 'морская соль', 'черный перец']},
 {'name': 'тушеная чечевица с овощами',
  'ингредиенты': ['чечевица', 'морковь', 'лук репчатый', 'сливочное масло', 'томатная паста', 'куркума', 'вода', 'соль', 'перец']},
 {'name': 'суп из сладкого картофеля',
  'ингредиенты': ['сладкий картофель', 'лук репчатый', 'чеснок', 'имбирь', 'куриный бульон', 'сливки', 'соль', 'перец', 'зелень']},
 {'name': 'табуле',
  'ингредиенты': ['булгур', '

["[{'name': 'салат из киноа и овощей',\n  'ингредиенты': ['киноа', 'огурцы', 'помидоры черри', 'лимонный сок', 'оливковое масло', 'петрушка', 'морская соль', 'черный перец']},\n {'name': 'овсяная каша с ягодами',\n  'ингредиенты': ['овсяные хлопья', 'альмондовое молоко', 'мед', 'голубика', 'малина']},\n {'name': 'смузи с зеленью',\n  'ингредиенты': ['шпинат', 'банан', 'мороженое манго', 'греческий йогурт', 'мед', 'лимонный сок']},\n {'name': 'овощной салат с авокадо',\n  'ингредиенты': ['авокадо', 'весенний лук', 'аругула', 'черри помидоры', 'оливковое масло', 'лимонный сок', 'морская соль', 'черный перец']},\n {'name': 'тушеная чечевица с овощами',\n  'ингредиенты': ['чечевица', 'морковь', 'лук репчатый', 'сливочное масло', 'томатная паста', 'куркума', 'вода', 'соль', 'перец']},\n {'name': 'суп из сладкого картофеля',\n  'ингредиенты': ['сладкий картофель', 'лук репчатый', 'чеснок', 'имбирь', 'куриный бульон', 'сливки', 'соль', 'перец', 'зелень']},\n {'name': 'табуле',\n  'ингредиенты

In [373]:
trans_russian_resp2 = translate_to_eng(russian_resp2)

In [551]:
trans_russian_resp2

["[{'name': 'quinoa and vegetable salad',\n  'ingredients': ['quinoa', 'cucumbers', 'cherry tomatoes', 'lemon juice', 'olive oil', 'parsley', 'sea salt', 'black pepper']},\n {'name': 'oatmeal with berries',\n  'ingredients': ['oatmeal', 'almond milk', 'honey', 'blueberry', 'raspberry']},\n {'name': 'green smoothie',\n  'ingredients': ['spinach', 'banana', 'mango ice cream', 'greek yogurt', 'honey', 'lemon juice']},\n {'name': 'vegetable salad with avocado',\n  'ingredients': ['avocado', 'spring onion', 'arugula', 'cherry tomatoes', 'olive oil', 'lemon juice', 'sea salt', 'black pepper']},\n {'name': 'lentil stew with vegetables',\n  'ingredients': ['lentils', 'carrots', 'onions', 'butter', 'tomato paste', 'turmeric', 'water', 'salt', 'pepper']},\n {'name': 'sweet potato soup',\n  'ingredients': ['sweet potatoes', 'onion', 'garlic', 'ginger', 'chicken broth', 'cream', 'salt', 'pepper', 'herbs']},\n {'name': 'tabule',\n  'ingredients': ['bulgur', 'tomatoes', 'cucumbers', 'parsley', 'mint

In [374]:
russian_df2, russian_err2 = create_dataframe_from_responses_2(trans_russian_resp2)

In [375]:
russian_df2

Unnamed: 0,name,ingredients
0,quinoa and vegetable salad,"[quinoa, cucumbers, cherry tomatoes, lemon jui..."
1,oatmeal with berries,"[oatmeal, almond milk, honey, blueberry, raspb..."
2,green smoothie,"[spinach, banana, mango ice cream, greek yogur..."
3,vegetable salad with avocado,"[avocado, spring onion, arugula, cherry tomato..."
4,lentil stew with vegetables,"[lentils, carrots, onions, butter, tomato past..."
5,sweet potato soup,"[sweet potatoes, onion, garlic, ginger, chicke..."
6,tabule,"[bulgur, tomatoes, cucumbers, parsley, mint, l..."
7,chicken thighs with vegetables in the oven,"[chicken thighs, potatoes, carrots, onion, gar..."
8,greek yogurt with honey and nuts,"[greek yogurt, honey, walnuts, raisins]"
9,baked salmon with asparagus,"[salmon, asparagus, lemon, garlic, olive oil, ..."


In [None]:
russian_result_df = pd.concat([russian_df, russian_err_df, russian_df2[:14]], axis=0)

In [480]:
russian_result_df.reset_index(drop=True, inplace=True)

In [481]:
russian_result_df

Unnamed: 0,name,ingredients
0,vegetable salad with quinoa,"[quinoa, cucumbers, cherry tomatoes, avocado, ..."
1,oatmeal with berries,"[oatmeal, milk or water, honey, mixed berries,..."
2,green smoothie,"[spinach, banana, greek yogurt, honey, milk, ice]"
3,stewed broccoli with garlic,"[broccoli, garlic, olive oil, salt, pepper]"
4,herb grilled salmon,"[salmon, lemon juice, olive oil, dill, garlic,..."
...,...,...
295,baked salmon with asparagus,"[salmon, asparagus, lemon, garlic, olive oil, ..."
296,quinoa salad with vegetables,"[quinoa, cucumber, cherry tomatoes, red onion,..."
297,greek yogurt with honey and nuts,"[greek yogurt, honey, walnuts, cinnamon]"
298,vegetable smoothie,"[spinach, kale, cucumber, apple, banana, water..."


In [548]:
float_rows = russian_result_df['ingredients'].apply(lambda x: isinstance(x, float))
rows_with_floats = russian_result_df[float_rows]
print("Rows containing floats:")
print(rows_with_floats)

Rows containing floats:
                  name ingredients
79   vegetable nuggets         NaN
169                NaN         NaN
198                NaN         NaN
199                NaN         NaN


In [552]:
russian_result_df.at[197, 'name']

'apple chips'

In [553]:
correct_name = {
    169: 'arugula and pear salad',
    198: 'vegetable soup with quinoa',
    199: 'baked cod with vegetables'
}

for idx, name in correct_name.items():
    russian_result_df.at[idx, 'name'] = name

In [554]:
correct_ingredients = {
    79: ['carrots', 'cauliflower', 'corn flour', 'breadcrumbs', 'turmeric', 'paprika'],
    169: ['arugula', 'pear', 'blue cheese', 'walnuts', 'balsamic vinegar', 'olive oil'],
    198: ['quinoa', 'potatoes', 'carrots', 'onion', 'garlic', 'vegetable broth', 'spinach', 'lemon juice'],
    199: ['cod fillet', 'broccoli', 'salt', 'pepper', 'lemon', 'garlic', 'olive oil']
}

for idx, ingredients in correct_ingredients.items():
    russian_result_df.at[idx, 'ingredients'] = ingredients

In [555]:
russian_result_df.to_pickle("../dataset/russian_result.pkl")