In [1]:
#Install the libraries with: pip install numpy pandas googletrans==4.0.0-rc1

In [31]:
import numpy as np 
import pandas as pd
import ast
from googletrans import Translator

In [20]:
# Load the original recipe dataset; the first CSV column becomes the index
recipes_df = pd.read_csv(filepath_or_buffer='gz_recipe.csv', index_col=0)

In [27]:
# Preview the first five rows
recipes_df.head(n=5)

Unnamed: 0,Nome,Categoria,Persone/Pezzi,Ingredienti,Steps
0,Tiramisù,Dolci,8,"[['Mascarpone', '750g'], ['Uova', '260g'], ['S...",Per preparare il tiramisù preparate il caffé c...
1,Cookies,Dolci,12,"[['Farina 00', '195g'], ['Burro', '100g'], ['B...","Per preparare i cookies, assicuratevi che il b..."
2,Pancake allo sciroppo d'acero,Dolci,4,"[['Burro', '25g'], ['Farina 00', '125g'], ['Uo...",Iniziamo la preparazione dei pancake fondendo ...
3,Crema al mascarpone,Dolci,4,"[['Mascarpone', '500g'], ['Zucchero', '125g'],...",Per preparare la crema al mascarpone versate i...
4,Crepe dolci e salate (ricetta base),Dolci,15,"[['Uova', '3'], ['Farina 00', '250g'], ['Latte...",Per preparare le crepe dolci e salate iniziate...


In [23]:
# Remove the 'Link' column, which is not used by the rest of the notebook.
# errors='ignore' keeps this cell re-runnable: executing it a second time (or
# loading a CSV without that column) is a no-op instead of raising KeyError.
recipes_df = recipes_df.drop(columns=['Link'], errors='ignore')

In [29]:
# Translate the Italian column headers to English (columns not listed keep their name)
new_column_names = dict([
    ('Nome', 'Name'),
    ('Categoria', 'Category'),
    ('Persone/Pezzi', 'Servings'),
    ('Ingredienti', 'Ingredients'),
])
recipes_df.rename(columns=new_column_names, inplace=True)

In [25]:
# Discard every row containing a missing value, then renumber the index from 0
recipes_df = recipes_df.dropna().reset_index(drop=True)

In [32]:
# Translating the recipes to English.
# One googletrans client is created here; the translation code further down
# reads it as the module-level name `translator`.
translator = Translator()

def translate_column(column_data):
    """Translate every value of a column from Italian to English.

    Parameters
    ----------
    column_data : pandas.Series or iterable
        Values to translate; each one is converted with ``str()`` first.

    Returns
    -------
    pandas.Series
        Translated strings in input order. When ``column_data`` is a Series
        the result reuses its index, so assigning it back to the source
        DataFrame stays row-aligned even if that index is not 0..n-1.
        A value whose translation raises is reported with ``print`` and
        stored as ``""`` (note: ``dropna`` does not treat ``""`` as missing).
    """
    results = []
    for text in column_data:
        try:
            translated_text = translator.translate(str(text), src='it', dest='en').text
            results.append(translated_text)
        except Exception as e:
            # Best effort: one failed request must not abort the whole column.
            print(f"Error translating text: {text}, Error: {e}")
            results.append("")
    # pd.Series(results) alone gets a fresh RangeIndex; column assignment aligns
    # on index labels, so a non-default index would produce NaN / shifted rows.
    index = column_data.index if isinstance(column_data, pd.Series) else None
    return pd.Series(results, index=index)

# Replace the Italian recipe names with their English translation
recipes_df["Name"] = translate_column(column_data=recipes_df["Name"])


In [None]:
def translate_column_steps(column_data):
    """Translate the non-null entries of a Series from Italian to English.

    Null entries are passed through untouched. A failing translation is
    handled the way ``translate_column`` handles it: the error is printed and
    the entry becomes ``""``, so a single bad request does not abort the
    whole column.

    Parameters
    ----------
    column_data : pandas.Series
        Texts to translate.

    Returns
    -------
    pandas.Series
        Series with the same index as ``column_data`` holding the results.
    """
    def _translate(text):
        if not pd.notnull(text):
            return text
        try:
            return translator.translate(text, src='it', dest='en').text
        except Exception as e:
            print(f"Error translating text: {text}, Error: {e}")
            return ""

    return column_data.apply(_translate)

# Replace the Italian preparation steps with their English translation
recipes_df["Steps"] = translate_column_steps(column_data=recipes_df["Steps"])

In [None]:
#Extracting unique ingredients and categories and translating them to English

# Turn each stringified ingredient list into a real Python list (missing -> [])
recipes_df["Ingredients"] = recipes_df["Ingredients"].apply(
    lambda raw: [] if not pd.notnull(raw) else ast.literal_eval(raw)
)

# Every distinct ingredient name, i.e. element 0 of each entry
unique_ingredients = {
    entry[0]
    for recipe_ingredients in recipes_df["Ingredients"]
    for entry in recipe_ingredients
}

# Translate each distinct name once; keep the original name if the request fails
translations = {}
for ingredient in unique_ingredients:
    try:
        english_name = translator.translate(ingredient, src='it', dest='en').text
    except Exception as e:
        print(f"Error translating ingredient: {ingredient}, Error: {e}")
        english_name = ingredient
    translations[ingredient] = english_name

def translate_ingredients(ingredient_list):
    """Return a copy of the list with each name looked up in ``translations``.

    Element 0 of every entry is replaced by its value in the module-level
    ``translations`` dict; element 1 is carried over unchanged.
    """
    translated = []
    for entry in ingredient_list:
        translated.append([translations[entry[0]], entry[1]])
    return translated

# Swap every ingredient name for its English translation
recipes_df["Ingredients"] = recipes_df["Ingredients"].apply(translate_ingredients)

# The rename cell maps 'Categoria' to 'Category' (singular); indexing
# recipes_df["Categories"] raised KeyError, so the real column name is used.
unique_categories = set(recipes_df["Category"])

# Translate each distinct category once; keep the original if the request fails
category_translations = {}
for category in unique_categories:
    try:
        category_translations[category] = translator.translate(category, src='it', dest='en').text
    except Exception as e:
        print(f"Error translating category: {category}, Error: {e}")
        category_translations[category] = category

recipes_df["Category"] = recipes_df["Category"].apply(lambda x: category_translations[x])


In [22]:
# Dropping the rows with missing values after translation.
# translate_column stores "" (not NaN) for a failed translation, and dropna()
# does not treat "" as missing, so rows with a blank Name/Steps are removed
# explicitly before the regular dropna.
translated_columns = [col for col in ("Name", "Steps") if col in recipes_df.columns]
has_blank_translation = recipes_df[translated_columns].eq("").any(axis=1)
recipes_df = recipes_df.loc[~has_blank_translation].dropna().reset_index(drop=True)

In [None]:
# Ingredient labels our YOLO model can detect (all lower-case)
ingredient_list = [
    # fruit and vegetables
    'apple', 'banana', 'beetroot', 'bell pepper', 'cabbage',
    'capsicum', 'carrot', 'cauliflower', 'chilli pepper', 'corn',
    'cucumber', 'eggplant', 'garlic', 'ginger', 'lemon',
    'lettuce', 'onion', 'orange', 'pear', 'peas',
    'pineapple', 'potato', 'spinach', 'sweetpotato', 'tomato',
    # pantry staples
    'butter', 'eggs', 'flour', 'milk', 'pasta', 'sugar',
]

def remove_and_replace_ingredients(
    ingredients,
    excluded_terms=('powder', 'salt', 'pepper', 'nutmeg', 'parmigiano', 'oil'),
):
    """Normalise ingredient names and drop the ones matching an excluded term.

    Each name is lower-cased, every 'yolk' substring is rewritten to 'egg',
    and the ingredient is discarded when the resulting name contains any of
    ``excluded_terms`` as a substring.

    Parameters
    ----------
    ingredients : list
        Entries whose element 0 is the name and element 1 the quantity;
        no other positions are read.
    excluded_terms : iterable of str, optional
        Substrings that cause an ingredient to be dropped. The default is the
        set of terms that used to be hard-coded in the condition.

    Returns
    -------
    list
        ``[normalised_name, quantity]`` lists for the ingredients that are kept.

    Notes
    -----
    Matching is by substring, so the default 'pepper' term also removes
    'bell pepper' / 'chilli pepper', and 'oil' also removes names such as
    'boiled ...'. Pass a narrower ``excluded_terms`` if that is not wanted.
    """
    cleaned_ingredients = []
    for ingredient in ingredients:
        # str.replace is a no-op when 'yolk' is absent, so no guard is needed
        ingredient_name = ingredient[0].lower().replace('yolk', 'egg')
        if not any(term in ingredient_name for term in excluded_terms):
            cleaned_ingredients.append([ingredient_name, ingredient[1]])
    return cleaned_ingredients

# Apply the function to the 'Ingredients' column.
# The column already holds real lists once the parsing cell above has run, and
# eval() raises TypeError on a list, so only string cells (e.g. a frame reloaded
# from CSV) are parsed. ast.literal_eval is used instead of eval because it only
# evaluates Python literals.
recipes_df['Ingredients'] = recipes_df['Ingredients'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)
recipes_df['Ingredients'] = recipes_df['Ingredients'].apply(remove_and_replace_ingredients)

def matches_ingredient(ingredient, ingredient_list):
    """Tell whether any entry of ``ingredient_list`` occurs inside ``ingredient``.

    The test is a plain substring check (``item in ingredient``); returns
    ``True`` on the first hit and ``False`` when nothing matches.
    """
    return any(candidate in ingredient for candidate in ingredient_list)

def match_percentage(ingredients, ingredient_list):
    """Return the fraction (0.0 to 1.0) of ingredients found in ``ingredient_list``.

    Parameters
    ----------
    ingredients : list
        Entries whose element 0 is the ingredient name; it is lower-cased
        before being passed to ``matches_ingredient``.
    ingredient_list : list of str
        Names to match against.

    Returns
    -------
    float
        Matching entries divided by the number of entries, or ``0.0`` when
        ``ingredients`` is empty. A recipe can end up with no ingredients
        after cleaning, which previously raised ZeroDivisionError.
    """
    total = len(ingredients)
    if total == 0:
        return 0.0
    match_count = sum(matches_ingredient(ingredient[0].lower(), ingredient_list) for ingredient in ingredients)
    return match_count / total

# Keep only the recipes in which at least 80% of the ingredients match the detectable list
df_filtered = recipes_df.loc[
    recipes_df['Ingredients'].apply(
        lambda recipe_ingredients: match_percentage(recipe_ingredients, ingredient_list) >= 0.8
    )
]

df_filtered.head(n=5)

In [None]:
# Persist the filtered recipes next to the notebook
df_filtered.to_csv(path_or_buf='filtered.csv')