In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
import scipy.stats as stats

# Lista dei metodi di cottura
TECHNIQUES_LIST = [
    'bake', 'barbecue', 'blanch', 'blend', 'boil', 'braise', 'brine', 'broil',
    'caramelize', 'combine', 'crock pot', 'crush', 'deglaze', 'devein', 'dice',
    'distill', 'drain', 'emulsify', 'ferment', 'freeze', 'fry', 'grate', 'griddle',
    'grill', 'knead', 'leaven', 'marinate', 'mash', 'melt', 'microwave', 'parboil',
    'pickle', 'poach', 'pour', 'pressure cook', 'puree', 'refrigerate', 'roast', 'saute',
    'scald', 'scramble', 'shred', 'simmer', 'skillet', 'slow cook', 'smoke', 'smooth',
    'soak', 'sous-vide', 'steam', 'stew', 'strain', 'tenderize', 'thicken', 'toast',
    'toss', 'whip', 'whisk'
]

# Step 1: Carica il CSV in un DataFrame
data = pd.read_csv('dataset/RAW_merged_top_smallest.csv')

# Step 2: Estrazione e preprocessamento degli ingredienti e dei metodi di cottura
data['ingredients'] = data['ingredients'].apply(eval)  # Converti la stringa in lista
data['techniques'] = data['techniques'].apply(eval)  # Converti la stringa in lista

# Binarizza gli ingredienti e i metodi di cottura
mlb_ingredients = MultiLabelBinarizer()
ingredients_encoded = mlb_ingredients.fit_transform(data['ingredients'])

mlb_techniques = MultiLabelBinarizer(classes=TECHNIQUES_LIST)
techniques_encoded = mlb_techniques.fit_transform(data['techniques'])

# Step 3: Calcolo della Correlazione
correlation_matrix = np.zeros((ingredients_encoded.shape[1], techniques_encoded.shape[1]))

for i in range(ingredients_encoded.shape[1]):
    for j in range(techniques_encoded.shape[1]):
        correlation_matrix[i, j] = stats.pearsonr(ingredients_encoded[:, i], techniques_encoded[:, j])[0]

# Step 4: Predizione dei Metodi di Cottura
def find_best_techniques(ingredients, top_n=3):
    ingredient_indices = mlb_ingredients.transform([ingredients])[0]
    average_correlations = correlation_matrix[ingredient_indices == 1].mean(axis=0)
    best_techniques_indices = average_correlations.argsort()[-top_n:][::-1]
    best_techniques = mlb_techniques.classes_[best_techniques_indices]
    return best_techniques

# Esempio di utilizzo
new_ingredients = ['winter squash', 'mexican seasoning', 'honey']
best_techniques = find_best_techniques(new_ingredients)
print(best_techniques)

In [1]:
import csv
import ast
import numpy as np

TECHNIQUES_LIST = [
    'bake', 'barbecue', 'blanch', 'blend', 'boil', 'braise', 'brine', 'broil',
    'caramelize', 'combine', 'crock pot', 'crush', 'deglaze', 'devein', 'dice',
    'distill', 'drain', 'emulsify', 'ferment', 'freeze', 'fry', 'grate', 'griddle',
    'grill', 'knead', 'leaven', 'marinate', 'mash', 'melt', 'microwave', 'parboil',
    'pickle', 'poach', 'pour', 'pressure cook', 'puree', 'refrigerate', 'roast', 'saute',
    'scald', 'scramble', 'shred', 'simmer', 'skillet', 'slow cook', 'smoke', 'smooth',
    'soak', 'sous-vide', 'steam', 'stew', 'strain', 'tenderize', 'thicken', 'toast',
    'toss', 'whip', 'whisk'
]

def load_data(csv_file):
    data = []
    with open(csv_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            techniques = ast.literal_eval(row['techniques'])
            ingredients = ast.literal_eval(row['ingredients'])
            data.append((techniques, ingredients))
    return data

def calculate_correlation(ingredients, data):
    ingredient_indices = {ingredient: [] for ingredient in ingredients}
    
    for techniques, ingr_list in data:
        for ingredient in ingredients:
            if ingredient in ingr_list:
                ingredient_indices[ingredient].append(techniques)
    
    # Calculate the average correlation for each technique
    correlations = np.zeros(len(TECHNIQUES_LIST))
    for ingredient in ingredient_indices:
        if ingredient_indices[ingredient]:
            correlations += np.mean(ingredient_indices[ingredient], axis=0)
    
    # Normalize by the number of ingredients found
    correlations /= len(ingredients)
    
    return correlations

def get_top_techniques(correlations, top_n=5):
    technique_correlations = list(zip(TECHNIQUES_LIST, correlations))
    technique_correlations.sort(key=lambda x: x[1], reverse=True)
    return technique_correlations[:top_n]

# Example usage
csv_file = 'dataset/RAW_merged_top_smallest.csv'
data = load_data(csv_file)

ingredients = "pasta, tomato, fish".split(", ")
correlations = calculate_correlation(ingredients, data)
top_techniques = get_top_techniques(correlations)

print("Top techniques:")
for technique, score in top_techniques:
    print(f"{technique}: {score}")

ValueError: operands could not be broadcast together with shapes (58,) (45,) (58,) 

In [2]:
import pandas as pd

# Supponiamo che il file CSV sia chiamato 'recipes.csv'
file_path = 'dataset/RAW_merged_top_smallest.csv'

# Leggi i dati dal CSV
df = pd.read_csv(file_path)

# Lista estesa di parole chiave per i metodi di cottura
cooking_keywords = ["bake", "boil", "braise", "broil", "deep fry", "grill", "pan fry", "poach", "pressure cook", "roast", "sauté", "simmer", "steam", "stew", "blanch", "caramelize", "clarify", "deglaze", "flambé", "fold", "glaze", "marinate", "pickle", "reduce", "sear", "smoke", "sous-vide", "temper", "whip"]

# Funzione ottimizzata per estrarre i metodi di cottura
def extract_cooking_methods(steps):
    methods = set()
    steps_list = eval(steps)
    
    for step in steps_list:
        for keyword in cooking_keywords:
            if keyword in step:
                methods.add(keyword)
    return list(methods)

# Applica la funzione per creare una nuova colonna
df['cooking_methods'] = df['steps'].apply(extract_cooking_methods)

# Salva il DataFrame in un nuovo CSV
output_file_path = 'updated_recipes.csv'
df.to_csv(output_file_path, index=False)

print(f'Il nuovo CSV è stato salvato come {output_file_path}')

Il nuovo CSV è stato salvato come updated_recipes.csv
