# Download Data

In [4]:
import requests
import pandas as pd
from io import BytesIO

def download_csv(csv_url, timeout=30):
    """Download a CSV file over HTTP and parse it into a DataFrame.

    The payload is first parsed as comma-separated. If pandas raises a
    ``ParserError``, the same payload is parsed again as
    semicolon-separated, this time skipping malformed lines instead of
    failing.

    Parameters
    ----------
    csv_url : str
        URL of the CSV file to fetch.
    timeout : float or tuple or None, optional
        Forwarded to ``requests.get``. Bounds how long to wait for the
        server to connect / send data; it is not a cap on the total
        download time. Defaults to 30 seconds. Pass ``None`` to wait
        indefinitely.

    Returns
    -------
    pandas.DataFrame
        The parsed table.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status
        (raised by ``raise_for_status``).
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    # Without a timeout, requests blocks forever on an unresponsive server.
    response = requests.get(csv_url, timeout=timeout)
    response.raise_for_status()
    payload = response.content

    # Adjust 'sep' or add further read_csv parameters here if needed.
    try:
        return pd.read_csv(BytesIO(payload), sep=',')
    except pd.errors.ParserError:
        # Fallback for semicolon-delimited files; lines that still cannot be
        # parsed are dropped rather than aborting the load.
        return pd.read_csv(BytesIO(payload), sep=';', on_bad_lines='skip')

In [None]:
# Source of the recipes dataset (raw file hosted on GitHub).
RECIPES_CSV_URL = "https://github.com/GiovTemp/SustainaMeal_Case_Study/raw/main/data/valid_recipes_dataset.csv"
recipes_df = download_csv(RECIPES_CSV_URL)

In [None]:
# Show the loaded DataFrame as this cell's output.
recipes_df

Unnamed: 0.1,title,sustainability_score,sustainability_label,Unnamed: 0,recipe_id,description,author_id,duration,directions,ingredients,...,sugars [g],protein [g],direction_size,ingredients_sizes,who_score,fsa_score,nutri_score,normalization_comment,ingredient_food_kg_urls,ingredient_food_kg_names
0,Jack O Lantern 'o Fragrance,0.002409,0,495656,466123,I suppose that this isn't really a recipe as m...,169430,30.0,['When you cut the lid out of the pumpkin do ...,"['pumpkin liking', 'candle pumpkin', 'pumpkin ...",...,0.3,0.6,7,4,0.302381,0.250,0.25,,,
1,Boiled Radishes,0.002502,0,294268,179840,These taste a lot like new potatoes. The boil...,254469,22.0,"['Trim ends and bad spots off Radishes.', 'Boi...","['radishes size bag bunch', 'water radishes']",...,2.4,0.8,2,2,0.293040,1.000,0.75,,['http://idea.rpi.edu/heals/kb/ingredientname/...,"['applesauce', 'garlic clove', 'ginger', 'mola..."
2,Horseradish Applesauce,0.002590,0,364381,174389,"I'm not sure this even qualifies as a recipe, ...",226918,22.0,"['Blend applesauce and horseradish. ', 'Refrig...","['applesauce', 'horseradish']",...,0.6,0.3,2,2,0.313757,0.875,0.50,,['http://idea.rpi.edu/heals/kb/ingredientname/...,"['all - purpose flour', 'butter', 'butternut s..."
3,Survival Necklace,0.002623,0,43445,95937,Fun for kids to make and a great way to keep t...,25792,10.0,['Other supplies: 18 inches string or yarn or ...,['Fruit Loops cereal cup apple jacks kind hole...,...,1.2,8.8,7,2,0.319215,0.750,0.25,,['http://idea.rpi.edu/heals/kb/ingredientname/...,"['Old Bay Seasoning', 'Worcestershire sauce', ..."
4,Apple Cider Reduction,0.002623,0,59722,521756,I saw a post about this on Facebook and gave i...,171084,155.0,"['In 6-8 quart stock pan, measure out 2 cups o...","['gallon apple cider fresh', 'gallon apple cid...",...,0.0,0.0,10,2,0.214286,0.750,0.75,,['http://idea.rpi.edu/heals/kb/ingredientname/...,"['bean sprouts', 'chicken thighs', 'fresh ging..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100865,Curried Lamb on Rice,0.951024,2,103145,373244,"With modification this recipe is from ""The Dia...",133174,65.0,"['Over medium heat, melt the butter in a large...","['lamb', 'medium onion', 'cloves', 'butter', '...",...,3.4,28.7,5,13,0.223549,0.625,0.25,,['http://idea.rpi.edu/heals/kb/ingredientname/...,"['Dijon mustard', 'Polish sausage', 'apricot p..."
100866,Rich Lamb Curry,0.960504,2,442694,268247,The recipe isn't as time consuming as it looks...,491437,110.0,"['Heat 1 tbsp olive oil in dutch oven. ', 'Co...","['oil', 'lamb', 'flour', 'salt pepper', 'onion...",...,30.4,36.1,9,21,0.153328,0.000,0.00,,,
100867,"Middle Eastern Slow-Cooked Stew With Lamb, Chi...",0.968458,2,359960,144850,From Cooking Light. Per 3/4 c. serving: 310 ca...,37779,112.0,['Let the oil get heating in a large pot over ...,"['oil', 'lamb', 'onions rings', 'water', 'clov...",...,20.1,20.8,12,19,0.205566,0.250,0.25,,['http://idea.rpi.edu/heals/kb/ingredientname/...,"['anchovies', 'celery ribs', 'diced tomatoes',..."
100868,Five Meat Chili Con Carne With Beans,0.971454,2,37637,13568,yum,20571,210.0,"['In a large pot brown meat in stages, about a...","['beef', 'pork', 'lamb', 'sausage', 'sausage',...",...,6.6,14.5,7,15,0.197568,0.250,0.25,,['http://idea.rpi.edu/heals/kb/ingredientname/...,"[""French\\'s French fried onions"", 'crabmeat',..."


In [None]:
# Mean of the 'who_score' column.
who_scores = recipes_df['who_score']
mean_score = who_scores.mean()

# For each threshold 0.1, 0.2, ..., 1.0, count the rows whose 'who_score'
# is strictly below it.
thresholds = [i / 10 for i in range(1, 11)]
occurrences = {
    f'who_score<{threshold}': (who_scores < threshold).sum()
    for threshold in thresholds
}

mean_score, occurrences

(0.21546167007856573,
 {'who_score<0.1': 4916,
  'who_score<0.2': 35460,
  'who_score<0.3': 93832,
  'who_score<0.4': 100783,
  'who_score<0.5': 100870,
  'who_score<0.6': 100870,
  'who_score<0.7': 100870,
  'who_score<0.8': 100870,
  'who_score<0.9': 100870,
  'who_score<1.0': 100870})

In [None]:
def categorize_healthiness(score):
    """Map a numeric score to a healthiness label string.

    Returns '2' for scores below 0.15, '1' for scores in [0.15, 0.25),
    and '0' otherwise. NaN also yields '0', because every comparison
    against NaN is false.
    """
    if score < 0.15:
        return '2'
    if score < 0.25:
        return '1'
    return '0'

# Apply categorize_healthiness to every 'who_score' value and store the
# resulting labels in the 'healthiness_label' column of recipes_df
# (the DataFrame is modified in place).
recipes_df['healthiness_label'] = recipes_df['who_score'].apply(categorize_healthiness)


In [None]:
# Write recipes_df to a CSV file at this relative path, then echo the
# path as the cell output.
csv_file_path = 'final_recipes_healthiness.csv'
recipes_df.to_csv(csv_file_path, index=False)  # index=False keeps the DataFrame index out of the CSV file

csv_file_path

'final_recipes_healthiness.csv'