In [1]:
import pandas as pd
import ast

In [2]:
df = pd.read_csv("cleaned_recipe_dataset.csv")

In [3]:
df['tags'][0]

"['60-minutes-or-less', 'time-to-make', 'course', 'main-ingredient', 'cuisine', 'preparation', 'occasion', 'north-american', 'side-dishes', 'vegetables', 'mexican', 'easy', 'fall', 'holiday-event', 'vegetarian', 'winter', 'dietary', 'christmas', 'seasonal', 'squash']"

In [4]:
# cuisine, diet_type and time_to_make mappings defined
cuisine_mapping = {
    'asian': 'Asian',
    'cambodian': 'Asian',
    'beijing': 'Chinese',
    'pakistani': 'Indian',
    'hungarian': 'European',
    'swedish': 'European',
    'austrian': 'European',
    'english': 'European',
    'spanish': 'European',
    'moroccan': 'Middle Eastern',
    'lebanese': 'Middle Eastern',
    'palestinian': 'Middle Eastern',
    'caribbean': 'Caribbean/Latin American',
    'creole': 'Caribbean/Latin American',
    'south-american': 'Caribbean/Latin American',
    'congolese': 'Caribbean/Latin American',
    'indonesian': 'Asian',
    'cantonese': 'Chinese',
    'cuban': 'Caribbean/Latin American',
    'peruvian': 'Caribbean/Latin American',
    'norwegian': 'European',
    'portuguese': 'European',
    'czech': 'European',
    'venezuelan': 'Caribbean/Latin American',
    'iranian-persian': 'Middle Eastern',
    'german': 'European',
    'laotian': 'Asian',
    'scandinavian': 'European',
    'belgian': 'European',
    'italian': 'European',
    'french': 'European',
    'thai': 'Asian',
    'greek': 'European',
    'new-zealand': 'Australian',
    'southern-united-states': 'American',
    'angolan': 'African',
    'namibian': 'African',
    'american': 'American',
    'southern-united-states': 'American',
    'southwestern-united-states':'American',
    'indian': 'Indian',
    'african': 'African',
    'amish-mennonite': 'American',
    'argentine': 'Caribbean/Latin American',
    'australian': 'Australian',
    'brazilian': 'Caribbean/Latin American',
    'british-columbian': 'American',
    'cajun': 'American',
    'californian': 'American',
    'canadian': 'American',
    'central-american': 'Caribbean/Latin American',
    'chinese': 'Chinese',
    'colombian': 'Caribbean/Latin American',
    'costa-rican': 'Caribbean/Latin American',
    'danish': 'European',
    'dutch': 'European',
    'ecuadorean': 'Caribbean/Latin American',
    'egyptian': 'Middle Eastern',
    'ethiopian': 'African',
    'filipino': 'Asian',
    'finnish': 'European',
    'georgian': 'European',
    'guatemalan': 'Caribbean/Latin American',
    'hawaiian': 'American',
    'honduran': 'Caribbean/Latin American',
    'icelandic': 'European',
    'iraqi': 'Middle Eastern',
    'irish': 'European',
    'japanese': 'Asian',
    'jewish-ashkenazi': 'European',
    'jewish-sephardi': 'Middle Eastern',
    'korean': 'Asian',
    'malaysian': 'Asian',
    'mexican': 'Caribbean/Latin American',
    'middle-eastern': 'Middle Eastern',
    'mongolian': 'Asian',
    'nepalese': 'Indian',
    'nigerian': 'African',
    'north-american': 'American',
    'northeastern-united-states': 'American',
    'ontario': 'Canadian',
    'pacific-northwest': 'American',
    'polish': 'European',
    'polynesian': 'Oceanian',
    'puerto-rican': 'Caribbean/Latin American',
    'european': 'European',
    'micro-melanesia': 'Oceanian',
    'south-west-pacific': 'Oceanian',
    'midwestern': 'American',
    'native-american': 'American',
    'pennsylvania-dutch': 'American',
    'quebec': 'Canadian',
    'russian': 'Russian',
    'saudi-arabian': 'Middle Eastern',
    'scottish': 'European',
    'somalian': 'African',
    'south-african': 'African',
    'sudanese': 'African',
    'tex-mex': 'Caribbean/Latin American',
    'turkish': 'Middle Eastern',
    'vietnamese': 'Asian'
}

diet_type_mapping = {
    'vegan': 'Vegan',
    'vegetarian': 'Vegetarian',
    'low-carb': 'Low Carb',
    'low-sodium': 'Low Sodium',
    'low-calorie': 'Low Calorie',
    'gluten-free': 'Gluten Free',
    'dairy-free': 'Dairy Free',
    'healthy-2': 'Healthy',
    'high-calcium': 'High Calcium',
    'high-fiber': 'High Fiber',
    'high-protein': 'High Protein',
    'low-fat': 'Low Fat',
    'low-cholesterol': 'Low Cholesterol',
    'diabetic': 'Diabetic Friendly',
    'gluten-free': 'Gluten Free',
    'healthy': 'Healthy',
    'healthy-2': 'Healthy',
    'high-calcium': 'High Calcium',
    'high-fiber': 'High Fiber',
    'high-in-something': 'Special Nutrient Focus',
    'high-protein': 'High Protein',
    'kosher': 'Kosher',
    'lactose': 'Lactose Free',
    'low-calorie': 'Low Calorie',
    'low-carb': 'Low Carb',
    'low-cholesterol': 'Low Cholesterol',
    'low-fat': 'Low Fat',
    'low-in-something': 'Special Nutrient Focus',
    'low-protein': 'Low Protein',
    'low-saturated-fat': 'Low Saturated Fat',
    'low-sodium': 'Low Sodium',
    'vegan': 'Vegan',
    'vegetarian': 'Vegetarian'
    
}

time_to_make_mapping = {
    '1-day-or-more': 'More than 24 hours',
    '15-minutes-or-less': '15 minutes or less',
    '30-minutes-or-less': '30 minutes or less',
    '4-hours-or-less': '4 hours or less', 
    '60-minutes-or-less': '60 minutes or less'
}

In [5]:
# function to map the words with their tags 
def map_words_for_tags(word_list, mapping):
    return [mapping.get(word, word) for word in word_list]

In [6]:
# turn the column into a list 
df['tags'] = df['tags'].apply(eval)  
df['tags']

0         [60-minutes-or-less, time-to-make, course, mai...
1         [30-minutes-or-less, time-to-make, course, mai...
2         [time-to-make, course, preparation, main-dish,...
3         [60-minutes-or-less, time-to-make, course, mai...
4         [weeknight, time-to-make, course, main-ingredi...
                                ...                        
231631    [ham, 60-minutes-or-less, time-to-make, course...
231632    [15-minutes-or-less, time-to-make, course, pre...
231633    [60-minutes-or-less, time-to-make, course, mai...
231634    [30-minutes-or-less, time-to-make, course, pre...
231635    [30-minutes-or-less, time-to-make, course, pre...
Name: tags, Length: 231636, dtype: object

In [7]:
# delete words not in the mappings dictionaries 
df['tags'] = df['tags'].apply(lambda tags: [word for word in tags if any(word in mapping for mapping in (cuisine_mapping, diet_type_mapping, time_to_make_mapping))])

In [8]:
# replace with the value from the respective mapping dictionaries to make them uniform
df['tags'] = df['tags'].apply(lambda tags: map_words_for_tags(tags, cuisine_mapping))
df['tags'] = df['tags'].apply(lambda tags: map_words_for_tags(tags, diet_type_mapping))
df['tags'] = df['tags'].apply(lambda tags: map_words_for_tags(tags, time_to_make_mapping))

In [9]:
df['tags'][0]

['60 minutes or less', 'American', 'Caribbean/Latin American', 'Vegetarian']

In [10]:
# create new columns for each of the cuisine, diet_type, and time_to_make tags
new_df = df.copy()
new_df['cuisine'] = new_df['tags'].apply(lambda tags: [tag for tag in tags if tag in cuisine_mapping.values()])
new_df['diet_type'] = new_df['tags'].apply(lambda tags: [tag for tag in tags if tag in diet_type_mapping.values()])
new_df['time_to_make'] = new_df['tags'].apply(lambda tags: [tag for tag in tags if tag in time_to_make_mapping.values()])

In [11]:
new_df.drop(columns=['tags'], inplace=True)
new_df

Unnamed: 0,name,recipe_id,minutes,nutrition,n_steps,steps,ingredients,n_ingredients,rating,cuisine,diet_type,time_to_make
0,arriba baked winter squash mexican style,137739,55,"[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...","['winter squash', 'mexican seasoning', 'mixed ...",7,5,"[American, Caribbean/Latin American]",[Vegetarian],[60 minutes or less]
1,a bit different breakfast pizza,31490,30,"[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...","['prepared pizza crust', 'sausage patty', 'egg...",6,0,"[American, American, American]",[],[30 minutes or less]
2,all in the kitchen chili,112140,130,"[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...","['ground beef', 'yellow onions', 'diced tomato...",13,4,[],[],[4 hours or less]
3,alouette potatoes,59389,45,"[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"['spreadable cheese with garlic and herbs', 'n...",11,4,[],[],[60 minutes or less]
4,amish tomato ketchup for canning,44061,190,"[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,5,"[American, American, American, American]",[Vegetarian],[4 hours or less]
...,...,...,...,...,...,...,...,...,...,...,...,...
231631,zydeco soup,486161,60,"[415.2, 26.0, 34.0, 26.0, 44.0, 21.0, 15.0]",7,"['heat oil in a 4-quart dutch oven', 'add cele...","['celery', 'onion', 'green sweet pepper', 'gar...",22,5,"[American, American, American, American, Carib...",[],[60 minutes or less]
231632,zydeco spice mix,493372,5,"[14.8, 0.0, 2.0, 58.0, 1.0, 0.0, 1.0]",1,['mix all ingredients together thoroughly'],"['paprika', 'salt', 'garlic powder', 'onion po...",13,5,[],"[Vegan, Vegetarian]",[15 minutes or less]
231633,zydeco ya ya deviled eggs,308080,40,"[59.2, 6.0, 2.0, 3.0, 6.0, 5.0, 0.0]",7,"['in a bowl , combine the mashed yolks and may...","['hard-cooked eggs', 'mayonnaise', 'dijon must...",8,5,[],[],[60 minutes or less]
231634,cookies by design cookies on a stick,298512,29,"[188.0, 11.0, 57.0, 11.0, 7.0, 21.0, 9.0]",9,['place melted butter in a large mixing bowl a...,"['butter', 'eagle brand condensed milk', 'ligh...",10,1,[],"[High Calcium, Special Nutrient Focus]",[30 minutes or less]


In [12]:
# categorize minutes 
new_df2 = new_df.copy()
less_min_limit = new_df2['minutes'].quantile(1/3)  
more_min_limit = new_df2['minutes'].quantile(2/3)

In [13]:
def minutes_categories(mins):
    if mins <= less_min_limit:
        return 'less'
    elif mins <= more_min_limit:
        return 'medium'
    else:
        return 'more'

new_df2['minutes_category'] = new_df2['minutes'].apply(minutes_categories)

In [14]:
new_df2

Unnamed: 0,name,recipe_id,minutes,nutrition,n_steps,steps,ingredients,n_ingredients,rating,cuisine,diet_type,time_to_make,minutes_category
0,arriba baked winter squash mexican style,137739,55,"[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...","['winter squash', 'mexican seasoning', 'mixed ...",7,5,"[American, Caribbean/Latin American]",[Vegetarian],[60 minutes or less],medium
1,a bit different breakfast pizza,31490,30,"[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...","['prepared pizza crust', 'sausage patty', 'egg...",6,0,"[American, American, American]",[],[30 minutes or less],medium
2,all in the kitchen chili,112140,130,"[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...","['ground beef', 'yellow onions', 'diced tomato...",13,4,[],[],[4 hours or less],more
3,alouette potatoes,59389,45,"[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"['spreadable cheese with garlic and herbs', 'n...",11,4,[],[],[60 minutes or less],medium
4,amish tomato ketchup for canning,44061,190,"[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,5,"[American, American, American, American]",[Vegetarian],[4 hours or less],more
...,...,...,...,...,...,...,...,...,...,...,...,...,...
231631,zydeco soup,486161,60,"[415.2, 26.0, 34.0, 26.0, 44.0, 21.0, 15.0]",7,"['heat oil in a 4-quart dutch oven', 'add cele...","['celery', 'onion', 'green sweet pepper', 'gar...",22,5,"[American, American, American, American, Carib...",[],[60 minutes or less],more
231632,zydeco spice mix,493372,5,"[14.8, 0.0, 2.0, 58.0, 1.0, 0.0, 1.0]",1,['mix all ingredients together thoroughly'],"['paprika', 'salt', 'garlic powder', 'onion po...",13,5,[],"[Vegan, Vegetarian]",[15 minutes or less],less
231633,zydeco ya ya deviled eggs,308080,40,"[59.2, 6.0, 2.0, 3.0, 6.0, 5.0, 0.0]",7,"['in a bowl , combine the mashed yolks and may...","['hard-cooked eggs', 'mayonnaise', 'dijon must...",8,5,[],[],[60 minutes or less],medium
231634,cookies by design cookies on a stick,298512,29,"[188.0, 11.0, 57.0, 11.0, 7.0, 21.0, 9.0]",9,['place melted butter in a large mixing bowl a...,"['butter', 'eagle brand condensed milk', 'ligh...",10,1,[],"[High Calcium, Special Nutrient Focus]",[30 minutes or less],medium


In [15]:
# outliers are anything that is more than 24 hours
num_rows = len(new_df2[new_df2['minutes'] > 1440])
print(num_rows)

2000


In [16]:
# remove outliers 
new_df2 = new_df2[new_df2['minutes'] <= 1440]

new_df2.reset_index(drop=True, inplace=True)
new_df2

Unnamed: 0,name,recipe_id,minutes,nutrition,n_steps,steps,ingredients,n_ingredients,rating,cuisine,diet_type,time_to_make,minutes_category
0,arriba baked winter squash mexican style,137739,55,"[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...","['winter squash', 'mexican seasoning', 'mixed ...",7,5,"[American, Caribbean/Latin American]",[Vegetarian],[60 minutes or less],medium
1,a bit different breakfast pizza,31490,30,"[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...","['prepared pizza crust', 'sausage patty', 'egg...",6,0,"[American, American, American]",[],[30 minutes or less],medium
2,all in the kitchen chili,112140,130,"[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...","['ground beef', 'yellow onions', 'diced tomato...",13,4,[],[],[4 hours or less],more
3,alouette potatoes,59389,45,"[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"['spreadable cheese with garlic and herbs', 'n...",11,4,[],[],[60 minutes or less],medium
4,amish tomato ketchup for canning,44061,190,"[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,5,"[American, American, American, American]",[Vegetarian],[4 hours or less],more
...,...,...,...,...,...,...,...,...,...,...,...,...,...
229631,zydeco soup,486161,60,"[415.2, 26.0, 34.0, 26.0, 44.0, 21.0, 15.0]",7,"['heat oil in a 4-quart dutch oven', 'add cele...","['celery', 'onion', 'green sweet pepper', 'gar...",22,5,"[American, American, American, American, Carib...",[],[60 minutes or less],more
229632,zydeco spice mix,493372,5,"[14.8, 0.0, 2.0, 58.0, 1.0, 0.0, 1.0]",1,['mix all ingredients together thoroughly'],"['paprika', 'salt', 'garlic powder', 'onion po...",13,5,[],"[Vegan, Vegetarian]",[15 minutes or less],less
229633,zydeco ya ya deviled eggs,308080,40,"[59.2, 6.0, 2.0, 3.0, 6.0, 5.0, 0.0]",7,"['in a bowl , combine the mashed yolks and may...","['hard-cooked eggs', 'mayonnaise', 'dijon must...",8,5,[],[],[60 minutes or less],medium
229634,cookies by design cookies on a stick,298512,29,"[188.0, 11.0, 57.0, 11.0, 7.0, 21.0, 9.0]",9,['place melted butter in a large mixing bowl a...,"['butter', 'eagle brand condensed milk', 'ligh...",10,1,[],"[High Calcium, Special Nutrient Focus]",[30 minutes or less],medium


In [17]:
# categorize n_steps
less_step_limit = new_df2['n_steps'].quantile(1/3)  
more_step_limit = new_df2['n_steps'].quantile(2/3)

In [18]:
def steps_categories(steps):
    if steps <= less_step_limit:
        return 'less'
    elif steps <= more_step_limit:
        return 'medium'
    else:
        return 'more'

new_df2['steps_category'] = new_df2['n_steps'].apply(steps_categories)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df2['steps_category'] = new_df2['n_steps'].apply(steps_categories)


In [19]:
new_df2

Unnamed: 0,name,recipe_id,minutes,nutrition,n_steps,steps,ingredients,n_ingredients,rating,cuisine,diet_type,time_to_make,minutes_category,steps_category
0,arriba baked winter squash mexican style,137739,55,"[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...","['winter squash', 'mexican seasoning', 'mixed ...",7,5,"[American, Caribbean/Latin American]",[Vegetarian],[60 minutes or less],medium,medium
1,a bit different breakfast pizza,31490,30,"[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...","['prepared pizza crust', 'sausage patty', 'egg...",6,0,"[American, American, American]",[],[30 minutes or less],medium,medium
2,all in the kitchen chili,112140,130,"[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...","['ground beef', 'yellow onions', 'diced tomato...",13,4,[],[],[4 hours or less],more,less
3,alouette potatoes,59389,45,"[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"['spreadable cheese with garlic and herbs', 'n...",11,4,[],[],[60 minutes or less],medium,medium
4,amish tomato ketchup for canning,44061,190,"[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,5,"[American, American, American, American]",[Vegetarian],[4 hours or less],more,less
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229631,zydeco soup,486161,60,"[415.2, 26.0, 34.0, 26.0, 44.0, 21.0, 15.0]",7,"['heat oil in a 4-quart dutch oven', 'add cele...","['celery', 'onion', 'green sweet pepper', 'gar...",22,5,"[American, American, American, American, Carib...",[],[60 minutes or less],more,less
229632,zydeco spice mix,493372,5,"[14.8, 0.0, 2.0, 58.0, 1.0, 0.0, 1.0]",1,['mix all ingredients together thoroughly'],"['paprika', 'salt', 'garlic powder', 'onion po...",13,5,[],"[Vegan, Vegetarian]",[15 minutes or less],less,less
229633,zydeco ya ya deviled eggs,308080,40,"[59.2, 6.0, 2.0, 3.0, 6.0, 5.0, 0.0]",7,"['in a bowl , combine the mashed yolks and may...","['hard-cooked eggs', 'mayonnaise', 'dijon must...",8,5,[],[],[60 minutes or less],medium,less
229634,cookies by design cookies on a stick,298512,29,"[188.0, 11.0, 57.0, 11.0, 7.0, 21.0, 9.0]",9,['place melted butter in a large mixing bowl a...,"['butter', 'eagle brand condensed milk', 'ligh...",10,1,[],"[High Calcium, Special Nutrient Focus]",[30 minutes or less],medium,medium


In [20]:
empty_list_count = new_df2['cuisine'].apply(len).eq(0).sum()
print(empty_list_count)

139463


In [21]:
new_df2['nutrition'].dtype

dtype('O')

In [22]:
new_df2['nutrition'] = new_df2['nutrition'].apply(ast.literal_eval)

new_df2['calories'] = [nut[0] if isinstance(nut, list) and len(nut) > 0 else np.nan for nut in new_df2['nutrition']]
new_df2['total fat'] = [nut[1] if isinstance(nut, list) and len(nut) > 1 else np.nan for nut in new_df2['nutrition']]
new_df2['sugar'] = [nut[2] if isinstance(nut, list) and len(nut) > 2 else np.nan for nut in new_df2['nutrition']]
new_df2['sodium'] = [nut[3] if isinstance(nut, list) and len(nut) > 3 else np.nan for nut in new_df2['nutrition']]
new_df2['protein'] = [nut[4] if isinstance(nut, list) and len(nut) > 4 else np.nan for nut in new_df2['nutrition']]
new_df2['saturated fat'] = [nut[5] if isinstance(nut, list) and len(nut) > 5 else np.nan for nut in new_df2['nutrition']]
new_df2['carbohydrates'] = [nut[6] if isinstance(nut, list) and len(nut) > 6 else np.nan for nut in new_df2['nutrition']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df2['nutrition'] = new_df2['nutrition'].apply(ast.literal_eval)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df2['calories'] = [nut[0] if isinstance(nut, list) and len(nut) > 0 else np.nan for nut in new_df2['nutrition']]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df2['total fat']

In [23]:
new_df2

Unnamed: 0,name,recipe_id,minutes,nutrition,n_steps,steps,ingredients,n_ingredients,rating,cuisine,...,time_to_make,minutes_category,steps_category,calories,total fat,sugar,sodium,protein,saturated fat,carbohydrates
0,arriba baked winter squash mexican style,137739,55,"[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...","['winter squash', 'mexican seasoning', 'mixed ...",7,5,"[American, Caribbean/Latin American]",...,[60 minutes or less],medium,medium,51.5,0.0,13.0,0.0,2.0,0.0,4.0
1,a bit different breakfast pizza,31490,30,"[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...","['prepared pizza crust', 'sausage patty', 'egg...",6,0,"[American, American, American]",...,[30 minutes or less],medium,medium,173.4,18.0,0.0,17.0,22.0,35.0,1.0
2,all in the kitchen chili,112140,130,"[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...","['ground beef', 'yellow onions', 'diced tomato...",13,4,[],...,[4 hours or less],more,less,269.8,22.0,32.0,48.0,39.0,27.0,5.0
3,alouette potatoes,59389,45,"[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"['spreadable cheese with garlic and herbs', 'n...",11,4,[],...,[60 minutes or less],medium,medium,368.1,17.0,10.0,2.0,14.0,8.0,20.0
4,amish tomato ketchup for canning,44061,190,"[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,5,"[American, American, American, American]",...,[4 hours or less],more,less,352.9,1.0,337.0,23.0,3.0,0.0,28.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229631,zydeco soup,486161,60,"[415.2, 26.0, 34.0, 26.0, 44.0, 21.0, 15.0]",7,"['heat oil in a 4-quart dutch oven', 'add cele...","['celery', 'onion', 'green sweet pepper', 'gar...",22,5,"[American, American, American, American, Carib...",...,[60 minutes or less],more,less,415.2,26.0,34.0,26.0,44.0,21.0,15.0
229632,zydeco spice mix,493372,5,"[14.8, 0.0, 2.0, 58.0, 1.0, 0.0, 1.0]",1,['mix all ingredients together thoroughly'],"['paprika', 'salt', 'garlic powder', 'onion po...",13,5,[],...,[15 minutes or less],less,less,14.8,0.0,2.0,58.0,1.0,0.0,1.0
229633,zydeco ya ya deviled eggs,308080,40,"[59.2, 6.0, 2.0, 3.0, 6.0, 5.0, 0.0]",7,"['in a bowl , combine the mashed yolks and may...","['hard-cooked eggs', 'mayonnaise', 'dijon must...",8,5,[],...,[60 minutes or less],medium,less,59.2,6.0,2.0,3.0,6.0,5.0,0.0
229634,cookies by design cookies on a stick,298512,29,"[188.0, 11.0, 57.0, 11.0, 7.0, 21.0, 9.0]",9,['place melted butter in a large mixing bowl a...,"['butter', 'eagle brand condensed milk', 'ligh...",10,1,[],...,[30 minutes or less],medium,medium,188.0,11.0,57.0,11.0,7.0,21.0,9.0


In [24]:
new_df2.to_csv('processed_recipe_dataset.csv', index=False)  