In [None]:
import pandas as pd
import ast

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
RAW_recipes = pd.read_csv('/content/drive/MyDrive/ADM Project/data/RAW_recipes.csv')

In [None]:
RAW_recipes.isnull().sum()

name                 1
id                   0
minutes              0
contributor_id       0
submitted            0
tags                 0
nutrition            0
n_steps              0
steps                0
description       4979
ingredients          0
n_ingredients        0
dtype: int64

In [None]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

columns_to_scale = ['minutes', 'n_steps', 'n_ingredients']

# Apply Min-Max Scaling to the selected columns
RAW_recipes[columns_to_scale] = scaler.fit_transform(RAW_recipes[columns_to_scale])

# Define weights for each feature
weight_minutes = 0.3
weight_steps = 0.5
weight_ingredients = 0.2

# Calculate the combined feature
RAW_recipes['combined_feature'] = (RAW_recipes['minutes'] * weight_minutes +
                                   RAW_recipes['n_steps'] * weight_steps +
                                   RAW_recipes['n_ingredients'] * weight_ingredients)

print(RAW_recipes[['minutes', 'n_steps', 'n_ingredients', 'combined_feature']])
RAW_recipes.head()

             minutes   n_steps  n_ingredients  combined_feature
0       2.561137e-08  0.075862       0.142857          0.066502
1       1.396984e-08  0.062069       0.119048          0.054844
2       6.053597e-08  0.041379       0.285714          0.077833
3       2.095476e-08  0.075862       0.238095          0.085550
4       8.847564e-08  0.034483       0.166667          0.050575
...              ...       ...            ...               ...
231632  2.793968e-08  0.048276       0.500000          0.124138
231633  2.328306e-09  0.006897       0.285714          0.060591
231634  1.862645e-08  0.048276       0.166667          0.057471
231635  1.350418e-08  0.062069       0.214286          0.073892
231636  9.313226e-09  0.034483       0.142857          0.045813

[231637 rows x 4 columns]


Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,combined_feature
0,arriba baked winter squash mexican style,137739,2.561137e-08,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",0.075862,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",0.142857,0.066502
1,a bit different breakfast pizza,31490,1.396984e-08,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",0.062069,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",0.119048,0.054844
2,all in the kitchen chili,112140,6.053597e-08,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",0.041379,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",0.285714,0.077833
3,alouette potatoes,59389,2.095476e-08,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",0.075862,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",0.238095,0.08555
4,amish tomato ketchup for canning,44061,8.847564e-08,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",0.034483,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",0.166667,0.050575


In [None]:
columns_to_remove = ['submitted','tags','steps','description']
RAW_recipes = RAW_recipes.drop(columns=columns_to_remove)

In [None]:
# Extracting unique ingredients from the new dataset
all_ingredients_new = set()
for ingredients_list in RAW_recipes['ingredients']:
    ingredients = ast.literal_eval(ingredients_list)
    all_ingredients_new.update(ingredients)

# Assigning unique IDs to each ingredient in the new dataset
ingredient_to_id_new = {ingredient: i for i, ingredient in enumerate(all_ingredients_new)}

# Transforming recipes into lists of ingredient IDs for the new dataset
RAW_recipes['ingredient_ids'] = RAW_recipes['ingredients'].apply(lambda x: [ingredient_to_id_new[ingredient] for ingredient in ast.literal_eval(x)])

# Saving the ingredient-to-ID mapping for the new dataset as a CSV file
mapping_df_new = pd.DataFrame(list(ingredient_to_id_new.items()), columns=['Ingredient', 'ID'])
mapping_csv_path = '/content/drive/MyDrive/ADM Project/data/ingredient_to_id_mapping_new.csv'
mapping_df_new.to_csv(mapping_csv_path, index=False)

RAW_recipes.head(), mapping_csv_path


(                                         name      id       minutes  \
 0  arriba   baked winter squash mexican style  137739  2.561137e-08   
 1            a bit different  breakfast pizza   31490  1.396984e-08   
 2                   all in the kitchen  chili  112140  6.053597e-08   
 3                          alouette  potatoes   59389  2.095476e-08   
 4          amish  tomato ketchup  for canning   44061  8.847564e-08   
 
    contributor_id                                   nutrition   n_steps  \
 0           47892       [51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]  0.075862   
 1           26278   [173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]  0.062069   
 2          196586  [269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]  0.041379   
 3           68585   [368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]  0.075862   
 4           41706   [352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]  0.034483   
 
                                          ingredients  n_ingredients  \
 0  ['winter squash', 'mexican seaso

In [None]:
RAW_recipes = RAW_recipes.rename(columns={'name':'recipe_name','id':'recipe_id','combined_feature':'recipe_complexity','ingredient_id':'tokenized_ingredient_id'})

In [None]:
RAW_recipes = RAW_recipes.drop(['ingredients','recipe_name'], axis = 1)

In [None]:
def extract_calories(nutrition_str):
    # Extract the first value from the nutrition string
    calories = float(nutrition_str.strip('[]').split(',')[0])
    return calories

# Apply the function to the nutrition column to create a new calories column
RAW_recipes['calories'] = RAW_recipes['nutrition'].apply(extract_calories)

# Display the updated dataframe with the calories column
RAW_recipes.head()
RAW_recipes = RAW_recipes.drop(["n_ingredients","n_steps","nutrition","minutes"], axis = 1)


In [None]:
RAW_recipes.head()

Unnamed: 0,recipe_id,contributor_id,recipe_complexity,ingredient_ids,calories
0,137739,47892,0.066502,"[5841, 3138, 9691, 4462, 13917, 8054, 13237]",51.5
1,31490,26278,0.054844,"[6885, 5104, 1377, 2042, 523, 4718]",173.4
2,112140,196586,0.077833,"[14054, 10463, 1900, 3915, 4810, 13231, 12433,...",269.8
3,59389,68585,0.08555,"[7303, 2787, 11442, 8692, 3794, 8054, 9680, 13...",368.1
4,44061,41706,0.050575,"[12959, 4191, 3258, 13237, 10646, 1463, 4222, ...",352.9


In [None]:
# Scaling the 'recipe_complexity' column to a range of 1 to 10
from sklearn.preprocessing import MinMaxScaler

# Initializing the MinMaxScaler
scaler = MinMaxScaler(feature_range=(1, 10))

# Scaling the 'recipe_complexity' column
RAW_recipes['scaled_recipe_complexity'] = scaler.fit_transform(RAW_recipes[['recipe_complexity']])

RAW_recipes.head()


Unnamed: 0,recipe_id,contributor_id,recipe_complexity,ingredient_ids,calories,scaled_recipe_complexity
0,137739,47892,0.066502,"[5841, 3138, 9691, 4462, 13917, 8054, 13237]",51.5,2.019027
1,31490,26278,0.054844,"[6885, 5104, 1377, 2042, 523, 4718]",173.4,1.819713
2,112140,196586,0.077833,"[14054, 10463, 1900, 3915, 4810, 13231, 12433,...",269.8,2.212726
3,59389,68585,0.08555,"[7303, 2787, 11442, 8692, 3794, 8054, 9680, 13...",368.1,2.344666
4,44061,41706,0.050575,"[12959, 4191, 3258, 13237, 10646, 1463, 4222, ...",352.9,1.746725


In [None]:
RAW_recipes = RAW_recipes.drop(["recipe_complexity"],axis = 1)

In [None]:
RAW_recipes = RAW_recipes.rename(columns={'scaled_recipe_complexity': 'recipe_complexity'})

In [None]:
RAW_recipes.head()

Unnamed: 0,recipe_id,contributor_id,ingredient_ids,calories,recipe_complexity
0,137739,47892,"[5841, 3138, 9691, 4462, 13917, 8054, 13237]",51.5,2.019027
1,31490,26278,"[6885, 5104, 1377, 2042, 523, 4718]",173.4,1.819713
2,112140,196586,"[14054, 10463, 1900, 3915, 4810, 13231, 12433,...",269.8,2.212726
3,59389,68585,"[7303, 2787, 11442, 8692, 3794, 8054, 9680, 13...",368.1,2.344666
4,44061,41706,"[12959, 4191, 3258, 13237, 10646, 1463, 4222, ...",352.9,1.746725


In [None]:
RAW_recipes['recipe_complexity'] = RAW_recipes['recipe_complexity'].round(3)

In [None]:
RAW_recipes

Unnamed: 0,recipe_id,contributor_id,ingredient_ids,calories,recipe_complexity
0,137739,47892,"[5841, 3138, 9691, 4462, 13917, 8054, 13237]",51.5,2.019
1,31490,26278,"[6885, 5104, 1377, 2042, 523, 4718]",173.4,1.820
2,112140,196586,"[14054, 10463, 1900, 3915, 4810, 13231, 12433,...",269.8,2.213
3,59389,68585,"[7303, 2787, 11442, 8692, 3794, 8054, 9680, 13...",368.1,2.345
4,44061,41706,"[12959, 4191, 3258, 13237, 10646, 1463, 4222, ...",352.9,1.747
...,...,...,...,...,...
231632,486161,227978,"[7895, 2741, 11708, 3029, 8054, 13490, 4205, 3...",415.2,3.004
231633,493372,1500678,"[4205, 13237, 7767, 853, 14371, 6800, 3691, 76...",14.8,1.918
231634,308080,37779,"[9522, 7868, 3792, 13735, 8733, 13237, 7369, 1...",59.2,1.865
231635,298512,506822,"[13917, 7685, 3957, 1296, 14620, 9253, 3229, 8...",188.0,2.145


In [None]:
RAW_recipes.to_csv('/content/drive/MyDrive/ADM Project/data/recipe_pp.csv', index = False)