## Import libraries

In [1]:
# Import libraries
import ast

import pandas as pd

## Load data and replace ingredients

In [2]:
# Read the recipe table and the raw -> replaced ingredient lookup table
df_main = pd.read_csv('../data/RAW_recipes_with_one_cuisine.csv')
df_mapping = pd.read_csv('../data/ingr_map.csv')

# Build a plain dict keyed by raw ingredient name, valued by its replacement
mapping_dict = dict(zip(df_mapping['raw_ingr'], df_mapping['replaced']))

# Function to replace ingredients
def replace_ingredients(ingredient_list, mapping=None):
    """Return a new list with each ingredient swapped for its mapped replacement.

    Parameters
    ----------
    ingredient_list : iterable
        Ingredient names of a single recipe.
    mapping : dict, optional
        Lookup from raw ingredient name to replacement name. When omitted,
        the notebook-level ``mapping_dict`` is used.

    Returns
    -------
    list
        One entry per input ingredient, in the original order. Ingredients
        without an entry in the mapping are passed through unchanged.
    """
    # Fall back to the notebook-level dictionary only when no mapping is given,
    # so an explicitly passed empty dict is still honoured.
    lookup = mapping_dict if mapping is None else mapping
    return [lookup.get(ingredient, ingredient) for ingredient in ingredient_list]

# Apply the mapping to the 'ingredients' column in the main dataset.
# Each cell of 'ingredients' is the string representation of a list, so it is
# parsed first. ast.literal_eval only accepts Python literals, so nothing stored
# in the CSV can be executed as code (which a bare eval would allow).
df_main['replaced_ingredients'] = (
    df_main['ingredients']
    .apply(ast.literal_eval)      # "['a', 'b']" -> ['a', 'b']
    .apply(replace_ingredients)   # map each raw name to its replacement
)

In [3]:
# Preview the first five rows, including the new 'replaced_ingredients' column
df_main.head(n=5)

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,Cuisine_Tags,replaced_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,['mexican'],"[winter squash, mexican seasoning, mixed spice..."
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,['northeastern-united-states'],"[pizza crust, sausage, egg, milk, salt and pep..."
2,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,['northeastern-united-states'],"[tomato juice, apple cider vinegar, sugar, sal..."
3,aww marinated olives,25274,15,21730,2002-04-14,"['15-minutes-or-less', 'time-to-make', 'course...","[380.7, 53.0, 7.0, 24.0, 6.0, 24.0, 6.0]",4,['toast the fennel seeds and lightly crush the...,my italian mil was thoroughly impressed by my ...,"['fennel seed', 'green olive', 'ripe olive', '...",9,['canadian'],"[fennel seed, green olive, ripe olive, garlic,..."
4,chile rellenos,43026,45,52268,2002-10-14,"['60-minutes-or-less', 'time-to-make', 'course...","[94.0, 10.0, 0.0, 11.0, 11.0, 21.0, 0.0]",9,"['drain green chiles', 'sprinkle cornstarch on...",a favorite from a local restaurant no longer i...,"['egg roll wrap', 'whole green chili', 'cheese...",5,['southwestern-united-states'],"[egg roll wrap, whole green chili, cheese, cor..."


## Check the unique ingredients

In [6]:
# Merge every recipe's ingredient list into a single set of distinct names
unique_ingredients = set().union(*df_main['replaced_ingredients'])

# Count the distinct ingredients and report the total
num_unique_ingredients = len(unique_ingredients)
print(f'The number of unique ingredients is: {num_unique_ingredients}')

The number of unique ingredients is: 7755


In [8]:
# Display the full set of unique ingredient names (a set, so the order is arbitrary)
unique_ingredients

{'beef gravy',
 'dried mild red chili pepper',
 'quick-cooking rolled oat',
 'herb stuffing mix',
 'jello gelatin',
 'cornmeal mix',
 'orange flower water',
 'wax bean',
 'cod steak',
 'winter melon',
 'allspice berry',
 'barilla lasagna',
 'turkey dripping',
 'lemonade mix',
 'dried italian seasoning',
 'hard green pear',
 'cantonese roast pork',
 'jellyfish',
 'blueberry jam',
 'black bean garlic sauce',
 'spicy hot',
 'whey protein',
 'munchee cheese',
 'ginger ale',
 'roasted cumin seed',
 'country ham',
 'biscottus',
 'white chocolate baking square',
 'roasted almond',
 'red jell-o',
 'persimmon pulp',
 'turkey stuffing mix',
 'double cream brie cheese',
 'cooking spray',
 'no-sugar-added strawberry preserve',
 'dried seaweed flake',
 'white wine',
 'canned snail',
 'medium sherry',
 'beefsteak tomato',
 'dried chili pepper flake',
 'cellophane noodle',
 'magic shell ice cream topping',
 'cocktail frank',
 'cilantro leaf',
 'baby mustard cres',
 'old-fashioned oatmeal',
 'hot pepp

## Check replaced ingredients

In [None]:
# Normalise the search term: lower-case, surrounding whitespace removed
search_ing = 'french vanilla pudding and pie filling mix'.lower().strip()

# Keep the recipes whose raw 'ingredients' string contains the search term.
# This is a plain substring match (regex=False), so longer ingredient names
# that contain the term also match.
search_ing_df = df_main[df_main['ingredients'].str.contains(search_ing, regex=False)]

# Show the raw ingredient strings of the matching recipes
search_ing_df['ingredients']

In [9]:
# Select recipe id 159469 and show its ingredient list after replacement
df_main.loc[df_main['id'] == 159469, 'replaced_ingredients'].iloc[0]

['french vanilla pudding and pie filling mix',
 'milk',
 'whipping cream',
 'vanilla extract']

In [10]:
# Raw (pre-replacement) ingredient string of recipe id 159469
df_main.loc[df_main['id'] == 159469, 'ingredients'].iloc[0]

"['sugar free fat free french vanilla pudding and pie filling mix', 'milk', 'whipping cream', 'vanilla extract']"

## Save to CSV file

In [12]:
# Write the frame, including 'replaced_ingredients', to CSV without the index,
# then echo the first rows as plain text
df_main.to_csv(
    path_or_buf='../data/RAW_recipes_with_one_cuisine_replaced_ingr.csv',
    index=False,
)
print(df_main.head(n=5))

                                         name      id  minutes  \
0  arriba   baked winter squash mexican style  137739       55   
1            a bit different  breakfast pizza   31490       30   
2          amish  tomato ketchup  for canning   44061      190   
3                       aww  marinated olives   25274       15   
4                              chile rellenos   43026       45   

   contributor_id   submitted  \
0           47892  2005-09-16   
1           26278  2002-06-17   
2           41706  2002-10-25   
3           21730  2002-04-14   
4           52268  2002-10-14   

                                                tags  \
0  ['60-minutes-or-less', 'time-to-make', 'course...   
1  ['30-minutes-or-less', 'time-to-make', 'course...   
2  ['weeknight', 'time-to-make', 'course', 'main-...   
3  ['15-minutes-or-less', 'time-to-make', 'course...   
4  ['60-minutes-or-less', 'time-to-make', 'course...   

                                   nutrition  n_steps  \
0      [51