# Imports

In [197]:
import pandas as pd
import numpy as np
import string
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import matplotlib.pyplot as plt
from collections import Counter
import re
from ast import literal_eval

# Exploratory Data Analysis

## Loading dataset

In [198]:
df = pd.read_csv("/Users/chrissibierich/code/christopherbierich/FeedMe/raw_data/Recipes/Food Ingredients and Recipe Dataset with Image Name Mapping.csv")

## Overview

In [199]:
df.head()

Unnamed: 0.1,Unnamed: 0,Title,Ingredients,Instructions,Image_Name,Cleaned_Ingredients
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"['2 large egg whites', '1 pound new potatoes (..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"['1 cup evaporated milk', '1 cup whole milk', ..."
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"['1 (¾- to 1-pound) round Italian loaf, cut in..."
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"['1 teaspoon dark brown sugar', '1 teaspoon ho..."


In [200]:
df.shape

(13501, 6)

In [201]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13501 entries, 0 to 13500
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Unnamed: 0           13501 non-null  int64 
 1   Title                13496 non-null  object
 2   Ingredients          13501 non-null  object
 3   Instructions         13493 non-null  object
 4   Image_Name           13501 non-null  object
 5   Cleaned_Ingredients  13501 non-null  object
dtypes: int64(1), object(5)
memory usage: 633.0+ KB


In [202]:
df.rename(columns={'Ingredients': 'Uncleaned_Ingredients'}, inplace=True)

In [203]:
df.rename(columns={'Cleaned_Ingredients': 'Ingredients'}, inplace=True)

In [204]:
def text_to_list(text):
    text = text.replace("['", "")
    text = text.replace("']", "")
    my_list = text.split("', '")
    return my_list

In [205]:
df['Ingredients'] = df['Ingredients'].apply(lambda x: text_to_list(x))

## Data Cleaning

In [206]:
stop_words = set(stopwords.words('english')) 
lemmatizer = WordNetLemmatizer()
list_words = ['storebought', 'garnish', 'homemade', 'fresh',
              'coarsely', 'grated', 'evaporated', 'pound',
              'new', 'inch', 'diameter', 'torn',
              'sturdy', 'loaf', 'ground', 'flake',
              'piece', 'gala', 'cored','melted',
              'unsalted','salted','whole', 'divided',
              'kosher','cup', 'tsp', 'tbsp',
              'small', 'medium', 'large', 'lb',
              'finely', 'thinly', 'chopped', 'freshly',
              'sliced', 'cut', 'crushed', 'teaspoon',
              'plus', 'room', 'temperature', 'dry',
              'lady', 'oz', 'total', 'goodquality',
              'tablespoon', 'g', 'ounce', 'peeled']

In [207]:
def cleaning(list_x):
    cleaned_list = []
    list_x = list(map(lambda x: x.lower(), list_x))
    list_x = list(map(lambda x: ''.join(word for word in x if not word.isdigit()), list_x))
    list_x = list(map(lambda x: ''.join(word for word in x if not word in string.punctuation), list_x))
    for i in list_x:
        word_tokens = word_tokenize(i)
        word_tokens = [lemmatizer.lemmatize(w) for w in word_tokens]
        text = [w for w in word_tokens if not w in stop_words if not w in list_words if w.isalpha()]
        cleaned_list.append(" ".join(text))
    return cleaned_list

In [208]:
df['Ingredients'] = df['Ingredients'].apply(lambda x: cleaning(x))

In [209]:
df.isnull().sum()

Unnamed: 0               0
Title                    5
Uncleaned_Ingredients    0
Instructions             8
Image_Name               0
Ingredients              0
dtype: int64

In [210]:
df.head()

Unnamed: 0.1,Unnamed: 0,Title,Uncleaned_Ingredients,Instructions,Image_Name,Ingredients
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"[chicken, salt, acorn squash, sage, rosemary, ..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"[egg white, potato, salt, black pepper, rosema..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"[milk, milk, garlic powder, onion powder, smok..."
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"[round italian cube, olive oil, sweet italian ..."
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"[dark brown sugar, hot water, bourbon, lemon j..."


In [211]:
all_ingredients = []
def add_all_ingredients(list_x):
    for i in list_x:
        all_ingredients.append(i)

In [212]:
df['Ingredients'].apply(lambda x: add_all_ingredients(x))

0        None
1        None
2        None
3        None
4        None
         ... 
13496    None
13497    None
13498    None
13499    None
13500    None
Name: Ingredients, Length: 13501, dtype: object

### Most common features

In [213]:
all_words = []
def add_all_words(list_x):
    for i in list_x:
        split = i.split()
        for word in split:
            all_words.append(word)

In [214]:
df['Ingredients'].apply(lambda x: add_all_words(x))

0        None
1        None
2        None
3        None
4        None
         ... 
13496    None
13497    None
13498    None
13499    None
13500    None
Name: Ingredients, Length: 13501, dtype: object

In [215]:
len(all_words)

389511

In [216]:
counts = Counter(all_words)
len(counts)

7025

In [217]:
sorted_counts = sorted(counts.items(), key=lambda x:x[1])
sorted_dict = dict(sorted_counts)

In [218]:
len(sorted_dict)

7025

In [219]:
list_ingredients = []
list_frequencies = []
for key in sorted_dict.keys():
    list_ingredients.append(key)

for value in sorted_dict.values():
    list_frequencies.append(value)

In [220]:
data_freq = {'ingredient': list_ingredients, 'frequency': list_frequencies}

In [221]:
df_freq = pd.DataFrame(data=data_freq)

In [222]:
df_freq.tail(20)

Unnamed: 0,ingredient,frequency
7005,stick,2672
7006,water,2696
7007,cream,3032
7008,flour,3412
7009,onion,3577
7010,white,3622
7011,red,3632
7012,leaf,3743
7013,egg,4119
7014,clove,4136


### Transposing labels

In [223]:
default_ingredients = ['apple', 'banana', 'beef', 'blueberries',
                       'bread', 'butter', 'carrot', 'cheese',
                       'chicken', 'chocolate',
                       'corn', 'eggs', 'flour', 'cheese',
                       'beans', 'ham', 'cream',
                       'lime', 'milk', 'mushrooms','onion',
                       'potato', 'shrimp', 'spinach', 'strawberries',
                       'sugar', 'tomato']

In [224]:
lemmatized = [lemmatizer.lemmatize(w) for w in default_ingredients]
lemmatized_default = lemmatized
lemmatized_default

['apple',
 'banana',
 'beef',
 'blueberry',
 'bread',
 'butter',
 'carrot',
 'cheese',
 'chicken',
 'chocolate',
 'corn',
 'egg',
 'flour',
 'cheese',
 'bean',
 'ham',
 'cream',
 'lime',
 'milk',
 'mushroom',
 'onion',
 'potato',
 'shrimp',
 'spinach',
 'strawberry',
 'sugar',
 'tomato']

In [225]:
siders = ['oil', 'cider', 'broth', 'juice', 'brisket', 'cream', 'ravioli', 'sauce']

In [226]:
def re_labelling_exp_2(list_x):
    relabeled_list = []
    for i in list_x:
        word_tokens = word_tokenize(i)
        label = i
        for ingredient in lemmatized_default:
            if ingredient in word_tokens:
                if not any(word in siders for word in i.split(' ')):
                    label = ingredient
        relabeled_list.append(label)
    return relabeled_list

In [227]:
df['labelled ingredients'] = df['Ingredients'].apply(lambda x: re_labelling_exp_2(x))

In [228]:
df.head()

Unnamed: 0.1,Unnamed: 0,Title,Uncleaned_Ingredients,Instructions,Image_Name,Ingredients,labelled ingredients
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"[chicken, salt, acorn squash, sage, rosemary, ...","[chicken, salt, acorn squash, sage, rosemary, ..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"[egg white, potato, salt, black pepper, rosema...","[egg, potato, salt, black pepper, rosemary, th..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"[milk, milk, garlic powder, onion powder, smok...","[milk, milk, garlic powder, onion, smoked papr..."
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"[round italian cube, olive oil, sweet italian ...","[round italian cube, olive oil, sweet italian ..."
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"[dark brown sugar, hot water, bourbon, lemon j...","[sugar, hot water, bourbon, lemon juice, butte..."


### One-Hot_Encoder

In [229]:
# def one_hot_encoder(dataframe):
#     for ingredient in lemmatized_default:
#         dataframe[ingredient] = 0
#     for i in range(dataframe.shape[0]):
#         for ingredient in lemmatized_default:
#             if ingredient in dataframe['labelled ingredients'][i]:
#                 dataframe[ingredient][i] = 1
#             else:
#                 dataframe[ingredient][i] = 0
#     return dataframe

In [230]:
# def create_matrix_row(dataframe):
#     for ingredient in lemmatized_default:
#         dataframe[ingredient] = 0


# def one_hot_encoder_row(row):
#     for ingredient in lemmatized_default:
#         if ingredient in row['labelled ingredients']:
#             row[ingredient][i] = 1
#         else:
#             row[ingredient][i] = 0
#     return row

In [231]:
def one_hot_encoder(row):
    for ingredient in lemmatized_default:
        if ingredient in row['labelled ingredients']:
            row[ingredient] = 1
        else:
            row[ingredient] = 0
    return row

In [232]:
df_final = df.apply(one_hot_encoder, axis=1)

In [233]:
df_final.head()

Unnamed: 0.1,Unnamed: 0,Title,Uncleaned_Ingredients,Instructions,Image_Name,Ingredients,labelled ingredients,apple,banana,beef,...,lime,milk,mushroom,onion,potato,shrimp,spinach,strawberry,sugar,tomato
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"[chicken, salt, acorn squash, sage, rosemary, ...","[chicken, salt, acorn squash, sage, rosemary, ...",1,0,0,...,0,0,0,1,0,0,0,0,0,0
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"[egg white, potato, salt, black pepper, rosema...","[egg, potato, salt, black pepper, rosemary, th...",0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"[milk, milk, garlic powder, onion powder, smok...","[milk, milk, garlic powder, onion, smoked papr...",0,0,0,...,0,1,0,1,0,0,0,0,0,0
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"[round italian cube, olive oil, sweet italian ...","[round italian cube, olive oil, sweet italian ...",0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"[dark brown sugar, hot water, bourbon, lemon j...","[sugar, hot water, bourbon, lemon juice, butte...",0,0,0,...,0,0,0,0,0,0,0,0,1,0


drop unmatched
Rename appropritately 
Reorder appropritately
Add vectorized ingredients

### Finishing touches

In [234]:
df_final.drop(columns='Unnamed: 0', inplace=True)

In [235]:
df_final.rename(columns={'Uncleaned_Ingredients': 'Ingredients', 'Ingredients': 'Cleaned_Ingredients', 'labelled ingredients': 'Relabelled_Ingredients'}, inplace=True)

In [236]:
def tokenize(list_x):
    tokenized = []
    for i in list_x:
        word_tokens = word_tokenize(i)
        for x in word_tokens:
            tokenized.append(x)
    return tokenized
    

In [237]:
df_final['Tokenized_Ingredients'] = df_final['Relabelled_Ingredients'].apply(lambda x: tokenize(x))

In [238]:
df_final.head()

Unnamed: 0,Title,Ingredients,Instructions,Image_Name,Cleaned_Ingredients,Relabelled_Ingredients,apple,banana,beef,blueberry,...,milk,mushroom,onion,potato,shrimp,spinach,strawberry,sugar,tomato,Tokenized_Ingredients
0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"[chicken, salt, acorn squash, sage, rosemary, ...","[chicken, salt, acorn squash, sage, rosemary, ...",1,0,0,0,...,0,0,1,0,0,0,0,0,0,"[chicken, salt, acorn, squash, sage, rosemary,..."
1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"[egg white, potato, salt, black pepper, rosema...","[egg, potato, salt, black pepper, rosemary, th...",0,0,0,0,...,0,0,0,1,0,0,0,0,0,"[egg, potato, salt, black, pepper, rosemary, t..."
2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"[milk, milk, garlic powder, onion powder, smok...","[milk, milk, garlic powder, onion, smoked papr...",0,0,0,0,...,1,0,1,0,0,0,0,0,0,"[milk, milk, garlic, powder, onion, smoked, pa..."
3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"[round italian cube, olive oil, sweet italian ...","[round italian cube, olive oil, sweet italian ...",0,0,0,0,...,0,0,1,0,0,0,0,0,0,"[round, italian, cube, olive, oil, sweet, ital..."
4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"[dark brown sugar, hot water, bourbon, lemon j...","[sugar, hot water, bourbon, lemon juice, butte...",0,0,0,0,...,0,0,0,0,0,0,0,1,0,"[sugar, hot, water, bourbon, lemon, juice, but..."


In [239]:
df_final = df_final[['Title', 'Instructions','Image_Name',
                     'Ingredients', 'Cleaned_Ingredients', 'Relabelled_Ingredients', 'Tokenized_Ingredients',
                     'apple','banana','beef','blueberry','bread','butter','carrot','cheese','chicken','chocolate','corn',
                     'egg','flour','cheese','bean','ham','cream','lime','milk','mushroom','onion','potato','shrimp',
                     'spinach','strawberry','sugar','tomato']]

In [240]:
df_final.head()

Unnamed: 0,Title,Instructions,Image_Name,Ingredients,Cleaned_Ingredients,Relabelled_Ingredients,Tokenized_Ingredients,apple,banana,beef,...,lime,milk,mushroom,onion,potato,shrimp,spinach,strawberry,sugar,tomato
0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","[chicken, salt, acorn squash, sage, rosemary, ...","[chicken, salt, acorn squash, sage, rosemary, ...","[chicken, salt, acorn, squash, sage, rosemary,...",1,0,0,...,0,0,0,1,0,0,0,0,0,0
1,Crispy Salt and Pepper Potatoes,Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"['2 large egg whites', '1 pound new potatoes (...","[egg white, potato, salt, black pepper, rosema...","[egg, potato, salt, black pepper, rosemary, th...","[egg, potato, salt, black, pepper, rosemary, t...",0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,Thanksgiving Mac and Cheese,Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"['1 cup evaporated milk', '1 cup whole milk', ...","[milk, milk, garlic powder, onion powder, smok...","[milk, milk, garlic powder, onion, smoked papr...","[milk, milk, garlic, powder, onion, smoked, pa...",0,0,0,...,0,1,0,1,0,0,0,0,0,0
3,Italian Sausage and Bread Stuffing,Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"['1 (¾- to 1-pound) round Italian loaf, cut in...","[round italian cube, olive oil, sweet italian ...","[round italian cube, olive oil, sweet italian ...","[round, italian, cube, olive, oil, sweet, ital...",0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,Newton's Law,Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"['1 teaspoon dark brown sugar', '1 teaspoon ho...","[dark brown sugar, hot water, bourbon, lemon j...","[sugar, hot water, bourbon, lemon juice, butte...","[sugar, hot, water, bourbon, lemon, juice, but...",0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [241]:
df_final.to_csv('/Users/chrissibierich/code/christopherbierich/FeedMe/raw_data/Recipes/Cleaned_Recepies.csv')

### Feature Engineering: Prep Time

In [242]:
# def prep_time(sentence):
#     default_time = 2
#     prep_time = 0
#     # seperate string into a list of sentences seperated by "."
#     split_string = sentence.split('.')
#     for sentence in split_string:
#         word_tokenize = tokenize(sentence)
#         if any(word in word_tokenize in 'minutes'))
#         for word in word_tokenize:
#             if word.isdigit():
#                 prep_time += int(word)
#             else:
#                 prep_time += default_time
                
#     return prep_time

In [243]:
# example = df_final.iloc[1,1]

In [244]:
# result = prep_time(example)

In [245]:
# result

Idea about prep_time
- loop over each sentence (if no time intervall is indicated) --> add default value
                          (if time intervals are given, take the latter one)
- differentiate between minutes, hours and F'

/d --> matches any digits
/d/d to consequtive digits 
/d{2} --> exactly do digits 

adding default value of 5 minutes to each value and extract the value and add 

In [246]:
pattern = '(\d+) minutes'
text = example

In [247]:
search = re.findall(pattern, text)

In [248]:
search

['4', '10']

In [249]:
pattern_hour = '(\d) hour'
pattern_hour

'(\\d) hour'

In [250]:
example = df_final.iloc[2,1]
example

'Place a rack in middle of oven; preheat to 400°. Bring evaporated milk and whole milk to a bare simmer in a large saucepan over medium heat. Whisk in garlic powder, onion powder, paprika, pepper, and 1 tsp. salt. Working in batches, whisk in three fourths of the cheddar, then all of the cream cheese.\nMeanwhile, bring a large pot of generously salted water to a boil (it should have a little less salt than seawater). Cook macaroni, stirring occasionally, until very al dente, about 4 minutes. Drain in a colander.\nAdd macaroni to cheese sauce in pan and mix until well coated. Evenly spread out half of macaroni mixture in a 13x9" baking dish. Sprinkle half of remaining cheddar evenly over. Layer remaining macaroni mixture on top and sprinkle with remaining cheddar. Bake until all of the cheese is melted, about 10 minutes. Let cool slightly before serving.'

In [259]:
def prep_time(instructions):
    prep_time = 0
    default_time = 2
    pattern_minutes = '(\d+) minutes'
    pattern_hour = '(\d) hour'
    sentences = instructions.split(".")
    prep_time += default_time * len(sentences)
    for sentence in sentences:
        search_minutes = re.findall(pattern_minutes, sentence)
        search_hours = re.findall(pattern_hour, sentence)
        for m in search_minutes:
            prep_time += int(m)
        for h in search_hours:
            prep_time += (int(h) * 60)
    return prep_time
        
    # adding all numbers within list
        
        
    

In [260]:
time = prep_time(example)

In [261]:
time

44

In [254]:
df_prep_time = pd.read_csv('/Users/chrissibierich/code/christopherbierich/FeedMe/raw_data/Recipes/reduced_dataset.csv')

In [255]:
df_prep_time.head()

Unnamed: 0.1,Unnamed: 0,Title,Instructions,Image_Name,Ingredients,Cleaned_Ingredients,Relabelled_Ingredients,Tokenized_Ingredients,apple,banana,...,lime,milk,mushroom,onion,potato,shrimp,spinach,strawberry,sugar,tomato
0,1,Crispy Salt and Pepper Potatoes,Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"['2 large egg whites', '1 pound new potatoes (...","['egg white', 'potato', 'salt', 'black pepper'...","['egg', 'potato', 'salt', 'black pepper', 'ros...","['egg', 'potato', 'salt', 'black', 'pepper', '...",0,0,...,0,0,0,0,1,0,0,0,0,0
1,2,Thanksgiving Mac and Cheese,Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"['1 cup evaporated milk', '1 cup whole milk', ...","['milk', 'milk', 'garlic powder', 'onion powde...","['milk', 'milk', 'garlic powder', 'onion', 'sm...","['milk', 'milk', 'garlic', 'powder', 'onion', ...",0,0,...,0,1,0,1,0,0,0,0,0,0
2,4,Newton's Law,Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"['1 teaspoon dark brown sugar', '1 teaspoon ho...","['dark brown sugar', 'hot water', 'bourbon', '...","['sugar', 'hot water', 'bourbon', 'lemon juice...","['sugar', 'hot', 'water', 'bourbon', 'lemon', ...",0,0,...,0,0,0,0,0,0,0,0,1,0
3,5,Warm Comfort,Place 2 chamomile tea bags in a heatsafe vesse...,warm-comfort-tequila-chamomile-toddy,"['2 chamomile tea bags', '1½ oz. reposado tequ...","['chamomile tea bag', 'reposado tequila', 'lem...","['chamomile tea bag', 'reposado tequila', 'lem...","['chamomile', 'tea', 'bag', 'reposado', 'tequi...",0,0,...,0,0,0,0,0,0,0,0,0,0
4,7,Turmeric Hot Toddy,"For the turmeric syrup, combine ½ cup hot wate...",turmeric-hot-toddy-claire-sprouse,"['¼ cup granulated sugar', '¾ tsp. ground turm...","['granulated sugar', 'turmeric', 'amontillado ...","['sugar', 'turmeric', 'amontillado sherry', 'b...","['sugar', 'turmeric', 'amontillado', 'sherry',...",0,0,...,0,0,0,0,0,0,0,0,1,0


In [256]:
df_prep_time['Instructions'].isnull().sum()
df_prep_time_na = df_prep_time.dropna()

In [257]:
df_prep_time['Ingredients'] = df_prep_time['Cleaned_Ingredients'].apply(literal_eval)
df_prep_time['Cleaned_Ingredients'] = df_prep_time['Cleaned_Ingredients'].apply(literal_eval)
df_prep_time['Relabelled_Ingredients'] = df_prep_time['Relabelled_Ingredients'].apply(literal_eval)
df_prep_time['Tokenized_Ingredients'] = df_prep_time['Tokenized_Ingredients'].apply(literal_eval)

In [263]:
df_prep_time_na['Prep Time'] = df_prep_time_na['Instructions'].apply(lambda x: prep_time(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_prep_time_na['Prep Time'] = df_prep_time_na['Instructions'].apply(lambda x: prep_time(x))


In [264]:
df_prep_time_na.shape

(7873, 36)

In [266]:
df_prep_time_na.head(20)

Unnamed: 0.1,Unnamed: 0,Title,Instructions,Image_Name,Ingredients,Cleaned_Ingredients,Relabelled_Ingredients,Tokenized_Ingredients,apple,banana,...,milk,mushroom,onion,potato,shrimp,spinach,strawberry,sugar,tomato,Prep Time
0,1,Crispy Salt and Pepper Potatoes,Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"['2 large egg whites', '1 pound new potatoes (...","['egg white', 'potato', 'salt', 'black pepper'...","['egg', 'potato', 'salt', 'black pepper', 'ros...","['egg', 'potato', 'salt', 'black', 'pepper', '...",0,0,...,0,0,0,1,0,0,0,0,0,34
1,2,Thanksgiving Mac and Cheese,Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"['1 cup evaporated milk', '1 cup whole milk', ...","['milk', 'milk', 'garlic powder', 'onion powde...","['milk', 'milk', 'garlic powder', 'onion', 'sm...","['milk', 'milk', 'garlic', 'powder', 'onion', ...",0,0,...,1,0,1,0,0,0,0,0,0,44
2,4,Newton's Law,Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"['1 teaspoon dark brown sugar', '1 teaspoon ho...","['dark brown sugar', 'hot water', 'bourbon', '...","['sugar', 'hot water', 'bourbon', 'lemon juice...","['sugar', 'hot', 'water', 'bourbon', 'lemon', ...",0,0,...,0,0,0,0,0,0,0,1,0,12
3,5,Warm Comfort,Place 2 chamomile tea bags in a heatsafe vesse...,warm-comfort-tequila-chamomile-toddy,"['2 chamomile tea bags', '1½ oz. reposado tequ...","['chamomile tea bag', 'reposado tequila', 'lem...","['chamomile tea bag', 'reposado tequila', 'lem...","['chamomile', 'tea', 'bag', 'reposado', 'tequi...",0,0,...,0,0,0,0,0,0,0,0,0,21
4,7,Turmeric Hot Toddy,"For the turmeric syrup, combine ½ cup hot wate...",turmeric-hot-toddy-claire-sprouse,"['¼ cup granulated sugar', '¾ tsp. ground turm...","['granulated sugar', 'turmeric', 'amontillado ...","['sugar', 'turmeric', 'amontillado sherry', 'b...","['sugar', 'turmeric', 'amontillado', 'sherry',...",0,0,...,0,0,0,0,0,0,0,1,0,30
5,10,Hot Pimento Cheese Dip,Put the chipotle peppers and adobo sauce in a ...,hot-pimento-cheese-dip-polina-chesnakova,['1 (7 oz./200 g) can chipotle in adobo sauce'...,"['chipotle adobo sauce', 'garlic clove minced'...","['chipotle adobo sauce', 'garlic clove minced'...","['chipotle', 'adobo', 'sauce', 'garlic', 'clov...",0,0,...,1,0,0,0,0,0,0,0,0,38
6,13,Caesar Salad Roast Chicken,Place a rack in lower third of oven; preheat t...,caesar-salad-roast-chicken,"['8 anchovies, mashed to a paste', '8 garlic c...","['anchovy mashed paste', 'garlic clove', 'mayo...","['anchovy mashed paste', 'garlic clove', 'mayo...","['anchovy', 'mashed', 'paste', 'garlic', 'clov...",0,0,...,0,0,0,0,0,0,0,0,0,199
7,15,Gorditas con Camarones,"Stir masa, 1 Tbsp. lard, 1 tsp. Diamond Crysta...",gorditas-con-camarones,['1⅓ cups instant corn masa flour (such as Mas...,"['instant corn masa flour maseca', 'lard veget...","['flour', 'lard vegetable oil', 'diamond cryst...","['flour', 'lard', 'vegetable', 'oil', 'diamond...",0,0,...,0,0,1,0,1,0,0,0,0,360
8,16,Enfrijoladas,"Using 2 Tbsp. oil, brush both sides of each to...",enfrijoladas,"['5 Tbsp. vegetable oil, divided', '8 corn tor...","['vegetable oil', 'corn tortilla', 'chorizo', ...","['vegetable oil', 'corn', 'chorizo', 'garlic c...","['vegetable', 'oil', 'corn', 'chorizo', 'garli...",0,0,...,0,0,1,0,0,0,0,0,0,63
9,17,Caramelized Plantain Parfait,Heat oil in a large nonstick skillet over medi...,caramelized-plantain-parfait,"['6 Tbsp. virgin coconut oil', '4 ripe (spotte...","['virgin coconut oil', 'ripe spotted plátanos ...","['virgin coconut oil', 'ripe spotted plátanos ...","['virgin', 'coconut', 'oil', 'ripe', 'spotted'...",0,0,...,1,0,0,0,0,0,0,0,0,13


In [267]:
df_prep_time_na.to_csv('/Users/chrissibierich/code/christopherbierich/FeedMe/raw_data/Recipes/Recipes_incl_pre_time.csv')

In [270]:
def prep_time_range(prep_time_min):
    prep_time_h = round(prep_time_min / 60)
    return f'{prep_time_h} - {prep_time_h + 1} hours'

In [271]:
df_prep_time_na['Prep Time Range'] = df_prep_time_na['Prep Time'].apply(lambda x: prep_time_range(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_prep_time_na['Prep Time Range'] = df_prep_time_na['Prep Time'].apply(lambda x: prep_time_range(x))


In [272]:
df_prep_time_na

Unnamed: 0.1,Unnamed: 0,Title,Instructions,Image_Name,Ingredients,Cleaned_Ingredients,Relabelled_Ingredients,Tokenized_Ingredients,apple,banana,...,mushroom,onion,potato,shrimp,spinach,strawberry,sugar,tomato,Prep Time,Prep Time Range
0,1,Crispy Salt and Pepper Potatoes,Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"['2 large egg whites', '1 pound new potatoes (...","['egg white', 'potato', 'salt', 'black pepper'...","['egg', 'potato', 'salt', 'black pepper', 'ros...","['egg', 'potato', 'salt', 'black', 'pepper', '...",0,0,...,0,0,1,0,0,0,0,0,34,1 - 2 hours
1,2,Thanksgiving Mac and Cheese,Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"['1 cup evaporated milk', '1 cup whole milk', ...","['milk', 'milk', 'garlic powder', 'onion powde...","['milk', 'milk', 'garlic powder', 'onion', 'sm...","['milk', 'milk', 'garlic', 'powder', 'onion', ...",0,0,...,0,1,0,0,0,0,0,0,44,1 - 2 hours
2,4,Newton's Law,Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"['1 teaspoon dark brown sugar', '1 teaspoon ho...","['dark brown sugar', 'hot water', 'bourbon', '...","['sugar', 'hot water', 'bourbon', 'lemon juice...","['sugar', 'hot', 'water', 'bourbon', 'lemon', ...",0,0,...,0,0,0,0,0,0,1,0,12,0 - 1 hours
3,5,Warm Comfort,Place 2 chamomile tea bags in a heatsafe vesse...,warm-comfort-tequila-chamomile-toddy,"['2 chamomile tea bags', '1½ oz. reposado tequ...","['chamomile tea bag', 'reposado tequila', 'lem...","['chamomile tea bag', 'reposado tequila', 'lem...","['chamomile', 'tea', 'bag', 'reposado', 'tequi...",0,0,...,0,0,0,0,0,0,0,0,21,0 - 1 hours
4,7,Turmeric Hot Toddy,"For the turmeric syrup, combine ½ cup hot wate...",turmeric-hot-toddy-claire-sprouse,"['¼ cup granulated sugar', '¾ tsp. ground turm...","['granulated sugar', 'turmeric', 'amontillado ...","['sugar', 'turmeric', 'amontillado sherry', 'b...","['sugar', 'turmeric', 'amontillado', 'sherry',...",0,0,...,0,0,0,0,0,0,1,0,30,0 - 1 hours
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7875,13494,Ginger-Pecan Roulade with Honey-Glazed Pecans,Preheat oven to 350°F. Line bottom and sides o...,ginger-pecan-roulade-with-honey-glazed-pecans-...,"['1/2 stick (1/4 cup) unsalted butter, melted,...","['stick butter additional brushing pan', 'peca...","['butter', 'pecan toasted cooled', 'flour', 'u...","['butter', 'pecan', 'toasted', 'cooled', 'flou...",0,0,...,0,0,0,0,0,0,1,0,55,1 - 2 hours
7876,13496,Brownie Pudding Cake,Preheat the oven to 350°F. Into a bowl sift to...,brownie-pudding-cake-14408,"['1 cup all-purpose flour', '2/3 cup unsweeten...","['allpurpose flour', 'unsweetened cocoa powder...","['flour', 'unsweetened cocoa powder', 'doublea...","['flour', 'unsweetened', 'cocoa', 'powder', 'd...",0,0,...,0,0,0,0,0,0,1,0,54,1 - 2 hours
7877,13498,Rice with Soy-Glazed Bonito Flakes and Sesame ...,"If using katsuo bushi flakes from package, moi...",rice-with-soy-glazed-bonito-flakes-and-sesame-...,['Leftover katsuo bushi (dried bonito flakes) ...,['leftover katsuo bushi dried bonito making da...,['leftover katsuo bushi dried bonito making da...,"['leftover', 'katsuo', 'bushi', 'dried', 'boni...",0,0,...,0,0,0,0,0,0,1,0,47,1 - 2 hours
7878,13499,Spanakopita,Melt 1 tablespoon butter in a 12-inch heavy sk...,spanakopita-107344,['1 stick (1/2 cup) plus 1 tablespoon unsalted...,"['stick butter', 'baby spinach', 'feta crumble...","['butter', 'spinach', 'feta crumbled scant', '...","['butter', 'spinach', 'feta', 'crumbled', 'sca...",0,0,...,0,0,0,0,1,0,0,0,71,1 - 2 hours


In [273]:
df_prep_time_na.to_csv('/Users/chrissibierich/code/christopherbierich/FeedMe/raw_data/Recipes/final_recipes_thurday_morning.csv')