# Imports

In [1]:
import pandas as pd
import numpy as np
import string
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import matplotlib.pyplot as plt
from collections import Counter

# Exploratory Data Analysis

## Loading dataset

In [2]:
df = pd.read_csv("/Users/chrissibierich/code/christopherbierich/FeedMe/raw_data/Recipes/Food Ingredients and Recipe Dataset with Image Name Mapping.csv")

## Overview

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,Title,Ingredients,Instructions,Image_Name,Cleaned_Ingredients
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"['2 large egg whites', '1 pound new potatoes (..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"['1 cup evaporated milk', '1 cup whole milk', ..."
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"['1 (¾- to 1-pound) round Italian loaf, cut in..."
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"['1 teaspoon dark brown sugar', '1 teaspoon ho..."


In [4]:
df.shape

(13501, 6)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13501 entries, 0 to 13500
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Unnamed: 0           13501 non-null  int64 
 1   Title                13496 non-null  object
 2   Ingredients          13501 non-null  object
 3   Instructions         13493 non-null  object
 4   Image_Name           13501 non-null  object
 5   Cleaned_Ingredients  13501 non-null  object
dtypes: int64(1), object(5)
memory usage: 633.0+ KB


In [6]:
df.rename(columns={'Ingredients': 'Uncleaned_Ingredients'}, inplace=True)

In [7]:
df.rename(columns={'Cleaned_Ingredients': 'Ingredients'}, inplace=True)

In [8]:
def text_to_list(text):
    text = text.replace("['", "")
    text = text.replace("']", "")
    my_list = text.split("', '")
    return my_list

In [9]:
df['Ingredients'] = df['Ingredients'].apply(lambda x: text_to_list(x))

## Data Cleaning

1. Lowercase everything 
2. Remove numbers
3. Remove punctuation
4. Remove stopwords
5. Remove scale words ['inches', 'teaspoon', medium, large, smalllb, inch, 1/2, 3/4]

In [10]:
stop_words = set(stopwords.words('english')) 
lemmatizer = WordNetLemmatizer()
list_words = ['storebought', 'garnish', 'homemade', 'fresh',
              'coarsely', 'grated', 'evaporated', 'pound',
              'new', 'inch', 'diameter', 'torn',
              'sturdy', 'loaf', 'ground', 'flake',
              'piece', 'gala', 'cored','melted',
              'unsalted','salted','whole', 'divided',
              'kosher','cup', 'tsp', 'tbsp',
              'small', 'medium', 'large', 'lb',
              'finely', 'thinly', 'chopped', 'freshly',
              'sliced', 'cut', 'crushed', 'teaspoon',
              'plus', 'room', 'temperature', 'dry',
              'lady', 'oz', 'total', 'goodquality',
              'tablespoon', 'g', 'ounce', 'peeled']

In [11]:
def cleaning(list_x):
    cleaned_list = []
    list_x = list(map(lambda x: x.lower(), list_x))
    list_x = list(map(lambda x: ''.join(word for word in x if not word.isdigit()), list_x))
    list_x = list(map(lambda x: ''.join(word for word in x if not word in string.punctuation), list_x))
    for i in list_x:
        word_tokens = word_tokenize(i)
        word_tokens = [lemmatizer.lemmatize(w) for w in word_tokens]
        text = [w for w in word_tokens if not w in stop_words if not w in list_words if w.isalpha()]
        cleaned_list.append(" ".join(text))
    return cleaned_list

In [12]:
df['Ingredients'] = df['Ingredients'].apply(lambda x: cleaning(x))

In [13]:
df.iloc[0,4]

'miso-butter-roast-chicken-acorn-squash-panzanella'

In [14]:
df.isnull().sum()

Unnamed: 0               0
Title                    5
Uncleaned_Ingredients    0
Instructions             8
Image_Name               0
Ingredients              0
dtype: int64

In [15]:
df.head()

Unnamed: 0.1,Unnamed: 0,Title,Uncleaned_Ingredients,Instructions,Image_Name,Ingredients
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"[chicken, salt, acorn squash, sage, rosemary, ..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"[egg white, potato, salt, black pepper, rosema..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"[milk, milk, garlic powder, onion powder, smok..."
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"[round italian cube, olive oil, sweet italian ..."
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"[dark brown sugar, hot water, bourbon, lemon j..."


In [16]:
all_ingredients = []
def add_all_ingredients(list_x):
    for i in list_x:
        all_ingredients.append(i)

In [17]:
df['Ingredients'].apply(lambda x: add_all_ingredients(x))

0        None
1        None
2        None
3        None
4        None
         ... 
13496    None
13497    None
13498    None
13499    None
13500    None
Name: Ingredients, Length: 13501, dtype: object

In [18]:
all_ingredients_df = pd.DataFrame(all_ingredients)

In [19]:
all_ingredients_df.to_csv('/Users/chrissibierich/code/christopherbierich/all_ingredients.csv')

check whether recipe includes 30 items from fridge 
- Map most common features 
- One-Hot-Endoder
- Disregards elements considered as condiments 

### Most common features

In [20]:
all_words = []
def add_all_words(list_x):
    for i in list_x:
        split = i.split()
        for word in split:
            all_words.append(word)

In [21]:
df['Ingredients'].apply(lambda x: add_all_words(x))

0        None
1        None
2        None
3        None
4        None
         ... 
13496    None
13497    None
13498    None
13499    None
13500    None
Name: Ingredients, Length: 13501, dtype: object

In [22]:
len(all_words)

389511

In [23]:
counts = Counter(all_words)
len(counts)

7025

In [24]:
sorted_counts = sorted(counts.items(), key=lambda x:x[1])
sorted_dict = dict(sorted_counts)

In [25]:
len(sorted_dict)

7025

In [26]:
list_ingredients = []
list_frequencies = []
for key in sorted_dict.keys():
    list_ingredients.append(key)

for value in sorted_dict.values():
    list_frequencies.append(value)

In [27]:
data_freq = {'ingredient': list_ingredients, 'frequency': list_frequencies}

In [28]:
df_freq = pd.DataFrame(data=data_freq)

In [29]:
df_freq.tail(20)

Unnamed: 0,ingredient,frequency
7005,stick,2672
7006,water,2696
7007,cream,3032
7008,flour,3412
7009,onion,3577
7010,white,3622
7011,red,3632
7012,leaf,3743
7013,egg,4119
7014,clove,4136


Word for word in each string:
  if word = to any of the 30 ingredients:
   label as word from default ingredients 
   from the model or is considered a document:
  if words such as oil, broth, etc. are accomponied, do label as 
    

### Transposing labels

Create a list of condoments and ingredients from the model ()

In [30]:
default_ingredients = ['apple', 'banana', 'beef', 'blueberries',
                       'bread', 'butter', 'carrot', 'cheese',
                       'chicken', 'chocolate',
                       'corn', 'eggs', 'flour', 'cheese',
                       'beans', 'ham', 'cream',
                       'lime', 'milk', 'mushrooms','onion',
                       'potato', 'shrimp', 'spinach', 'strawberries',
                       'sugar', 'tomato']

In [31]:
lemmatized = [lemmatizer.lemmatize(w) for w in default_ingredients]
lemmatized_default = lemmatized
lemmatized_default

['apple',
 'banana',
 'beef',
 'blueberry',
 'bread',
 'butter',
 'carrot',
 'cheese',
 'chicken',
 'chocolate',
 'corn',
 'egg',
 'flour',
 'cheese',
 'bean',
 'ham',
 'cream',
 'lime',
 'milk',
 'mushroom',
 'onion',
 'potato',
 'shrimp',
 'spinach',
 'strawberry',
 'sugar',
 'tomato']

In [32]:
occurences_words = []

for word in lemmatized_default:
    
    if word in list_ingredients:
        occurences_words.append(word)
        
occurences_words

['apple',
 'banana',
 'beef',
 'blueberry',
 'bread',
 'butter',
 'carrot',
 'cheese',
 'chicken',
 'chocolate',
 'corn',
 'egg',
 'flour',
 'cheese',
 'bean',
 'ham',
 'cream',
 'lime',
 'milk',
 'mushroom',
 'onion',
 'potato',
 'shrimp',
 'spinach',
 'strawberry',
 'sugar',
 'tomato']

In [33]:
siders = ['oil', 'cider', 'broth', 'juice', 'brisket', 'cream', 'ravioli', 'sauce']

In [34]:
def re_labelling_exp_2(list_x):
    relabeled_list = []
    for i in list_x:
        word_tokens = word_tokenize(i)
        label = i
        for ingredient in lemmatized_default:
            if ingredient in word_tokens:
                if not any(word in siders for word in i.split(' ')):
                    label = ingredient
        relabeled_list.append(label)
    return relabeled_list

In [35]:
df['labelled ingredients'] = df['Ingredients'].apply(lambda x: re_labelling_exp_2(x))

In [36]:
df.head()

Unnamed: 0.1,Unnamed: 0,Title,Uncleaned_Ingredients,Instructions,Image_Name,Ingredients,labelled ingredients
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"[chicken, salt, acorn squash, sage, rosemary, ...","[chicken, salt, acorn squash, sage, rosemary, ..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"[egg white, potato, salt, black pepper, rosema...","[egg, potato, salt, black pepper, rosemary, th..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"[milk, milk, garlic powder, onion powder, smok...","[milk, milk, garlic powder, onion, smoked papr..."
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"[round italian cube, olive oil, sweet italian ...","[round italian cube, olive oil, sweet italian ..."
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"[dark brown sugar, hot water, bourbon, lemon j...","[sugar, hot water, bourbon, lemon juice, butte..."


What to do tomorrow:
- on-hot encouder 
- Implement condoments (oil, salt, water, )

Filter by recipes that we can never do (stuff with fish, or vegetables)

df.iloc[0,5]

In [37]:
example = df.iloc[3,6]

### One-Hot_Encoder

In [38]:
df_exp = df.head(20)

In [39]:
df_exp.head(3)

Unnamed: 0.1,Unnamed: 0,Title,Uncleaned_Ingredients,Instructions,Image_Name,Ingredients,labelled ingredients
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"[chicken, salt, acorn squash, sage, rosemary, ...","[chicken, salt, acorn squash, sage, rosemary, ..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"[egg white, potato, salt, black pepper, rosema...","[egg, potato, salt, black pepper, rosemary, th..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"[milk, milk, garlic powder, onion powder, smok...","[milk, milk, garlic powder, onion, smoked papr..."


In [40]:
df_exp.head()

Unnamed: 0.1,Unnamed: 0,Title,Uncleaned_Ingredients,Instructions,Image_Name,Ingredients,labelled ingredients
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"[chicken, salt, acorn squash, sage, rosemary, ...","[chicken, salt, acorn squash, sage, rosemary, ..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"[egg white, potato, salt, black pepper, rosema...","[egg, potato, salt, black pepper, rosemary, th..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"[milk, milk, garlic powder, onion powder, smok...","[milk, milk, garlic powder, onion, smoked papr..."
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"[round italian cube, olive oil, sweet italian ...","[round italian cube, olive oil, sweet italian ..."
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"[dark brown sugar, hot water, bourbon, lemon j...","[sugar, hot water, bourbon, lemon juice, butte..."


In [41]:
def one_hot_encoder(dataframe):
    for ingredient in lemmatized_default:
        dataframe[ingredient] = 0
    for i in range(dataframe.shape[0]):
        for ingredient in lemmatized_default:
            if ingredient in dataframe['labelled ingredients'][i]:
                dataframe[ingredient][i] = 1
            else:
                dataframe[ingredient][i] = 0
    return dataframe

In [42]:
#df_exp_2 = one_hot_encoder(df_exp)

In [44]:
def create_matrix_row(dataframe):
    for ingredient in lemmatized_default:
        dataframe[ingredient] = 0


def one_hot_encoder_row(row):
    for ingredient in lemmatized_default:
        if ingredient in row['labelled ingredients']:
            row[ingredient][i] = 1
        else:
            row[ingredient][i] = 0
    return row

In [47]:
def one_hot_encoder_row(row):
    for ingredient in lemmatized_default:
        if ingredient in row['labelled ingredients']:
            row[ingredient] = 1
        else:
            row[ingredient] = 0
    return row

In [51]:
one_hot_encoder_row(df.iloc[0])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  row[ingredient] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  row[ingredient] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  row[ingredient] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  row[ingredient] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in th

Unnamed: 0                                                               0
Title                    Miso-Butter Roast Chicken With Acorn Squash Pa...
Uncleaned_Ingredients    ['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...
Instructions             Pat chicken dry with paper towels, season all ...
Image_Name               miso-butter-roast-chicken-acorn-squash-panzanella
Ingredients              [chicken, salt, acorn squash, sage, rosemary, ...
labelled ingredients     [chicken, salt, acorn squash, sage, rosemary, ...
apple                                                                    1
banana                                                                   0
beef                                                                     0
blueberry                                                                0
bread                                                                    1
butter                                                                   1
carrot                   

In [55]:
df.apply(one_hot_encoder_row, axis=1)

Unnamed: 0.1,Unnamed: 0,Title,Uncleaned_Ingredients,Instructions,Image_Name,Ingredients,labelled ingredients,apple,banana,beef,...,lime,milk,mushroom,onion,potato,shrimp,spinach,strawberry,sugar,tomato
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"[chicken, salt, acorn squash, sage, rosemary, ...","[chicken, salt, acorn squash, sage, rosemary, ...",1,0,0,...,0,0,0,1,0,0,0,0,0,0
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"[egg white, potato, salt, black pepper, rosema...","[egg, potato, salt, black pepper, rosemary, th...",0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"[milk, milk, garlic powder, onion powder, smok...","[milk, milk, garlic powder, onion, smoked papr...",0,0,0,...,0,1,0,1,0,0,0,0,0,0
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"[round italian cube, olive oil, sweet italian ...","[round italian cube, olive oil, sweet italian ...",0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"[dark brown sugar, hot water, bourbon, lemon j...","[sugar, hot water, bourbon, lemon juice, butte...",0,0,0,...,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13496,13496,Brownie Pudding Cake,"['1 cup all-purpose flour', '2/3 cup unsweeten...",Preheat the oven to 350°F. Into a bowl sift to...,brownie-pudding-cake-14408,"[allpurpose flour, unsweetened cocoa powder, d...","[flour, unsweetened cocoa powder, doubleacting...",0,0,0,...,0,1,0,0,0,0,0,0,1,0
13497,13497,Israeli Couscous with Roasted Butternut Squash...,"['1 preserved lemon', '1 1/2 pound butternut s...",Preheat oven to 475°F.\nHalve lemons and scoop...,israeli-couscous-with-roasted-butternut-squash...,"[preserved lemon, butternut squash seeded dice...","[preserved lemon, butternut squash seeded dice...",0,0,0,...,0,0,0,1,0,0,0,0,0,0
13498,13498,Rice with Soy-Glazed Bonito Flakes and Sesame ...,['Leftover katsuo bushi (dried bonito flakes) ...,"If using katsuo bushi flakes from package, moi...",rice-with-soy-glazed-bonito-flakes-and-sesame-...,[leftover katsuo bushi dried bonito making das...,[leftover katsuo bushi dried bonito making das...,0,0,0,...,0,0,0,0,0,0,0,0,1,0
13499,13499,Spanakopita,['1 stick (1/2 cup) plus 1 tablespoon unsalted...,Melt 1 tablespoon butter in a 12-inch heavy sk...,spanakopita-107344,"[stick butter, baby spinach, feta crumbled sca...","[butter, spinach, feta crumbled scant, nutmeg,...",0,0,0,...,0,0,0,0,0,0,1,0,0,0
