In [1]:
import pandas as pd

In [2]:
# The training data sits inside the WhatsCooking folder (path is relative to the working directory)

train_json_path = '../WhatsCooking/TrainingData/train.json'
food = pd.read_json(train_json_path)

In [3]:
## Adapted from https://towardsdatascience.com/dealing-with-list-values-in-pandas-dataframes-a177e534f173

## What this does: makes a dataframe (whose values are booleans) from a pandas Series of lists
    ## makes a separate column for each "distinct" item
    ## the second argument is a list in which each "distinct" item appears only once

def boolean_df(item_lists, unique_items):
    """Build a boolean membership table from a Series of item collections.

    Parameters
    ----------
    item_lists : pandas.Series
        One collection (e.g. a list of ingredient names) per row.
    unique_items : iterable
        The distinct items to test for; each one becomes an output column,
        in the order given.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``item_lists`` (same index) and one column per
        entry of ``unique_items``. A cell is True when ``item in collection``
        holds for that row's collection.
    """
    # Build every column first and create the frame in a single call,
    # rather than inserting thousands of columns one at a time.
    bool_dict = {
        item: item_lists.apply(lambda collection: item in collection)
        for item in unique_items
    }

    # With no items to test, still return one (column-less) row per input row
    # so the result stays aligned with item_lists when concatenated later.
    if not bool_dict:
        return pd.DataFrame(index=item_lists.index)

    # Return the results as a dataframe
    return pd.DataFrame(bool_dict)

In [4]:
## Getting the second input for boolean_df

## 1. Getting a list of all ingredients in all recipes
##    Iterating over the column's values (instead of looking rows up with
##    food['ingredients'][i]) does not rely on the index labels being 0..n-1.
total_ingredients = [
    ingredient
    for recipe in food['ingredients']
    for ingredient in recipe
]

## 2. Changing the list of all ingredients to a series
tot_ingred_series = pd.Series(total_ingredients)

## 3. Getting the "unique" ingredients
##    value_counts() lists each ingredient once, ordered from most to least frequent
unique = tot_ingred_series.value_counts().index

In [5]:
## Building the boolean ingredient table
    ## one column per ingredient; each entry is True/False depending on whether that recipe contains it

ingred_bool = boolean_df(item_lists=food['ingredients'], unique_items=unique)

In [6]:
## Swap the list-valued ingredients column for the boolean ingredient columns

## 1. Keep everything except the ingredients column (food itself is not modified)
remove_ingred = food.drop(columns='ingredients')

## 2. Put the boolean ingredient columns to the right of the remaining columns
food_bool = pd.concat([remove_ingred, ingred_bool], axis='columns')

In [7]:
## OPTIONAL: use 0's and 1's instead of True's and False's

## Cast only the boolean columns to integers (True -> 1, False -> 0).
## An explicit astype avoids scanning the non-boolean columns and does not rely on
## replace()'s implicit downcasting, which recent pandas versions warn about.
bool_columns = food_bool.select_dtypes(include='bool').columns
FOOD = food_bool.astype({column: 'int64' for column in bool_columns})

At the end, we have two new dataframes:
1. `food_bool`, which uses booleans (True/False)
2. `FOOD`, which uses 0 and 1

In [9]:
# Display the boolean-encoded frame (the rendered view is truncated to the first/last rows and columns)
food_bool

Unnamed: 0,id,cuisine,salt,olive oil,onions,water,garlic,sugar,garlic cloves,butter,...,whole garam masala,rose hip tea bags,small potatoes,cherry syrup,frozen fruit,sauerkraut juice,no-calorie sweetener,Bob Evans Italian Sausage,extra firm silken tofu,crushed cheese crackers
0,10259,greek,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,25693,southern_us,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,20130,filipino,True,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
3,22213,indian,True,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,13162,indian,True,False,True,True,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39769,29109,irish,True,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
39770,11462,italian,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
39771,2238,irish,True,False,False,False,False,True,False,True,...,False,False,False,False,False,False,False,False,False,False
39772,41882,chinese,False,False,False,False,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False


In [10]:
# Display the 0/1-encoded frame (the rendered view is truncated to the first/last rows and columns)
FOOD

Unnamed: 0,id,cuisine,salt,olive oil,onions,water,garlic,sugar,garlic cloves,butter,...,whole garam masala,rose hip tea bags,small potatoes,cherry syrup,frozen fruit,sauerkraut juice,no-calorie sweetener,Bob Evans Italian Sausage,extra firm silken tofu,crushed cheese crackers
0,10259,greek,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,25693,southern_us,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,20130,filipino,1,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,22213,indian,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,13162,indian,1,0,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39769,29109,irish,1,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
39770,11462,italian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
39771,2238,irish,1,0,0,0,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
39772,41882,chinese,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
