In [9]:
import numpy as np
import pandas as pd

In [2]:
x = np.array([2, 3, 5, 7, 11, 13])
x * 2

array([ 4,  6, 10, 14, 22, 26])

In [6]:
data = ['peter', 'Paul', 'MARY', 'gUIDO']
[s.capitalize() for s in data]

['Peter', 'Paul', 'Mary', 'Guido']

In [8]:
data = ['peter', 'Paul', None, 'MARY', 'gUIDO']
[s.capitalize() for s in data]

AttributeError: 'NoneType' object has no attribute 'capitalize'

In [10]:
names = pd.Series(data)
names

0    peter
1     Paul
2     None
3     MARY
4    gUIDO
dtype: object

In [11]:
names.str.capitalize()

0    Peter
1     Paul
2     None
3     Mary
4    Guido
dtype: object

In [19]:
names[names.isna() == False & names.str.isalpha()]

0    peter
1     Paul
3     MARY
4    gUIDO
dtype: object

In [21]:
names["b"] = 1

In [27]:
names.reindex()
names

0    peter
1     Paul
2     None
3     MARY
4    gUIDO
b        2
dtype: object

In [24]:
names

0    peter
1     Paul
2     None
3     MARY
4    gUIDO
b        1
dtype: object

In [35]:
df = pd.DataFrame([['peter', 'M', '5'], ['Paul', 'M', '5'], ['MARY', 'F', '5'], ['gUIDO', 'M', '4']])
df

Unnamed: 0,0,1,2
0,peter,M,5
1,Paul,M,5
2,MARY,F,5
3,gUIDO,M,4


In [50]:
df.loc[:, 1].str.get_dummies()

Unnamed: 0,F,M
0,0,1
1,0,1
2,1,0
3,0,1


In [52]:
df.merge(df.loc[:, 1].str.get_dummies(), left_index=True, right_index=True)

Unnamed: 0,0,1,2,F,M
0,peter,M,5,0,1
1,Paul,M,5,0,1
2,MARY,F,5,1,0
3,gUIDO,M,4,0,1


In [56]:
# json format!
try:
    recipes = pd.read_json('recipeitems-latest.json')
except ValueError as e:
    print("Error: could not read json file", e)

Error: could not read json file Trailing data


In [71]:
from io import StringIO
with open('recipeitems-latest.json') as f:
    data = (line.strip() for line in f)
    data_json = "[{0}]".format(','.join(data))

recipes = pd.read_json(StringIO(data_json))
print(recipes.shape)
recipes.head()

(173278, 17)


Unnamed: 0,_id,name,ingredients,url,image,ts,cookTime,source,recipeYield,datePublished,prepTime,description,totalTime,creator,recipeCategory,dateModified,recipeInstructions
0,{'$oid': '5160756b96cc62079cc2db15'},Drop Biscuits and Sausage Gravy,Biscuits\n3 cups All-purpose Flour\n2 Tablespo...,http://thepioneerwoman.com/cooking/2013/03/dro...,http://static.thepioneerwoman.com/cooking/file...,{'$date': 1365276011104},PT30M,thepioneerwoman,12,2013-03-11,PT10M,"Late Saturday afternoon, after Marlboro Man ha...",,,,,
1,{'$oid': '5160756d96cc62079cc2db16'},Hot Roast Beef Sandwiches,12 whole Dinner Rolls Or Small Sandwich Buns (...,http://thepioneerwoman.com/cooking/2013/03/hot...,http://static.thepioneerwoman.com/cooking/file...,{'$date': 1365276013902},PT20M,thepioneerwoman,12,2013-03-13,PT20M,"When I was growing up, I participated in my Ep...",,,,,
2,{'$oid': '5160756f96cc6207a37ff777'},Morrocan Carrot and Chickpea Salad,Dressing:\n1 tablespoon cumin seeds\n1/3 cup /...,http://www.101cookbooks.com/archives/moroccan-...,http://www.101cookbooks.com/mt-static/images/f...,{'$date': 1365276015332},,101cookbooks,,2013-01-07,PT15M,A beauty of a carrot salad - tricked out with ...,,,,,
3,{'$oid': '5160757096cc62079cc2db17'},Mixed Berry Shortcake,Biscuits\n3 cups All-purpose Flour\n2 Tablespo...,http://thepioneerwoman.com/cooking/2013/03/mix...,http://static.thepioneerwoman.com/cooking/file...,{'$date': 1365276016700},PT15M,thepioneerwoman,8,2013-03-18,PT15M,It's Monday! It's a brand new week! The birds ...,,,,,
4,{'$oid': '5160757496cc6207a37ff778'},Pomegranate Yogurt Bowl,For each bowl: \na big dollop of Greek yogurt\...,http://www.101cookbooks.com/archives/pomegrana...,http://www.101cookbooks.com/mt-static/images/f...,{'$date': 1365276020318},,101cookbooks,Serves 1.,2013-01-20,PT5M,A simple breakfast bowl made with Greek yogurt...,,,,,


In [73]:
# messy data

recipes.loc[:, "ingredients"].str.len().describe()

count    173278.000000
mean        244.617926
std         146.705285
min           0.000000
25%         147.000000
50%         221.000000
75%         314.000000
max        9067.000000
Name: ingredients, dtype: float64

In [82]:
recipes.iloc[np.argmax(recipes["ingredients"].str.len()), 1]
# or like this
# recipes["name"][np.argmax(recipes["ingredients"].str.len())]

'Carrot Pineapple Spice &amp; Brownie Layer Cake with Whipped Cream &amp; Cream Cheese Frosting and Marzipan Carrots'

In [86]:
recipes["description"].str.contains("[Bb]reakfast").sum()

3524

In [87]:
spice_list = ['salt', 'pepper', 'oregano', 'sage', 'parsley', 'rosemary', 'tarragon', 'thyme', 'paprika', 'cumin']

In [88]:
import re
spice_df = pd.DataFrame(
    dict(
        (spice, recipes["ingredients"].str.contains(spice, re.IGNORECASE)) for spice in spice_list
    )
)
print(spice_df.shape)
spice_df.head()

(173278, 10)


Unnamed: 0,salt,pepper,oregano,sage,parsley,rosemary,tarragon,thyme,paprika,cumin
0,False,False,False,True,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False
2,True,True,False,False,False,False,False,False,False,True
3,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False


In [94]:
# now we can say, give me all the recipes that contain certain spices
selection = spice_df[(spice_df["parsley"] == True) & (spice_df["paprika"] == True) & (spice_df["tarragon"] == True)]
len(selection)

10

In [104]:
recipes.loc[selection.index, "name"]
# or recipes.iloc[selection.index, 1]
# or recipes["name"][selection.index]

2069      All cremat with a Little Gem, dandelion and wa...
74964                         Lobster with Thermidor butter
93768      Burton's Southern Fried Chicken with White Gravy
113926                     Mijo's Slow Cooker Shredded Beef
137686                     Asparagus Soup with Poached Eggs
140530                                 Fried Oyster Po’boys
158475                Lamb shank tagine with herb tabbouleh
158486                 Southern fried chicken in buttermilk
163175            Fried Chicken Sliders with Pickles + Slaw
165243                        Bar Tartine Cauliflower Salad
Name: name, dtype: object

In [114]:
selection = recipes[recipes["ingredients"].str.contains("[Pp]otatoe")]
recipes.loc[selection.index, "name"]

7                     Eggs in Hash Brown Nests
19                          Pizza Potato Skins
39                       Homemade Potato Chips
72          Sweet Potato Wedges with Lime Mayo
74                         Tortilla de Patatas
                          ...                 
173201                        Hearty Beef Stew
173205    Great, Great Grandmother’s Kinklings
173227          Cheesy Scalloped Potato Gratin
173243    Asian Coconut Water Roasted Potatoes
173270                  Roasted Potato Bunnies
Name: name, Length: 7974, dtype: object

In [112]:
recipes.iloc[143829].ingredients.split("\n")

['¼ cups FOR THE SALAD:',
 '1 cup Uncooked Barley',
 '1 can Water',
 '1 can Chickpeas (15 Oz. Can) Rinsed And Drained',
 '1 whole White Cannellini Beans (15 Oz. Can) Rinsed And Drained',
 '1 whole Green Bell Pepper, Stem And Seeds Removed Then Finely Diced',
 '2 whole Fresh Tomato, Diced',
 '¼ whole Sun-Dried Tomatoes, Diced',
 '2 Tablespoons Red Onion, Chopped',
 '2 Tablespoons Chopped Fresh Basil',
 '1 Tablespoon FOR THE DRESSING:',
 '1 Tablespoon Olive Oil',
 '4 cloves Lemon Juice',
 '¼ teaspoons Low-fat Mayonnaise',
 '¼ teaspoons Garlic, Mined']