# Retreive Data from Yummly

## 1. Set up API request

In [1]:
# imports

import requests
import json
import pandas as pd
import numpy as np

In [2]:
# store ID and Key

app_id = 'e2b9bebc'
app_key = '4193215272970d956cfd5384a08580a9'

Create variables for URLs. The base_url is for the search_recipes API call. The metadata_url is for searching for valid search terms.

In [3]:
# URLs

base_url = 'http://api.yummly.com/v1/api/recipes?'
metadata_url = 'http://api.yummly.com/v1/api/metadata/'

In [4]:
# headers with yummly ID and Key

headers = {'X-Yummly-App-ID':'e2b9bebc', 'X-Yummly-App-Key':'4193215272970d956cfd5384a08580a9'}

In [5]:
# params

parameters = {'q':'soup', 'maxResult': 500} 

# NOTE: maxResult can be 1,000, limiting to 100 for now

In [6]:
# Call API

response = requests.get(base_url, headers=headers, params=parameters)

In [7]:
# Check status code

response.status_code

200

## 2. Parse JSON from API request

In [8]:
# Convert JSON to python dictionaries and lists

guac = response.json()

In [9]:
# View type of object it is

type(guac)

dict

In [10]:
# View top level keys

response_keys = guac.keys()
response_keys

[u'matches', u'totalMatchCount', u'attribution', u'facetCounts', u'criteria']

In [11]:
guac['totalMatchCount']

102607

In [13]:
# The matches key has all the data in it - view a sub dictionary

guac['matches'][15]

{u'attributes': {u'course': [u'Soups']},
 u'flavors': {u'bitter': 0.16666666666666666,
  u'meaty': 0.16666666666666666,
  u'piquant': 0.3333333333333333,
  u'salty': 0.16666666666666666,
  u'sour': 0.3333333333333333,
  u'sweet': 0.16666666666666666},
 u'id': u'Spicy-Chicken-Soup-1613454',
 u'imageUrlsBySize': {u'90': u'https://lh3.googleusercontent.com/mBGqwouu7qm1LlA2yyiGB_SYerdGo7TL4rV9gqwlixqzzu3hZ1CNButZWhZVQxMlEsuu9LqC5mD01NMApfNRxg=s90-c'},
 u'ingredients': [u'rotisserie chicken',
  u'olive oil',
  u'onions',
  u'shiitake',
  u'garlic cloves',
  u'ginger',
  u'low sodium chicken broth',
  u'cayenne pepper',
  u'baby spinach',
  u'kosher salt',
  u'freshly ground pepper',
  u'scallions',
  u'lime wedges'],
 u'rating': 3,
 u'recipeName': u'Spicy Chicken Soup',
 u'smallImageUrls': [u'https://lh3.googleusercontent.com/obmx5d3TapQ5e0GX9XVgbxA6_Sxi9t-oddn6xoXQZU-I97PfvKU1ud_ixo0oLUHZuz-X3Ot7EJjOI2m9Dpcd=s90'],
 u'sourceDisplayName': u'Bon App\xe9tit',
 u'totalTimeInSeconds': 2100}

## 3. Extract data, place into dictionaries

In [14]:
# Create dicts to put data into

recipe_info_dict = {}
flavors_dict = {}
ingredients_dict = {}
courses_dict = {}
cuisine_dict = {}

In [15]:
# pull data in for loop
for item in guac['matches']:
    
    # Get basic recipe info and put into list
    recipe_info = []
    recipe_info.append(item.get('recipeName'))
    recipe_info.append(item.get('totalTimeInSeconds'))
    recipe_info.append(item.get('sourceDisplayName'))
    recipe_info.append(item.get('rating'))
    
    # Add to recipe_info_dict
    recipe_info_dict[item.get('id')] = recipe_info
    
    # Add data to dicts for courses, flavors and cuisines
    courses_dict[item.get('id')] = item['attributes'].get('course')
    flavors_dict[item.get('id')] = item.get('flavors')
    cuisine_dict[item.get('id')] = item['attributes'].get('cuisine')
    ingredients_dict[item.get('id')] = [x.lower() for x in item.get('ingredients')]


## 4. Convert dictionaries to pandas DataFrames

## 4a. Ingredients

In [106]:
# Read in master_ingredients list

master_ingredients = pd.read_csv('../Assets/master_ingredients.txt', names=['ingredient'])

In [107]:
# Convert all ingredients to lowercase and put into list

master_ingredients_list = [x.lower() for x in master_ingredients.ingredient]

In [108]:
# Use for loop to check each list of ingredients for membership in master ingredients

ingredients_dict_pandas = {}
for recipe, ingredient_list in ingredients_dict.iteritems():
    ing_dict = {}
    for master_ingredient in master_ingredients_list:
        if master_ingredient in ingredient_list:
            ing_dict[master_ingredient] = 1
        else:
            ing_dict[master_ingredient] = 0
    
        ingredients_dict_pandas[recipe] = ing_dict



In [109]:
# Convert to dataframe

ingredients_df = pd.DataFrame(ingredients_dict_pandas)

In [110]:
# Transpose

ingredients_df = ingredients_df.T

In [113]:
# View head

ingredients_df.head()

UnicodeDecodeError: 'ascii' codec can't decode byte 0xd7 in position 0: ordinal not in range(128)

                                                 cake   corn bread  \
0_-of-the-calories_-Beet-soup-1684369               0            0   
13-Bean-Soup-1682837                                0            0   
15-Bean-Turkey-Soup-1378516                         0            0   
20-Minute-Homemade-Chicken-Noodle-Soup-1278881      0            0   
20-Minute-Miso-Turmeric-Soup-998359                 0            0   

                                                (ri) 1 whiskey  0 flour  \
0_-of-the-calories_-Beet-soup-1684369                        0        0   
13-Bean-Soup-1682837                                         0        0   
15-Bean-Turkey-Soup-1378516                                  0        0   
20-Minute-Homemade-Chicken-Noodle-Soup-1278881               0        0   
20-Minute-Miso-Turmeric-Soup-998359                          0        0   

                                                00 flour  \
0_-of-the-calories_-Beet-soup-1684369                  0   
13-Bean-

## 4b. Recipe info

In [25]:
recipe_info_df = pd.DataFrame.from_dict(recipe_info_dict, orient='index')

In [26]:
recipe_info_df.columns = ['rec_name', 'tot_time_seconds', 'rec_source', 
                         'rating']

In [27]:
recipe_info_df.head()

Unnamed: 0,rec_name,tot_time_seconds,rec_source,rating
Detoxifying-Carrot-Soup-1683466,Detoxifying Carrot Soup,1800.0,Organic Authority,3
Pumpkin-Soup-1310493,Pumpkin Soup,2700.0,The Pioneer Woman,4
Gazpacho-Soup-1595161,Gazpacho Soup,1200.0,Xander Friendly Foods,4
Tomato-Soup-_Salmorejo_-1690687,Tomato Soup (Salmorejo),3900.0,Perspective Portions,4
Chicken-Cassoulet-Style-Soup-1627530,Chicken Cassoulet-Style Soup,26700.0,Better Homes and Gardens,3


## 4c. Flavors

In [28]:
# create flavor_df

flavor_df = pd.DataFrame(flavors_dict).transpose()

In [29]:
# view flavor_df
flavor_df.head()

Unnamed: 0,bitter,meaty,piquant,salty,sour,sweet
0_-of-the-calories_-Beet-soup-1684369,0.166667,0.166667,0.0,0.166667,0.833333,0.333333
13-Bean-Soup-1682837,0.166667,0.0,0.166667,0.166667,0.166667,0.166667
15-Bean-Turkey-Soup-1378516,0.166667,0.166667,0.0,0.166667,0.166667,0.166667
20-Minute-Homemade-Chicken-Noodle-Soup-1278881,0.166667,0.166667,0.0,0.166667,0.166667,0.166667
20-Minute-Miso-Turmeric-Soup-998359,,,,,,


## 4d. Courses

In [30]:
# Read in master course list
master_course_df = pd.read_csv('../Assets/master_courses.csv', names=['course'])

In [31]:
# Get course column
master_course_list = master_course_df.course

In [32]:
# For loop to generate dict for dataframe
courses_dict_pandas = {}
for recipe, course_list in courses_dict.iteritems():
    cour_dict = {}
    for master_course in master_course_list:
        try:
            if master_course in course_list:
                cour_dict[master_course] = 1
            else:
                cour_dict[master_course] = 0
        except TypeError:
            cour_dict[master_course] = 0
    
        courses_dict_pandas[recipe] = cour_dict

In [33]:
# Create DataFrame
courses_df = pd.DataFrame(courses_dict_pandas)

In [34]:
# Transpose
courses_df = courses_df.T

In [35]:
courses_df.head(2)

Unnamed: 0,Appetizers,Beverages,Breads,Breakfast and Brunch,Cocktails,Condiments and Sauces,Desserts,Lunch,Main Dishes,Salads,Side Dishes,Snacks,Soups
0_-of-the-calories_-Beet-soup-1684369,0,0,0,0,0,0,0,0,0,0,0,0,1
13-Bean-Soup-1682837,0,0,0,0,0,0,0,0,0,0,0,0,1


## 4e. Cuisines

In [36]:
# Read in master cuisines
master_cuisine_df = pd.read_csv('../Assets/master_cuisines.csv', names = ['cuisine'])

In [37]:
# Pull out cuisine column
master_cuisine_list = master_cuisine_df.cuisine

In [38]:
cuisine_dict_pandas = {}
for recipe, cuisine_list in cuisine_dict.iteritems():
    cuis_dict = {}
    for master_cuisine in master_cuisine_list:
        try:
            if master_cuisine in cuisine_list:
                cuis_dict[master_cuisine] = 1
            else:
                cuis_dict[master_cuisine] = 0
        except TypeError:
            cuis_dict[master_cuisine] = 0
    
        cuisine_dict_pandas[recipe] = cuis_dict

In [39]:
cuisine_df = pd.DataFrame(cuisine_dict_pandas)

In [40]:
cuisine_df = cuisine_df.T

In [41]:
cuisine_df.head(2)

Unnamed: 0,American,Asian,Barbecue,Cajun & Creole,Chinese,Cuban,English,French,German,Greek,...,Kid-Friendly,Mediterranean,Mexican,Moroccan,Portuguese,Southern & Soul Food,Southwestern,Spanish,Swedish,Thai
0_-of-the-calories_-Beet-soup-1684369,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
13-Bean-Soup-1682837,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Pushing DataFrames up to SQL

In [99]:
from sqlalchemy import create_engine

In [116]:
engine = create_engine('postgresql://treytrey3:113315th3@recipeproject3.czcsc2tr7kct.us-east-1.rds.amazonaws.com:5432/dsicapstone3')

In [101]:
name = 'ingredients'

In [120]:
# this is giving an error
# ingredients_df.to_sql(name, engine, flavor='postgres', if_exists='replace')

In [64]:
def clean_text(row):
    # return the list of decoded cell in the Series instead 
    return [r.decode('unicode_escape').encode('ascii', 'ignore') for r in row]