# Retrieve Data from Yummly

## 1. Set up API request

In [340]:
# imports

import json
import os

import numpy as np
import pandas as pd
import requests

In [341]:
# Store the Yummly application ID and key.
#
# SECURITY NOTE(review): credentials should not live in a notebook. The
# environment variables below take precedence; the literals are kept only as a
# fallback so existing runs keep working. Rotate these keys and remove the
# fallbacks once the environment is configured.

app_id = os.environ.get('YUMMLY_APP_ID', 'e2b9bebc')
app_key = os.environ.get('YUMMLY_APP_KEY', '4193215272970d956cfd5384a08580a9')

Create variables for URLs. The base_url is for the search_recipes API call. The metadata_url is for searching for valid search terms.

In [380]:
# Endpoints: base_url serves the recipe search call, metadata_url the metadata lookups

base_url, metadata_url = (
    'http://api.yummly.com/v1/api/recipes?',
    'http://api.yummly.com/v1/api/metadata/',
)

In [381]:
# Request headers carrying the Yummly ID and key.
# Built from app_id / app_key so the credentials are defined in exactly one place.

_headers = {'X-Yummly-App-ID': app_id, 'X-Yummly-App-Key': app_key}

In [382]:
# Query parameters for the recipe search:
#   q         - search term
#   maxResult - page size requested (500 here; the original note says up to 1,000 is allowed)
#   start     - offset of the first result to return

parameters = dict(q='brownies', maxResult=500, start=4500)

In [383]:
# Call the recipe search API.
# A timeout is set because requests waits indefinitely by default; without it a
# stalled connection would hang the notebook.

response = requests.get(base_url, headers=_headers, params=parameters, timeout=30)

In [384]:
# Check the HTTP status code of the API response (200 indicates success)

response.status_code

200

## 2. Parse JSON from API request

In [385]:
# Parse the JSON response body into Python dictionaries and lists

api_call = response.json()

In [386]:
# View the type of the parsed object (the top level of the response)

type(api_call)

dict

In [387]:
# View the top-level keys of the parsed response

response_keys = api_call.keys()
response_keys

[u'matches', u'totalMatchCount', u'attribution', u'facetCounts', u'criteria']

In [388]:
# Match count reported by the API (presumably the total for the query, not just this page - confirm)
api_call['totalMatchCount']

15040

In [389]:
# The 'matches' key holds the per-recipe records - view one of them as a sample

api_call['matches'][15]

{u'attributes': {u'course': [u'Desserts']},
 u'flavors': {u'bitter': 0.16666666666666666,
  u'meaty': 0.6666666666666666,
  u'piquant': 0.0,
  u'salty': 0.16666666666666666,
  u'sour': 0.6666666666666666,
  u'sweet': 0.8333333333333334},
 u'id': u'Orange-Chocolate-Brownies-1181575',
 u'imageUrlsBySize': {u'90': u'http://lh3.googleusercontent.com/RMTtmxEoekuzuIWMxftRVty0wIfjS1PcdoKYsSyPu4zDlCngUbjnJTe-_b6xSM-yMRbGneXL_G84Tc9jQ56JgA=s90-c'},
 u'ingredients': [u'dark chocolate',
  u'eggs',
  u'sugar',
  u'orange juice',
  u'olive oil',
  u'cocoa powder',
  u'all-purpose flour',
  u'baking powder',
  u'orange zest'],
 u'rating': 4,
 u'recipeName': u'Orange Chocolate Brownies',
 u'smallImageUrls': [u'http://lh3.googleusercontent.com/5IrIP0VD7yggDP8gJ6637ay7G91ApJKdGpEFNoRbY_7bqD3cXN8y7Rn-YDt1hJo24CpkUOGb2sm2lmSt1-NeRw=s90'],
 u'sourceDisplayName': u'Give Recipe',
 u'totalTimeInSeconds': 2700}

## 3. Extract data, place into dictionaries

In [390]:
# Empty containers, one per output table; each gets filled with recipe id -> data

(recipe_info_dict,
 flavors_dict,
 ingredients_dict,
 courses_dict,
 cuisine_dict) = {}, {}, {}, {}, {}

In [391]:
# Walk every match and file its fields into the per-recipe dictionaries,
# all keyed by the recipe id.
for item in api_call['matches']:
    recipe_id = item.get('id')

    # Read the nested/list fields defensively, like the other fields: a match
    # without 'attributes' or 'ingredients' should yield empty data rather
    # than abort the whole extraction with a KeyError / TypeError.
    attributes = item.get('attributes') or {}
    ingredients = item.get('ingredients') or []

    # Basic recipe info as a list: name, total time, source, rating
    recipe_info_dict[recipe_id] = [
        item.get('recipeName'),
        item.get('totalTimeInSeconds'),
        item.get('sourceDisplayName'),
        item.get('rating'),
    ]

    # Courses, flavors and cuisines are stored as returned (None when absent)
    courses_dict[recipe_id] = attributes.get('course')
    flavors_dict[recipe_id] = item.get('flavors')
    cuisine_dict[recipe_id] = attributes.get('cuisine')

    # Ingredients are lowercased for case-insensitive matching later on
    ingredients_dict[recipe_id] = [x.lower() for x in ingredients]


## 4. Convert dictionaries to pandas DataFrames

## 4a. Ingredients

In [392]:
# Read in the master ingredients list as a one-column DataFrame;
# names=['ingredient'] supplies the column label (assumes the file has no header row - TODO confirm)

master_ingredients = pd.read_csv('../Assets/master_ingredients.txt', names=['ingredient'])

In [393]:
# Lowercase every master ingredient and collect the results in a plain list

master_ingredients_list = [
    ingredient_name.lower()
    for ingredient_name in master_ingredients['ingredient']
]

In [394]:
# Build one 0/1 indicator row per recipe: for every master ingredient, 1 if the
# recipe lists exactly that ingredient string, else 0.

ingredients_dict_pandas = {}
# .items() works on both Python 2 and 3 (.iteritems() exists only on Python 2)
for recipe, ingredient_list in ingredients_dict.items():
    # A set makes each membership test constant-time instead of a list scan
    present = set(ingredient_list)
    # Stored once per recipe (previously re-assigned on every inner iteration,
    # which also skipped the recipe entirely when the master list was empty)
    ingredients_dict_pandas[recipe] = {
        master_ingredient: int(master_ingredient in present)
        for master_ingredient in master_ingredients_list
    }



In [395]:
# Convert to DataFrame: outer keys (recipe ids) become columns, inner keys (ingredients) become rows

ingredients_df = pd.DataFrame(ingredients_dict_pandas)

In [396]:
# Transpose so recipes are rows and ingredients are columns.
# The frame is rebuilt from ingredients_dict_pandas rather than transposing
# ingredients_df in place, so re-running this cell cannot flip the frame back.

ingredients_df = pd.DataFrame(ingredients_dict_pandas).T

In [397]:
# View the first two rows as a sanity check

ingredients_df.head(2)

Unnamed: 0,cake,corn bread,(ri) 1 whiskey,0 flour,00 flour,1% chocolate low-fat milk,1% low-fat buttermilk,1% low-fat chocolate milk,1% low-fat cottage cheese,1% low-fat evaporated milk,...,zoom quick hot cereal,zucchini,zucchini blossoms,zucchini noodles,zucchini salad,zuivelspread,zulka pure cane sugar,zuurkoolspek,zwack liqueur,zwieback
100-Calorie-Raspberry-Chocolate-Chip-Protein-Brownies-1118036,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
100-calorie-Black-Bean-Brownies-1265945,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## 4b. Recipe info

In [398]:
# Build the recipe info table: one row per recipe id, one column per entry of its info list
recipe_info_df = pd.DataFrame.from_dict(recipe_info_dict, orient='index')

In [399]:
# Label the columns: recipe name, total time in seconds, source, rating
recipe_info_df.columns = ['rec_name', 'tot_time_seconds', 'rec_source', 'rating']

In [400]:
# Preview the first rows of the recipe info table
recipe_info_df.head()

Unnamed: 0,rec_name,tot_time_seconds,rec_source,rating
Heavenly-Hash-Brownies-1175252,Heavenly Hash Brownies,1800.0,It Bakes Me Happy,4
Spinach-Brownies-1607705,Spinach Brownies,3300.0,AllRecipes,4
Coffee-cream-brownies-344321,Coffee Cream Brownies,3000.0,MyRecipes,4
White-Chocolate-Oreo-Brownie-Parfait-578476,White Chocolate Oreo Brownie Parfait,3420.0,Inside BruCrew Life,4
Chewy-Chocolate-Brownies-with-a-Whipped-Cookies-_-Cream-_-Mascarpone-Topping-1008194,Chewy Chocolate Brownies with a Whipped Cookie...,2700.0,Port and Fin,4


## 4c. Flavors

In [401]:
# Create flavor_df: built with recipe ids as columns, then transposed so each
# recipe is a row and each flavor is a column

flavor_df = pd.DataFrame(flavors_dict).transpose()

In [402]:
# View flavor_df (all-NaN rows presumably belong to recipes with no flavor data - confirm)
flavor_df.head()

Unnamed: 0,bitter,meaty,piquant,salty,sour,sweet
100-Calorie-Raspberry-Chocolate-Chip-Protein-Brownies-1118036,0.833333,0.833333,0.0,0.166667,0.666667,0.666667
100-calorie-Black-Bean-Brownies-1265945,,,,,,
2-Ingredient-Nutella-Brownies-_Gluten-Free_-1027379,0.166667,0.833333,0.0,0.166667,0.0,0.833333
3-Ingredient-Fudgy-Chocolate-Blender-Brownies-1587022,0.833333,0.5,0.0,0.0,0.5,0.5
3-Ingredient-Fudgy-Nutella-Brownie-Bites-Secret-Recipe-Club-1326403,0.166667,0.833333,0.0,0.166667,0.0,0.833333


## 4d. Courses

In [403]:
# Read in the master course list as a one-column DataFrame;
# names=['course'] supplies the column label (assumes the file has no header row - TODO confirm)
master_course_df = pd.read_csv('../Assets/master_courses.csv', names=['course'])

In [404]:
# Select the 'course' column (a pandas Series, despite the "_list" name)
master_course_list = master_course_df['course']

In [405]:
# Build one 0/1 indicator row per recipe: 1 for every master course the recipe
# is tagged with, 0 otherwise.
courses_dict_pandas = {}
# .items() works on both Python 2 and 3 (.iteritems() exists only on Python 2)
for recipe, course_list in courses_dict.items():
    # course_list is None when the recipe carried no 'course' attribute; treat
    # that as "no courses" explicitly instead of catching the TypeError it caused
    present = set(course_list or ())
    # Stored once per recipe rather than on every inner-loop iteration
    courses_dict_pandas[recipe] = {
        master_course: int(master_course in present)
        for master_course in master_course_list
    }

In [406]:
# Create DataFrame (recipe ids as columns, courses as rows at this point)
courses_df = pd.DataFrame(courses_dict_pandas)

In [407]:
# Transpose so recipes are rows and courses are columns.
# Rebuilt from courses_dict_pandas rather than transposing courses_df in place,
# so re-running this cell cannot flip the frame back.
courses_df = pd.DataFrame(courses_dict_pandas).T

In [408]:
# View the first two rows of the course indicator table
courses_df.head(2)

Unnamed: 0,Appetizers,Beverages,Breads,Breakfast and Brunch,Cocktails,Condiments and Sauces,Desserts,Lunch,Main Dishes,Salads,Side Dishes,Snacks,Soups
100-Calorie-Raspberry-Chocolate-Chip-Protein-Brownies-1118036,0,0,0,0,0,0,1,0,0,0,0,0,0
100-calorie-Black-Bean-Brownies-1265945,0,0,0,0,0,0,1,0,0,0,0,0,0


## 4e. Cuisines

In [409]:
# Read in the master cuisine list as a one-column DataFrame;
# names=['cuisine'] supplies the column label (assumes the file has no header row - TODO confirm)
master_cuisine_df = pd.read_csv('../Assets/master_cuisines.csv', names = ['cuisine'])

In [410]:
# Select the 'cuisine' column (a pandas Series, despite the "_list" name)
master_cuisine_list = master_cuisine_df['cuisine']

In [411]:
# Build one 0/1 indicator row per recipe: 1 for every master cuisine the recipe
# is tagged with, 0 otherwise.
cuisine_dict_pandas = {}
# .items() works on both Python 2 and 3 (.iteritems() exists only on Python 2)
for recipe, cuisine_list in cuisine_dict.items():
    # cuisine_list is None when the recipe carried no 'cuisine' attribute; treat
    # that as "no cuisines" explicitly instead of catching the TypeError it caused
    present = set(cuisine_list or ())
    # Stored once per recipe rather than on every inner-loop iteration
    cuisine_dict_pandas[recipe] = {
        master_cuisine: int(master_cuisine in present)
        for master_cuisine in master_cuisine_list
    }

In [412]:
# Create DataFrame (recipe ids as columns, cuisines as rows at this point)
cuisine_df = pd.DataFrame(cuisine_dict_pandas)

In [413]:
# Transpose so recipes are rows and cuisines are columns.
# Rebuilt from cuisine_dict_pandas rather than transposing cuisine_df in place,
# so re-running this cell cannot flip the frame back.
cuisine_df = pd.DataFrame(cuisine_dict_pandas).T

In [414]:
# View the first two rows of the cuisine indicator table
cuisine_df.head(2)

Unnamed: 0,American,Asian,Barbecue,Cajun & Creole,Chinese,Cuban,English,French,German,Greek,...,Kid-Friendly,Mediterranean,Mexican,Moroccan,Portuguese,Southern & Soul Food,Southwestern,Spanish,Swedish,Thai
100-Calorie-Raspberry-Chocolate-Chip-Protein-Brownies-1118036,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
100-calorie-Black-Bean-Brownies-1265945,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Pushing DataFrames up to SQL

In [93]:
from sqlalchemy import create_engine

In [75]:
# Create the SQLAlchemy engine for the project's PostgreSQL database.
#
# SECURITY NOTE(review): the connection URL embeds a username and password.
# RECIPE_DB_URL takes precedence when set; the literal is kept only as a
# fallback so existing runs keep working. Rotate the password and remove the
# fallback once the environment is configured.
engine = create_engine(os.environ.get(
    'RECIPE_DB_URL',
    'postgresql://treytrey3:113315th3@recipeproject3.czcsc2tr7kct.us-east-1.rds.amazonaws.com:5432/dsicapstone3',
))

In [81]:
# Name of the destination SQL table for the flavors DataFrame
name = 'flavors'

In [82]:
# Write flavor_df to the database table, replacing any existing table of that name.
# The earlier call passed flavor='postgres', which pandas does not accept: the
# `flavor` argument applied only to legacy DBAPI connections and has since been
# removed. With an SQLAlchemy engine the dialect comes from the engine URL.
flavor_df.to_sql(name, engine, if_exists='replace')

In [64]:
def clean_text(row):
    """Return a list with every cell of `row` reduced to ASCII bytes.

    Each cell has its backslash escape sequences interpreted
    ('unicode_escape'), then every non-ASCII character is dropped.

    Parameters:
        row: iterable of byte strings or text strings (e.g. a pandas Series).

    Returns:
        A list of ASCII byte strings, one per cell, in the original order.

    Raises:
        AttributeError: if a cell is not a string (e.g. None or a float).
    """
    cleaned = []
    for cell in row:
        # Text cells have no usable .decode (missing on Python 3; on Python 2 it
        # implicitly ASCII-encodes and fails on non-ASCII characters). Convert
        # them to bytes first, dropping characters the final step drops anyway.
        if not isinstance(cell, bytes):
            cell = cell.encode('ascii', 'ignore')
        cleaned.append(cell.decode('unicode_escape').encode('ascii', 'ignore'))
    return cleaned

# Sending DataFrames to CSV

In [415]:
# cuisine_df.to_csv('/Users/michaelshea/Desktop/Class/Capstone/Assets/API-pulls/brownies/cuisines/cuisine10.csv', encoding='utf-8')

In [416]:
# courses_df.to_csv('/Users/michaelshea/Desktop/Class/Capstone/Assets/API-pulls/brownies/courses/courses10.csv', encoding='utf-8')

In [417]:
# flavor_df.to_csv('/Users/michaelshea/Desktop/Class/Capstone/Assets/API-pulls/brownies/flavors/flavors10.csv', encoding='utf-8')

In [418]:
# recipe_info_df.to_csv('/Users/michaelshea/Desktop/Class/Capstone/Assets/API-pulls/brownies/recipe_info/recipe_info10.csv', encoding='utf-8')

In [419]:
# ingredients_df.to_csv('/Users/michaelshea/Desktop/Class/Capstone/Assets/API-pulls/brownies/ingredients/ingredients10.csv', encoding='utf-8')