## This code pulls recipes from the [Yummly Recipe API](https://developer.yummly.com/documentation)

You need to request access to the API before using the following code.

In [1]:
import json
import os
import re

import numpy as np
import pandas as pd
import requests

### Build functions 

##### 1. To make an API call, you first need to get the proper search terms from the API

In [2]:
def get_search_terms(ID, KEY, term_type):
    '''Print the search values the Yummly metadata endpoint lists for `term_type`.

    The printed values are the ones to pass as the 'allowed_course',
    'excluded_course', 'allowed_cuisine' and 'excluded_cuisine' parameters
    of the make_api_call function.

    Parameters
    ----------
    ID, KEY : str
        Yummly application ID and application key.
    term_type : str
        Metadata dictionary to fetch, e.g. 'course' or 'cuisine'.

    Returns
    -------
    None
        Everything is printed: a status line, then (on HTTP 200) the list
        of 'searchValue' strings.

    Raises
    ------
    ValueError
        If a 200 response does not contain a decodable JSON array.
    '''
    url = 'http://api.yummly.com/v1/api/metadata/%s' % term_type
    # Let requests build the query string so the credentials are URL-encoded.
    response = requests.get(url, params={'_app_id': ID, '_app_key': KEY})
    if response.status_code == 200:
        print("Status code normal")
        # The body is JSONP: a JSON array of term objects wrapped in a
        # callback call. Locate the array by its brackets rather than by a
        # fixed character offset, which only lines up for some term_type
        # lengths.
        body = response.text
        json_string = body[body.index('['):body.rindex(']') + 1]
        # Decode JSON
        terms = json.loads(json_string)
        # Print out only what you want, the list of terms
        print([x['searchValue'] for x in terms])
    else:
        print("Status code error: %r" % response.status_code)


##### 2. Make API call

In [3]:
def make_api_call(ID, KEY, allowed_course, excluded_course, allowed_cuisine, excluded_cuisine, max_result, start):
    '''Request one page of recipe search results from the Yummly recipe API.

    Parameters
    ----------
    ID, KEY : str
        API credentials, sent as the 'X-Yummly-App-ID' and
        'X-Yummly-App-Key' request headers.
    allowed_course, excluded_course, allowed_cuisine, excluded_cuisine : str or list
        Course / cuisine search values to require or exclude. Use the
        get_search_terms function to list properly formatted values.
    max_result : int
        Number of results to request in this call.
    start : int
        Offset of the first result to return; e.g. start=0 with
        max_result=500 asks for results 0-499.

    Returns
    -------
    dict or None
        The decoded JSON response when the status code is 200. Otherwise
        the status code is printed and None is returned, so callers should
        check the result before indexing into it.
    '''
    url = 'http://api.yummly.com/v1/api/recipes?'
    headers = {'X-Yummly-App-ID': ID, 'X-Yummly-App-Key': KEY}
    parameters = {'allowedCourse[]': allowed_course,
                  'excludedCourse[]': excluded_course,
                  'allowedCuisine[]': allowed_cuisine,
                  'excludedCuisine[]': excluded_cuisine,
                  'maxResult': max_result,
                  'start': start}
    response = requests.get(url, headers=headers, params=parameters)
    if response.status_code != 200:
        print("Status code error: %r" % response.status_code)
        return None
    print("Status code normal")
    return response.json()

### Sample API pull

##### Get cuisine search terms

In [5]:
# Define parameters
# Credentials come from the environment so they are never saved in the
# notebook; set YUMMLY_APP_ID and YUMMLY_APP_KEY before starting the kernel.
my_id = os.environ['YUMMLY_APP_ID']
my_key = os.environ['YUMMLY_APP_KEY']
term = 'cuisine'

# Get cuisine terms
get_search_terms(ID=my_id, KEY=my_key, term_type=term)

Status code normal
[u'cuisine^cuisine-american', u'cuisine^cuisine-kid-friendly', u'cuisine^cuisine-italian', u'cuisine^cuisine-asian', u'cuisine^cuisine-mexican', u'cuisine^cuisine-southern', u'cuisine^cuisine-french', u'cuisine^cuisine-southwestern', u'cuisine^cuisine-barbecue-bbq', u'cuisine^cuisine-indian', u'cuisine^cuisine-chinese', u'cuisine^cuisine-cajun', u'cuisine^cuisine-mediterranean', u'cuisine^cuisine-greek', u'cuisine^cuisine-english', u'cuisine^cuisine-spanish', u'cuisine^cuisine-thai', u'cuisine^cuisine-german', u'cuisine^cuisine-moroccan', u'cuisine^cuisine-irish', u'cuisine^cuisine-japanese', u'cuisine^cuisine-cuban', u'cuisine^cuisine-hawaiian', u'cuisine^cuisine-swedish', u'cuisine^cuisine-hungarian', u'cuisine^cuisine-portuguese']


##### Get course search terms

In [6]:
# Define parameters
# Credentials come from the environment so they are never saved in the
# notebook; set YUMMLY_APP_ID and YUMMLY_APP_KEY before starting the kernel.
my_id = os.environ['YUMMLY_APP_ID']
my_key = os.environ['YUMMLY_APP_KEY']
term = 'course'

# Get course terms
get_search_terms(ID=my_id, KEY=my_key, term_type=term)

Status code normal
[u'course^course-Main Dishes', u'course^course-Desserts', u'course^course-Side Dishes', u'course^course-Appetizers', u'course^course-Salads', u'course^course-Breakfast and Brunch', u'course^course-Breads', u'course^course-Soups', u'course^course-Beverages', u'course^course-Condiments and Sauces', u'course^course-Cocktails', u'course^course-Snacks', u'course^course-Lunch']


##### Make API pull for Asian appetizers

In [7]:
# Define parameters
# Credentials come from the environment so they are never saved in the
# notebook; set YUMMLY_APP_ID and YUMMLY_APP_KEY before starting the kernel.
my_id = os.environ['YUMMLY_APP_ID']
my_key = os.environ['YUMMLY_APP_KEY']
allowed_course = 'course^course-Appetizers'
excluded_course = 'course^course-Main Dishes'
allowed_cuisine = 'cuisine^cuisine-asian'
excluded_cuisine = [
    'cuisine^cuisine-american',
    'cuisine^cuisine-italian',
    'cuisine^cuisine-indian',
    'cuisine^cuisine-mexican',
    'cuisine^cuisine-mediterranean',
    'cuisine^cuisine-chinese',
    'cuisine^cuisine-japanese',
]
max_result = 100
start = 500

# Make API call; api_call is the decoded response dict, or None if the
# request did not return status 200
api_call = make_api_call(ID=my_id, KEY=my_key,
                         allowed_course=allowed_course, excluded_course=excluded_course,
                         allowed_cuisine=allowed_cuisine, excluded_cuisine=excluded_cuisine,
                         max_result=max_result, start=start)

Status code normal


### Extract relevant data

##### 1. Get information about the API response

In [8]:
# Each message is built as a single string so the print calls behave the
# same under Python 2 and Python 3.

# Get data type
print("Data type: %s" % (type(api_call),))

# Print keys (list() so Python 3 shows a plain list rather than a dict_keys view)
response_keys = list(api_call.keys())
print("Response keys: %s" % (response_keys,))

# Print total match count
print("Total match count: %s" % (api_call['totalMatchCount'],))

Data type: <type 'dict'>
Response keys: [u'matches', u'totalMatchCount', u'attribution', u'facetCounts', u'criteria']
Total match count: 1737


##### 2. Extract data and store in a list of dictionaries

In [9]:
# Build one flat record per entry in the 'matches' list of the response
main_list = []
for search_result in api_call['matches']:
    sub_dict = {
        'id': search_result['id'],
        'recipe_name': search_result['recipeName'],
        'source_display_name': search_result['sourceDisplayName'],
        # .get() yields None when a recipe has no course / cuisine attribute
        'course': search_result['attributes'].get('course'),
        'cuisine': search_result['attributes'].get('cuisine'),
        'ingredient_list': search_result['ingredients'],
    }
    main_list.append(sub_dict)

##### 3. Build pandas DataFrame

In [10]:
# Turn the list of recipe dicts into a table: one row per dict, one column per key
df = pd.DataFrame(main_list)

### From here, to store the data, you can either: 
1. Send this to a postgres instance on AWS
    - Use the code in the send_to_AWS_RDS folder if you choose this option
2. Save the dataframe as a csv

Regardless of how you store it, you have to make many API calls, since there's a limit to the number of results per call. To get 20k recipes, I made forty calls at 500 each. 