# Searching for Main Courses by Cuisine

## 1. Get appropriate search parameters

In [1]:
import requests
import pandas as pd
import numpy as np
import json
import re
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
# Ask the Yummly metadata endpoints which course / cuisine search values exist.
# The app id/key are sent as query parameters from one shared dict instead of
# being pasted into each URL string.
# NOTE(review): the credentials are still hardcoded; consider reading them from
# the environment before sharing this notebook.
metadata_url = 'http://api.yummly.com/v1/api/metadata/'
metadata_auth = {'_app_id': 'e2b9bebc', '_app_key': '4193215272970d956cfd5384a08580a9'}

# Course search terms
allowedCourse = requests.get(metadata_url + 'course', params=metadata_auth, timeout=30)
allowedCourse.raise_for_status()  # stop here on an HTTP error instead of parsing an error page later

# Cuisine search terms
allowedCuisine = requests.get(metadata_url + 'cuisine', params=metadata_auth, timeout=30)
allowedCuisine.raise_for_status()

In [3]:
# The metadata responses wrap a JSON array in a JSONP-style call
# (presumably `set_metadata('course', [...]);` -- confirm against a raw response).
# Locate the array by its brackets rather than by hard-coded character offsets,
# so a different wrapper length or trailing whitespace does not corrupt the JSON.
def _jsonp_array(text):
    """Return the substring of `text` from its first '[' through its last ']'.

    Raises ValueError when no bracketed array is present.
    """
    start = text.find('[')
    stop = text.rfind(']')
    if start == -1 or stop < start:
        raise ValueError('no JSON array found in JSONP response')
    return text[start:stop + 1]


response_course = _jsonp_array(allowedCourse.text)
response_cuisine = _jsonp_array(allowedCuisine.text)

In [4]:
# Parse both metadata payloads, then list the search values each one offers
course_list = json.loads(response_course)
cuisine_list = json.loads(response_cuisine)
for metadata in (course_list, cuisine_list):
    print([entry['searchValue'] for entry in metadata])

[u'course^course-Main Dishes', u'course^course-Desserts', u'course^course-Side Dishes', u'course^course-Appetizers', u'course^course-Salads', u'course^course-Breakfast and Brunch', u'course^course-Breads', u'course^course-Soups', u'course^course-Beverages', u'course^course-Condiments and Sauces', u'course^course-Cocktails', u'course^course-Snacks', u'course^course-Lunch']
[u'cuisine^cuisine-american', u'cuisine^cuisine-kid-friendly', u'cuisine^cuisine-italian', u'cuisine^cuisine-asian', u'cuisine^cuisine-mexican', u'cuisine^cuisine-southern', u'cuisine^cuisine-french', u'cuisine^cuisine-southwestern', u'cuisine^cuisine-barbecue-bbq', u'cuisine^cuisine-indian', u'cuisine^cuisine-chinese', u'cuisine^cuisine-cajun', u'cuisine^cuisine-mediterranean', u'cuisine^cuisine-greek', u'cuisine^cuisine-english', u'cuisine^cuisine-spanish', u'cuisine^cuisine-thai', u'cuisine^cuisine-german', u'cuisine^cuisine-moroccan', u'cuisine^cuisine-irish', u'cuisine^cuisine-japanese', u'cuisine^cuisine-cuban',

## 2. Begin querying API

### Main courses / American

In [93]:
# Recipe search endpoint; the query parameters are supplied separately per request
url = 'http://api.yummly.com/v1/api/recipes?'

# Credentials are sent as HTTP headers.
# NOTE(review): hardcoded app id/key -- consider loading them from the environment.
headers = {
    'X-Yummly-App-ID': 'e2b9bebc',
    'X-Yummly-App-Key': '4193215272970d956cfd5384a08580a9',
}

In [94]:
# Search filter: main dishes tagged American, with overlapping courses and
# cuisines explicitly excluded.
excluded_courses = [
    'course^course-Appetizers',
    'course^course-Salads',
    'course^course-Condiments and Sauces',
    'course^course-Lunch',
    'course^course-Soups',
    'course^course-Snacks',
    'course^course-Breakfast and Brunch',
    'course^course-Side Dishes',
]
excluded_cuisines = [
    'cuisine^cuisine-italian',
    'cuisine^cuisine-mexican',
    'cuisine^cuisine-indian',
    'cuisine^cuisine-mediterranean',
    'cuisine^cuisine-asian',
]
parameters = {
    'allowedCourse[]': 'course^course-Main Dishes',
    'excludedCourse[]': excluded_courses,
    'allowedCuisine[]': 'cuisine^cuisine-american',
    'excludedCuisine[]': excluded_cuisines,
    # presumably the cap on matches returned by one request -- verify against the API docs
    'maxResult': 500,
}

In [95]:
# Run the recipe search. A timeout is set so a stalled connection raises
# instead of blocking the notebook indefinitely.
response = requests.get(url, headers=headers, params=parameters, timeout=60)

In [96]:
# Display the HTTP status code of the search request
response.status_code

200

In [97]:
# Parse the JSON body of the search response and confirm its Python type
api_call = response.json()
print(type(api_call))

# Top-level keys available in the payload
response_keys = api_call.keys()
print(response_keys)

# Total number of matches reported by the API for this query
print(api_call['totalMatchCount'])

<type 'dict'>
[u'matches', u'totalMatchCount', u'attribution', u'facetCounts', u'criteria']
2970


In [111]:
# Start from empty containers; each is a separate dict object
recipe_info_dict, flavors_dict, ingredients_dict = {}, {}, {}
courses_dict, cuisine_dict = {}, {}

In [112]:
# Walk every match in the search response and file its fields under the recipe id
for item in api_call['matches']:
    recipe_id = item.get('id')

    # Basic recipe info, in the order later used for the DataFrame columns;
    # the last entry is the cuisine label for this query
    recipe_info = [
        item.get('recipeName'),
        item.get('totalTimeInSeconds'),
        item.get('sourceDisplayName'),
        len(item.get('ingredients')),
        item['attributes'].get('course'),
        item.get('rating'),
        'American',
    ]
    recipe_info_dict[recipe_id] = recipe_info

    # Flavors and cuisine attributes are stored exactly as returned
    flavors_dict[recipe_id] = item.get('flavors')
    cuisine_dict[recipe_id] = item['attributes'].get('cuisine')

    # Flatten the ingredient list into one string, then drop percentage
    # prefixes (1%, 2%, ...) and \xae characters
    ingredient_string = ' '.join(item.get('ingredients'))
    ingredient_string = re.sub(r'\d+%\s', '', ingredient_string)
    ingredient_string = re.sub(r'\xae', '', ingredient_string)
    ingredients_dict[recipe_id] = ingredient_string

#### Recipe info df

In [12]:
# One row per recipe id; column names follow the order in which each record was built
recipe_columns = [
    'rec_name',
    'tot_time_seconds',
    'rec_source',
    'num_ingredients',
    'course',
    'rating',
    'cuisine',
]
recipe_info_df = pd.DataFrame.from_dict(recipe_info_dict, orient='index')
recipe_info_df.columns = recipe_columns

#### Flavor df

In [13]:
# Build the frame with recipe ids as columns, then flip it so each recipe is a row
flavor_df = pd.DataFrame(flavors_dict).T

#### Ingredient df

In [113]:
# One row per recipe id, holding that recipe's cleaned ingredient string
ingredients_df = pd.DataFrame.from_dict(ingredients_dict, orient='index')
# The single value column is unnamed after from_dict; give it a readable name
ingredients_df.columns = ['Ingredients']

In [114]:
# Check the type of the first ingredient string. The index holds recipe ids
# (strings), so use explicit positional access instead of relying on `[0]`
# falling back to position on a non-integer index.
type(ingredients_df.Ingredients.iloc[0])

unicode

In [115]:
# Learn the ingredient vocabulary, dropping English stop words
vect = CountVectorizer(stop_words='english')
vect.fit(ingredients_df['Ingredients'])

CountVectorizer(analyzer=u'word', binary=False, decode_error=u'strict',
        dtype=<type 'numpy.int64'>, encoding=u'utf-8', input=u'content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), preprocessor=None, stop_words='english',
        strip_accents=None, token_pattern=u'(?u)\\b\\w\\w+\\b',
        tokenizer=None, vocabulary=None)

In [116]:
# Turn every ingredient string into token counts over the fitted vocabulary
dtm = vect.transform(ingredients_df['Ingredients'])

# Dense view of the counts, for inspection only
dtm.toarray()

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 1, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [135]:
# Label the count matrix: rows are recipe ids, columns are vocabulary terms
df = pd.DataFrame(
    data=dtm.toarray(),
    index=ingredients_df.index,
    columns=vect.get_feature_names(),
)
df.head()

Unnamed: 0,acorn,adobo,allspice,almonds,american,ancho,andouille,apple,apples,apricots,...,wing,wondra,worcestershire,yellow,yoghurt,yogurt,york,yukon,zest,zucchini
Pressure-Cooker-Chicken_-Sausage-and-Shrimp-Jambalaya---Instant-Pot-1502891,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
Grilled-New-Orleans_style-Shrimp-Epicurious,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
Citrus_rubbed-Skirt-Steak-My-Recipes,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
Baked-Flounder-with-Fresh-Lemon-Pepper-1633060,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
Roasted-Oysters-Decadence-My-Recipes,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [126]:
from nltk import word_tokenize
from nltk import pos_tag

In [133]:
# Tokenize the third recipe's ingredient string. The index holds recipe ids
# (strings), so select the row by position explicitly rather than via `[2]`.
text = word_tokenize(ingredients_df.Ingredients.iloc[2])

In [128]:
# for i in pos_tag(text):
#     if i[1] == 'JJ':
#         print i[0]
#     elif i[1] == 'VBZ':
#         print i[0]
#     elif i[1] == 'VBG':
#         print i[0]

In [134]:
# Display the part-of-speech tag assigned to each token
pos_tag(text)

[(u'grated', 'VBN'),
 (u'lemon', 'JJ'),
 (u'zest', 'NN'),
 (u'grated', 'VBD'),
 (u'orange', 'NN'),
 (u'kosher', 'NN'),
 (u'salt', 'NN'),
 (u'ground', 'NN'),
 (u'red', 'VBD'),
 (u'pepper', 'IN'),
 (u'garlic', 'JJ'),
 (u'cloves', 'NNS'),
 (u'skirt', 'VBP'),
 (u'steak', 'JJ'),
 (u'cooking', 'NN'),
 (u'spray', 'NN')]

Regex and vectorizer testing 

In [26]:
# my_string = re.sub(r'\d+%\s', '', my_string)

In [27]:
# my_string = my_string.lower()
# subs = re.sub(r'\xae', '', my_string)

In [31]:
# first_set = ingredients_dict.values()[0]
# ingr_string = ' '.join(first_set)
# ingr_string

In [138]:
# set_of_ten = ingredients_dict.values()
# list_of_strings = []
# for obj in set_of_ten:
#     ingr_string = ' '.join(obj)
#     list_of_strings.append(ingr_string)

In [30]:
# vect = CountVectorizer(stop_words='english')
# vect.fit(list_of_strings)

In [29]:
# simple_train_dtm = vect.transform(list_of_strings)
# simple_train_dtm.toarray()

In [28]:
# pd.DataFrame(simple_train_dtm.toarray(), columns=vect.get_feature_names()).head()