In [None]:
import json
import pandas as pd
import numpy as np
from multi_key_dict import multi_key_dict
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import linear_kernel
from IPython.display import display, HTML
pd.options.display.max_columns = 500

# Using plotly + cufflinks in offline mode

import plotly.plotly as py
import plotly.graph_objs as go
import plotly
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
cf.set_config_file(offline=True)

In [None]:
# Directory that holds the recipe dump and the matching id list.
DATA_DIR = '/home/katie/01-OneDrive/01_galvanize_dsi/capstones/03-capstone_3/capstone3/data'
# Marker that separates consecutive JSON documents inside random_recipes.txt.
RECIPE_DELIMITER = "$%&->recipe_end<-&%$"

# Context managers close both handles even when a read fails part-way.
with open(DATA_DIR + '/random_recipes.txt', 'r') as recipe_file:
    recipes_text = recipe_file.read()
with open(DATA_DIR + '/recipe_ids.csv', 'r') as ids_file:
    ids_text = ids_file.read()

# Ids are stored as integers separated by ", ".
recipe_ids = [int(raw_id) for raw_id in ids_text.split(", ")]

recipes_list = recipes_text.split(RECIPE_DELIMITER)
# Whitespace-only chunks (e.g. after a trailing delimiter) are skipped,
# because json.loads raises on empty input.
recipes_json = [json.loads(chunk) for chunk in recipes_list if chunk.strip()]

In [None]:
# Column names of the boolean tag matrix, in fixed order:
# diets first, then cuisines, then dish types.
# 'japenese' is spelled this way on purpose: feature_dict maps to the same spelling.
bool_feature_cols = [
    # --- diets ---
    'dairy_free', 'fodmap_friendly', 'gluten_free', 'keto', 'vegetarian',
    'paleo', 'pescatarian', 'primal', 'vegan', 'whole_30',
    # --- cuisines ---
    'african', 'american', 'asian', 'bbq', 'british', 'cajun', 'caribbean',
    'central_american', 'chinese', 'eastern_european', 'english', 'european',
    'french', 'german', 'greek', 'indian', 'irish', 'italian', 'jewish',
    'japenese', 'latin_american', 'mediterranean', 'mexican', 'middle_eastern',
    'scottish', 'south_american', 'southern', 'spanish', 'thai', 'vietnamese',
    # --- dish types ---
    'appetizer', 'batter', 'beverage', 'bread', 'breakfast', 'condiment',
    'dessert', 'dinner', 'dip', 'lunch', 'main_dish', 'salad', 'side_dish',
    'snack', 'soup', 'spread',
]

In [None]:
# Maps a raw recipe label (diet, cuisine or dish type) to its tag column name.
# Several keys are truncated ('chines', 'middl eastern', 'vietnames', 'creol',
# 'barbecu', 'japanes') -- presumably the labels are stemmed before lookup;
# TODO confirm against the data.
feature_dict = {
    # diets
    'dairy free': 'dairy_free',
    'fodmap friendly': 'fodmap_friendly',
    'gluten free': 'gluten_free',
    'ketogenic': 'keto',
    'lacto ovo vegetarian': 'vegetarian',
    'paleolithic': 'paleo',
    'pescatarian': 'pescatarian',
    'primal': 'primal',
    'vegan': 'vegan',
    'whole 30': 'whole_30',
    # cuisines
    'american': 'american',
    'asian': 'asian',
    'british': 'british',
    'caribbean': 'caribbean',
    'central american': 'central_american',
    'chines': 'chinese',
    'english': 'english',
    'european': 'european',
    'french': 'french',
    'german': 'german',
    'greek': 'greek',
    'indian': 'indian',
    'italian': 'italian',
    'jewish': 'jewish',
    'mediterranean': 'mediterranean',
    'mexican': 'mexican',
    'middl eastern': 'middle_eastern',
    'scottish': 'scottish',
    'southern': 'southern',
    'spanish': 'spanish',
    'vietnames': 'vietnamese',
    # dish types (several synonyms collapse onto one column)
    'antipasti': 'appetizer',
    'antipasto': 'appetizer',
    'appetizer': 'appetizer',
    'batter': 'batter',
    'bread': 'bread',
    'breakfast': 'breakfast',
    'brunch': 'breakfast',
    'condiment': 'condiment',
    'dessert': 'dessert',
    'dinner': 'dinner',
    'dip': 'dip',
    "hor d'oeuvre": 'appetizer',
    'lunch': 'lunch',
    'main course': 'main_dish',
    'main dish': 'main_dish',
    'morning meal': 'breakfast',
    'salad': 'salad',
    'sauce': 'condiment',
    'side dish': 'side_dish',
    'snack': 'snack',
    'soup': 'soup',
    'spread': 'spread',
    'starter': 'appetizer',
    # further cuisines
    'african': 'african',
    'cajun': 'cajun',
    'creol': 'cajun',
    'south american': 'south_american',
    'latin american': 'latin_american',
    'irish': 'irish',
    'thai': 'thai',
    'bbq': 'bbq',
    'barbecu': 'bbq',
    'japanes': 'japenese',
    'scandinavian': 'eastern_european',
    'nordic': 'eastern_european',
    # further dish types
    'beverage': 'beverage',
    'drink': 'beverage',
    'frosting': 'dessert',
    'icing': 'dessert',
    'crust': 'bread',
}

In [None]:
# One row per recipe and one column per tag; every flag starts at 0.0.
rows = np.zeros((len(recipes_json), len(bool_feature_cols)))
df = pd.DataFrame(data=rows, columns=bool_feature_cols)

In [None]:
#recipes_json[0]

In [None]:
# Flag every cuisine listed on a recipe in the tag matrix.
for idx, recipe in enumerate(recipes_json):
    row_label = df.index[idx]
    for cuisine in recipe['cuisines']:
        # One .at write instead of the chained `df.iloc[idx][col] = 1`, which
        # assigns into a temporary row object and is not guaranteed to reach df.
        # A label missing from feature_dict still raises KeyError.
        df.at[row_label, feature_dict[cuisine]] = 1

In [None]:
# Flag every dish type listed on a recipe in the tag matrix.
for idx, recipe in enumerate(recipes_json):
    row_label = df.index[idx]
    for dt in recipe['dishTypes']:
        # One .at write instead of the chained `df.iloc[idx][col] = 1`, which
        # assigns into a temporary row object and is not guaranteed to reach df.
        # A label missing from feature_dict still raises KeyError.
        df.at[row_label, feature_dict[dt]] = 1

In [None]:
# Flag every diet listed on a recipe in the tag matrix.
for idx, recipe in enumerate(recipes_json):
    row_label = df.index[idx]
    for diet in recipe['diets']:
        # One .at write instead of the chained `df.iloc[idx][col] = 1`, which
        # assigns into a temporary row object and is not guaranteed to reach df.
        # A label missing from feature_dict still raises KeyError.
        df.at[row_label, feature_dict[diet]] = 1

In [None]:
#df.head()

In [None]:
# Number of parsed recipe ids (repeated ids are counted each time).
len(recipe_ids)

In [None]:
# Collapse to distinct ids; comparing this count with len(recipe_ids)
# shows whether any id appears more than once.
unique_recipe_ids = set(recipe_ids)
len(unique_recipe_ids)

In [None]:
# Lower-cased ingredient names, one inner list per recipe.
# Comprehensions replace the former `_` accumulator: in IPython `_` is the
# last-output variable, and assigning to it stops IPython from updating it.
ingredients = [
    [ingredient['name'].lower() for ingredient in recipe['extendedIngredients']]
    for recipe in recipes_json
]
# The same names flattened across all recipes, in recipe order.
combined_ingredients = [name for names in ingredients for name in names]

In [None]:
# Distinct ingredient names across all recipes.
unique_ingredients = set(combined_ingredients)

In [None]:
# Alphabetical listing of the distinct ingredient names.
unique_sort = sorted(unique_ingredients)
# Distinct names first, then total ingredient mentions, one number per line.
print(len(unique_ingredients), len(combined_ingredients), sep="\n")

In [None]:
# One space-joined string of ingredient names per recipe, ready for vectorizing.
ingredient_strings = [' '.join(names) for names in ingredients]

In [None]:
# Bag-of-words counts over the per-recipe ingredient strings, using
# CountVectorizer's default settings.
# NOTE(review): the default tokenizer works on single words, so a multi-word
# ingredient contributes one feature per word -- confirm this is intended.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(ingredient_strings)

In [None]:
# Densify the sparse count matrix. toarray() yields a plain ndarray, whereas
# todense() returns the legacy np.matrix type that NumPy no longer recommends.
ingredients_dense = X.toarray()

In [None]:
# Dimensions of the dense ingredient count matrix.
ingredients_dense.shape

In [None]:
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2.
# Prefer its replacement when present and fall back on older installs.
if hasattr(vectorizer, 'get_feature_names_out'):
    ingredient_vocab = vectorizer.get_feature_names_out()
else:
    ingredient_vocab = vectorizer.get_feature_names()

# One column per vocabulary token, one row per recipe.
ingredients_df = pd.DataFrame(ingredients_dense, columns=ingredient_vocab)

In [None]:
# Preview the first rows of the ingredient count frame.
ingredients_df.head()

In [None]:
# Reduce both frames to 0/1 indicators: any non-zero entry becomes 1.
ingredients_boolean_df = ingredients_df.ne(0).astype(int)
df = df.ne(0).astype(int)

In [None]:
# Preview the first rows of the 0/1 ingredient indicators.
ingredients_boolean_df.head()

In [None]:
# Place the ingredient indicators and the tag indicators side by side
# (ingredient columns first, tag columns last).
# NOTE(review): an ingredient token that equals a tag name (e.g. 'bread')
# would produce duplicate column labels here -- verify against the vocabulary.
combined_feat_df = pd.concat([ingredients_boolean_df, df], axis=1)

In [None]:
# (rows, columns) of the combined feature frame.
combined_feat_df.shape

In [None]:
# linear_kernel on its own returns raw dot products. These rows are 0/1
# indicator vectors that are not unit length, so each row is scaled to unit
# L2 norm first; the dot products are then true cosine similarities.
feature_matrix = combined_feat_df.values.astype(float)
row_norms = np.linalg.norm(feature_matrix, axis=1, keepdims=True)
row_norms[row_norms == 0] = 1.0  # keep all-zero rows at zero instead of dividing by 0
cosine_similarities = linear_kernel(feature_matrix / row_norms)

In [None]:
# Display the pairwise recipe-by-recipe similarity matrix.
cosine_similarities

In [None]:
# Number of recommendations to keep per recipe.
num_rec = 5
# results[k] is a flat list for recipe k: seven fields per recommendation
# (source id, source title, match title, match image, paired wines,
# pairing text, match source URL), repeated once per recommendation.
results = []
# (source title, recommended title) tuples across all recipes.
pairings = []

for idx, recipe in enumerate(recipes_json):
    # Rank every recipe by descending similarity, then drop this recipe by
    # index. The old code dropped whatever sorted first, which goes wrong when
    # another recipe ties with the recipe's own score.
    ranked = cosine_similarities[idx].argsort()[::-1]
    similar_indices = [i for i in ranked if i != idx][:num_rec]

    recommendations = []
    for i in similar_indices:
        match = recipes_json[i]
        # If 'winePairing' is missing or null, use an empty mapping so both
        # lookups yield None instead of raising AttributeError.
        wine = match.get('winePairing') or {}
        recommendations.extend([
            recipe.get('id'),
            recipe.get('title'),
            match.get('title'),
            match.get('image'),
            wine.get('pairedWines'),
            wine.get('pairingText'),
            match.get('sourceUrl'),
        ])
        pairings.append((recipe.get('title'), match.get('title')))
    results.append(recommendations)

In [None]:
# Display every collected (recipe title, recommended title) pair.
pairings

In [None]:
def _rows_tagged(tag):
    """Return the row labels of combined_feat_df where column `tag` equals 1."""
    return list(combined_feat_df.query("{} == 1".format(tag)).index)


# Row labels of the recipes carrying each tag of interest.
italian = _rows_tagged("italian")
asian = _rows_tagged("asian")
keto = _rows_tagged("keto")
gf = _rows_tagged("gluten_free")

In [None]:
# Collect ('Gluten-Free', recipe id, recipe title) tuples for up to the first
# eight entries of gf.
base = []
for idx in gf[0:8]:
    # idx is used as a position into recipes_json.
    base.append(('Gluten-Free', recipes_json[idx]['id'], recipes_json[idx]['title']))