In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from ast import literal_eval
import statistics
import nltk
import re
from scipy import stats
from venn import venn
import matplotlib
import squarify
from wordcloud import WordCloud, ImageColorGenerator
from wordcloud import STOPWORDS
from nltk.corpus import stopwords  
from gensim.parsing.preprocessing import STOPWORDS
from sklearn import preprocessing
#nltk.download('stopwords')
from nltk.tokenize import word_tokenize  
import gradio as gr
from tabulate import tabulate
%matplotlib inline

In [2]:
# Load the three CSV inputs used by the rest of the notebook, in order:
# the recipe table, the ingredient frequency table and the raw user
# interactions (reviews). Paths are relative to the notebook's working directory.
recipes, ingredient_freq, interactions = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/large_data/recipes.csv',
        'data/recipes/ingredient_freq.csv',
        'data/large_data/RAW_interactions.csv',
    )
)

In [3]:
def calc_ingredient_ratings(ingredient_list, ingredient_freq= ingredient_freq):
    """Sum the ``ingredient_freq`` entries of every ingredient in a recipe.

    Parameters
    ----------
    ingredient_list : str or iterable of str
        The ingredients of one recipe. A string is parsed with
        ``literal_eval`` first: ``get_recipes`` passes raw ``mod_ingredients``
        cells, and ``make_wordmap`` has to ``literal_eval`` that same column,
        so the cells are string representations of lists. Iterating such a
        string directly would walk over single characters.
    ingredient_freq : pandas object
        Anything supporting ``.loc[ingredient]``. Defaults to the module-level
        ``ingredient_freq`` table as it was when this cell was executed.
        NOTE(review): the lookup only finds something if this object is
        indexed by ingredient name - confirm how the CSV is indexed.

    Returns
    -------
    The sum of the looked-up entries; ingredients that cannot be found
    contribute 0.

    Raises
    ------
    ValueError, SyntaxError
        If ``ingredient_list`` is a string that is not a valid Python literal.
    """
    if isinstance(ingredient_list, str):
        ingredient_list = literal_eval(ingredient_list)

    rating = 0
    for ingredient in ingredient_list:
        try:
            freq = ingredient_freq.loc[ingredient]
        except (KeyError, TypeError):
            # Unknown (or unusable) label: best effort, counts as zero.
            # A bare ``except`` would also hide KeyboardInterrupt and real bugs.
            freq = 0
        rating += freq
    return rating

In [4]:
def _zscore_or_zero(values):
    """Z-score ``values``; entries whose z-score is undefined become 0."""
    arr = np.asarray(values, dtype=float)
    if arr.size == 0:
        return arr
    # A single row or a constant column has zero variance, for which
    # stats.zscore yields NaN; that NaN would otherwise poison every score.
    with np.errstate(divide='ignore', invalid='ignore'):
        z = np.asarray(stats.zscore(arr), dtype=float)
    return np.where(np.isfinite(z), z, 0.0)


def calc_overall_score(recipe_list, priority):
    """Combine the ranking signals of each recipe into one weighted score.

    Parameters
    ----------
    recipe_list : pandas.DataFrame
        Must provide the columns ``avg_rating``, ``n_ratings``, ``minutes``,
        ``ingredient_score``, ``n_steps`` and ``n_ingredients``.
        Side effect: a ``rating_score`` column
        (``avg_rating * n_ratings``) is added to this frame in place.
    priority : mapping
        Weights keyed ``'A'`` (n_steps), ``'B'`` (n_ingredients),
        ``'C'`` (rating_score), ``'D'`` (ingredient_score) and
        ``'E'`` (minutes).

    Returns
    -------
    pandas.Series
        One score per row of ``recipe_list``. ``minutes``, ``n_steps`` and
        ``n_ingredients`` enter as z-scores (0 where the z-score is
        undefined); ``rating_score`` enters unscaled and
        ``ingredient_score`` is multiplied by 100.

    NOTE(review): an earlier, commented-out version of this formula
    subtracted the n_steps, n_ingredients and minutes terms. Here they are
    added, so larger values raise the score - confirm that this is intended.
    """
    recipe_list['rating_score'] = recipe_list['avg_rating']*recipe_list['n_ratings']
    overall_score = (
        _zscore_or_zero(recipe_list['minutes'])*priority['E']
        + recipe_list['rating_score']*priority['C']
        + recipe_list['ingredient_score']*100*priority['D']
        + _zscore_or_zero(recipe_list['n_steps'])*priority['A']
        + _zscore_or_zero(recipe_list['n_ingredients'])*priority['B']
    )
    return overall_score

In [5]:
def get_recipes(search_phrase, recipes, priority):
    """Select the recipes whose name contains ``search_phrase`` and score them.

    Parameters
    ----------
    search_phrase : str
        Text typed by the user. It is matched case-insensitively as a plain
        substring, so characters such as ``(`` or ``+`` cannot raise a regex
        error.
    recipes : pandas.DataFrame
        Recipe table; needs ``name`` and ``mod_ingredients`` plus the columns
        read by ``calc_overall_score``.
    priority : mapping
        Weights forwarded to ``calc_overall_score``.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows with ``ingredient_score`` and
        ``overall_score`` columns added (``calc_overall_score`` also adds
        ``rating_score``). ``recipes`` itself is not modified.
    """
    # na=False: rows with a missing name simply do not match; without it the
    # mask contains NaN and boolean indexing with .loc fails.
    name_matches = recipes['name'].str.contains(search_phrase, case=False, regex=False, na=False)
    recipe_list = recipes.loc[name_matches].copy()
    recipe_list['ingredient_score'] = recipe_list['mod_ingredients'].apply(calc_ingredient_ratings)
    recipe_list['overall_score'] = calc_overall_score(recipe_list, priority)
    return recipe_list

In [6]:
def make_wordmap(recipe_list):
    """Draw a treemap of the ten most frequent ingredients in ``recipe_list``.

    Parameters
    ----------
    recipe_list : pandas.DataFrame
        Needs a ``mod_ingredients`` column whose cells are string
        representations of lists (each cell is parsed with ``literal_eval``).

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the treemap. It is closed in pyplot before being
        returned, so it is not also rendered inline by the notebook. When
        there are no ingredients to show, the figure is left blank.
    """
    # Draw on an explicit Axes rather than pyplot's "current figure".
    fig, ax = plt.subplots()
    common_ingredients = recipe_list['mod_ingredients'].apply(literal_eval).explode().value_counts()
    top_ingredients = common_ingredients.iloc[:10]
    if not top_ingredients.empty:
        squarify.plot(sizes=top_ingredients, label=top_ingredients.index, alpha=.5,
                      text_kwargs={"wrap": True}, ax=ax)
    ax.axis('off')
    plt.close(fig)
    return fig

In [7]:
def show_top_recipes(recipe_list):
    """Build HTML links to the five best-scoring recipes.

    Parameters
    ----------
    recipe_list : pandas.DataFrame
        Needs the columns ``name``, ``id`` and a numeric ``overall_score``.

    Returns
    -------
    list of str
        Up to five ``<a>`` elements pointing at
        ``https://www.food.com/recipe/<id>``, ordered from highest to lowest
        ``overall_score``. Fewer are returned when ``recipe_list`` has fewer
        than five rows.
    """
    from html import escape  # local import: only needed for the link text

    base_url = 'https://www.food.com/recipe/'
    top_5 = recipe_list.nlargest(5, 'overall_score')
    # Columns are read by label (not by position), and the name is escaped so
    # that characters like "&" or "<" cannot break the generated markup.
    return [
        f"<a href = '{base_url}{recipe_id}'>{escape(str(name))}</a>"
        for name, recipe_id in zip(top_5['name'], top_5['id'])
    ]


In [14]:
def make_wordcloud(recipe_list):
    """Render a word cloud from the reviews of the recipes in ``recipe_list``.

    Reviews are taken from the module-level ``interactions`` table, selecting
    the rows whose ``recipe_id`` appears in ``recipe_list['id']``.

    Parameters
    ----------
    recipe_list : pandas.DataFrame
        Needs an ``id`` column.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the word cloud. It is closed in pyplot before being
        returned, so it is not also rendered inline by the notebook. The
        figure is left blank when no usable review text is available.
    """
    # Draw on an explicit Axes rather than pyplot's "current figure".
    fig, ax = plt.subplots()

    stop_words = stopwords.words('english')
    stop_words.extend(['i','ive',"i've",'didnt','them', 'little','use','added','good','great', 'think', 'taste',
                       'recipe', 'used','made','make','still','also','baked','bake','thank','thanks','cup'])
    stop_words = STOPWORDS.union(set(stop_words))

    matching_rows = interactions['recipe_id'].isin(recipe_list['id'])
    # Drop missing reviews; otherwise str(NaN) puts the word "nan" in the cloud.
    review_list = interactions.loc[matching_rows, 'review'].dropna()
    text = " ".join(str(review) for review in review_list)

    if text.strip():
        try:
            wordcloud = WordCloud(stopwords=stop_words, background_color="white").generate(text)
        except ValueError:
            # Raised by wordcloud when no word survives stop-word filtering.
            wordcloud = None
        if wordcloud is not None:
            ax.imshow(wordcloud, interpolation='bilinear')

    ax.axis("off")
    plt.close(fig)
    return fig

<Figure size 432x288 with 0 Axes>

In [9]:
def get_priority(priority_1, priority_2, priority_3):
    """Translate the three ranked criteria into a weight per scoring term.

    Each argument is one of the criterion labels below. The first choice gets
    weight .4, the second .3, the third .2; every criterion that was not
    chosen gets .05. If a label is repeated, the weight of its last position
    is the one that sticks.

    Returns a dict keyed "A".."E". Raises KeyError for an unknown label.
    """
    label_to_code = {
        "Number of Steps": "A",
        "Number of Ingredients": "B",
        "Ratings": "C",
        "Exoticness of Ingredients": "D",
        "Time to Prepare": "E",
    }
    ranked_codes = [label_to_code[label] for label in (priority_1, priority_2, priority_3)]

    # Start everything at the "not chosen" weight, then overwrite by rank.
    weights = dict.fromkeys("ABCDE", .05)
    for code, weight in zip(ranked_codes, (.4, .3, .2)):
        weights[code] = weight
    return weights

In [17]:
def recipe_finder(recipe_name, priority_1, priority_2, priority_3):
    """Gradio callback: search recipes by name and build all interface outputs.

    Parameters
    ----------
    recipe_name : str
        Search text matched against recipe names by ``get_recipes``.
    priority_1, priority_2, priority_3 : str
        Ranked criterion labels, converted to weights by ``get_priority``.

    Returns
    -------
    tuple
        ``(wordcloud_figure, wordmap_figure, link_1, ..., link_5)``: always
        seven items, matching the seven outputs declared on the interface.
        Missing links are empty strings when fewer than five recipes match.
    """
    n_links = 5  # number of HTML link outputs on the interface
    priority =  get_priority(priority_1, priority_2, priority_3)
    recipe_list = get_recipes(recipe_name, recipes, priority)

    # Pad so the output count stays fixed even for narrow searches.
    top_links = list(show_top_recipes(recipe_list))[:n_links]
    top_links += [""] * (n_links - len(top_links))

    return (make_wordcloud(recipe_list), make_wordmap(recipe_list), *top_links)

# Criteria offered by each of the three priority dropdowns.
options = ["Number of Steps", "Number of Ingredients", "Ratings", "Exoticness of Ingredients", "Time to Prepare"]

# One text box plus three priority dropdowns in; two plots and five HTML
# links out, in the order returned by recipe_finder.
iface = gr.Interface(
    fn=recipe_finder,
    inputs=["text", *(gr.inputs.Dropdown(options) for _ in range(3))],
    outputs=[
        gr.outputs.Image(plot=True, label="WordCloud from Reviews"),
        gr.outputs.Image(plot=True, label="WordMap from Recipes"),
        *(gr.outputs.HTML(label = f"#{rank} Recipe") for rank in range(1, 6)),
    ],
    title = "Cómetelo",
)


In [18]:
# Start the Gradio app. With share = True a temporary public URL is created in
# addition to the local one (see the cell output), so the app is reachable
# from outside this machine while it runs.
iface.launch(share = True)

Running locally at: http://127.0.0.1:7863/
This share link will expire in 24 hours. If you need a permanent link, visit: https://gradio.app/introducing-hosted (NEW!)
Running on External URL: https://50533.gradio.app
Interface loading below...


(<Flask 'gradio.networking'>,
 'http://127.0.0.1:7863/',
 'https://50533.gradio.app')