In [1]:
import requests
from sklearn.manifold import TSNE
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import numpy as np
from fuzzywuzzy import fuzz, process
from functools import lru_cache
%load_ext line_profiler


In [2]:
# User input examples: keep exactly one `theurl` assignment uncommented.
# The trailing notes record the response each URL was observed to give.
#theurl = 'https://www.foodnetwork.com/recipes/giada-de-laurentiis/chicken-piccata-recipe2-1913809' # for 404
theurl = 'https://tasty.co/recipe/chicken-veggie-stir-fry' #for a good response
#theurl = 'https://damndelicious.net/2019/08/06/easy-chicken-tacos/' #for a less good response
#theurl =  'https://www.thewholesomedish.com/the-best-classic-burger'

In [3]:
#function definition block
@lru_cache()
def load_data():
    ''' Load previously generated data, all of this can be created via the other notebooks in this codebase
    except the farmers market data, which can be found here:
    https://docs.google.com/spreadsheets/d/1MOWl8Cg4xyCvAmR06cFhJ9obYR5ToZD_XhSEcgekjzY/edit#gid=1829695724

    The result is memoised by ``lru_cache``, so the files are read once per kernel and every
    caller receives the same objects; mutating them affects later callers.

    Returns an 11-tuple:
    (w2vm, aisledict, noise, atFM, FMinfo, ingvect, ingfeatures, fulling, recvect, recfeatures, recdoc)
    where ``fulling`` is the cleaned ingredient lists joined into one ', '-separated string each.
    '''
    data_dir = "eatlocal/generated_data/"

    def _unpickle(filename):
        # NOTE: unpickling executes arbitrary code from the file; only load files you generated yourself.
        # The context manager closes the handle, which the bare open() calls never did.
        with open(data_dir + filename, 'rb') as handle:
            return pickle.load(handle)

    w2vm = _unpickle("model_w2v.pkl")
    aisledict = _unpickle("ingredient_aisle.pkl")
    noise = _unpickle("noiselist.pkl")
    atFM = _unpickle("FMproducts.pkl")
    FMinfo = _unpickle("FMfull.pkl")

    ingvect = _unpickle("tfidfvect_ingredients.pkl")
    ingfeatures = _unpickle("features_ingredients.pkl")
    fullnningredients = _unpickle("cleaned_ingredients.pkl")
    # one comma-separated document per recipe
    fulling = [', '.join(n) for n in fullnningredients]

    recvect = _unpickle("tfidfvect_recipes.pkl")
    recfeatures = _unpickle("features_recipes.pkl")
    recdoc = _unpickle("full_recipedoc.pkl")

    return w2vm, aisledict, noise, atFM, FMinfo, ingvect, ingfeatures, fulling, recvect, recfeatures, recdoc

def request_comparison(userinput):
    '''Takes the user input recipe URL and extracts the recipe information
    Note you must have a spoonacular API key for this to work
    (read from eatlocal/generated_data/spoonapikey.txt).

    Returns a tuple ``(ingredients, cur_rec)``:
      * ingredients -- comma separated string of lower-cased ingredient names
      * cur_rec     -- one-element list holding "title ingredients instructions",
                       or None when the extraction did not yield a usable recipe
                       (no title in the payload, or a title containing '404').
    '''
    # read the key with a context manager so the file handle is closed
    with open('eatlocal/generated_data/spoonapikey.txt') as keyfile:
        mykey = keyfile.read().strip()

    #grabbing info using Spoonacular
    params = {'url': userinput, 'forceExtraction': 'true', 'apiKey': mykey, 'analyze': 'true'}
    # a timeout keeps the notebook from hanging forever on an unresponsive API
    response = requests.get('https://api.spoonacular.com/recipes/extract', params=params, timeout=30)

    #parsing output
    rec = response.json()
    # error payloads may lack these keys entirely, so do not index them directly
    ingcomp = rec.get('extendedIngredients') or []
    ingredients = ','.join([ing['name'].lower() for ing in ingcomp if ing.get('name')])

    #error handling
    title = rec.get('title')
    if title is None or '404' in title: #they don't return an actual 404 which is annoying
        cur_rec = None
    else:
        # instructions can be null in the payload; treat that as empty text
        instructions = rec.get('instructions') or ''
        cur_rec = [title + ' ' + ingredients + ' ' + instructions]

    return ingredients, cur_rec

def removenoise(ingredients, noise):
    '''Strip noise words from a comma separated ingredient string.

    ingredients -- comma separated string, one ingredient per field
    noise       -- container of words to drop (tested with ``in``)

    Returns a list with one cleaned string per comma separated field; the
    surviving words of each field are re-joined with single spaces, so a
    field made up only of noise words becomes ''.
    '''
    return [
        ' '.join(token for token in entry.split() if token not in noise)
        for entry in ingredients.split(',')
    ]

def rulesofsimilarity(noise_free_ing, w2vm, aisledict, atFM, FMinfo):
    '''Classify every ingredient as available at the market, a pantry item, or replaceable.

    noise_free_ing -- list of cleaned ingredient strings
    w2vm           -- model exposing ``.wv.most_similar(word, topn=...)``
    aisledict      -- mapping ingredient -> sequence whose first element is a ';' separated aisle string
    atFM           -- product names used as fuzzy-matching choices
    FMinfo         -- vendor table handed to the helper functions

    Returns ``(allout, wheretoshop)``: a list with one info dict per reported ingredient and the
    shopping list dict (vendor -> items). Ingredients flagged as baking/spices by handle_pantry,
    and ingredients the model raises KeyError for, are NOT added to ``allout``.
    '''
    # every ingredient already in play (originals plus replacements picked so far)
    combineding = noise_free_ing.copy()

    # blank record; each ingredient gets its own shallow copy
    template = {
        'ingredient': None,
        'where_available': None,
        'unknown': None, #unknown to word2vec
        'baking': False, #true if is baking
        'spices': False, #true if spice/seasoning
        'spice_businesses': None,
        'match': None,
        'similar_vendor': None,
        'try_fresh': None,
        'store_hasreplacement': None,
        'aisle': None,
        'cos_sim': None #this is defined in the validation call
    }

    wheretoshop = {}
    allout = []

    for i in noise_free_ing:
        thisout = dict(template, ingredient=i)

        # A fuzzy score of 90+ against the market products counts as "available"
        highest = process.extractOne(i, atFM)
        if highest[1] >= 90:
            handle_atFM(highest[0], FMinfo, thisout, wheretoshop)
            allout.append(thisout)
            continue

        # Not available: work out the aisle(s). An ingredient may be unshelved (None)
        # or shelved in several aisles separated by ';'.
        curaislelist = aisledict.get(i)
        if curaislelist is None:
            curaisles = None
        else:
            curaisles = set(curaislelist[0].lower().split(';'))
        thisout['aisle'] = curaisles

        # pantry items (baking / spices) are dropped from the report
        if handle_pantry(thisout, curaisles, FMinfo):
            continue

        # ask the model for neighbours of the closest neighbour
        try:
            similar = w2vm.wv.most_similar(i, topn=5)
            opposite = w2vm.wv.most_similar(similar[0][0], topn=100)
        except KeyError:
            # an ingredient the model has never seen; it is also dropped from the report
            thisout['unknown'] = i
            continue

        # pick a market product from those neighbours that sits in the right aisle
        handle_matching(thisout, opposite, atFM, FMinfo, [], combineding, curaisles, aisledict)

        # a replacement with vendors goes onto the shopping list
        if thisout['store_hasreplacement'] is not None:
            handle_shoppinglist(thisout, wheretoshop)

        # usually-prepackaged items may get a "make it fresh" suggestion
        if curaisles is not None:
            handle_tryfresh(thisout, curaisles, aisledict, opposite)

        allout.append(thisout)

    return allout, wheretoshop

def validationstep(allout, fulling, ingvect, ingfeatures, recvect, recfeatures, recdoc, cur_rec, FMinfo, wheretoshop, min_similarity=0.37):
    '''Validate how well our suggestions fit based on cosine similarity of entire recipe

    For every swapped ingredient the ingredient list is rebuilt with the replacement, the closest
    known recipe (by ingredient features) is looked up, and that recipe is compared with the user's
    recipe on the full recipe features. The score is written to ``out['cos_sim']`` in place.

    allout         -- list of ingredient info dicts (mutated in place)
    ingvect        -- fitted vectorizer for ingredient lists (``.transform``)
    ingfeatures    -- feature matrix of the known recipes' ingredients
    recvect        -- fitted vectorizer for full recipe text (``.transform``)
    recfeatures    -- feature matrix of the known recipes, indexable by row
    cur_rec        -- one-element list with the user's recipe text
    FMinfo, wheretoshop -- forwarded to handle_notvalid for weak replacements
    min_similarity -- scores below this mark a replacement as weak (default 0.37, the value the
                      output cell also uses)
    fulling, recdoc -- unused; kept so existing callers keep working

    Returns None. ``cos_sim`` is stored as returned by ``linear_kernel(...).flatten()``,
    i.e. an array rather than a plain float.
    '''
    initinglist = [out['ingredient'] for out in allout]

    # (original, replacement) pairs for items we actually swapped out
    swaps = [
        (out['ingredient'], out['match'][0])
        for out in allout
        if out['where_available'] is None
        and out['match'] is not None
        and 'No Match' not in out['match']
    ]

    # Items without any replacement always score 0. This used to happen only inside the swap
    # loop, which left cos_sim as None (and crashed the output cell) when no swap existed.
    for out in allout:
        if out['match'] is not None and 'No Match' in out['match']:
            out['cos_sim'] = 0

    if not swaps:
        return

    # the user's recipe does not change between swaps, so vectorise it once
    currecfeat = recvect.transform(cur_rec)

    for rem, add in swaps:
        # create a new ingredient list with the swap in place
        newlist = initinglist.copy()
        newlist.remove(rem)
        newlist.append(add)

        # based on ingredients alone find the most similar recipe we know
        nsf = ingvect.transform([', '.join(newlist)])
        cosine_similarities = linear_kernel(nsf, ingfeatures).flatten()
        related_rec_index = cosine_similarities.argsort()[-1]

        #now find out, based on more features, how similar these two recipes are
        recipe_similarity = linear_kernel(currecfeat, recfeatures[related_rec_index]).flatten()

        # take what we have calculated and place it into our dicts
        for out in allout:
            if out['ingredient'] == rem:
                out['cos_sim'] = recipe_similarity
                if recipe_similarity < min_similarity:
                    handle_notvalid(out, FMinfo, out['aisle'], wheretoshop)
        
            
        
        

In [4]:
# Helper functions!

def handle_atFM(highest, FMinfo, thisout, wheretoshop):
    '''When an item IS available at the farmers market this function figures out where and fills out the info dict appropriately

    highest     -- the matched market product name
    FMinfo      -- vendor table with 'TYPES OF PRODUCTS AVAILABLE' and 'BUSINESS NAME' columns
    thisout     -- info dict of the ingredient; 'where_available' is set to the vendor list
    wheretoshop -- shopping list dict (vendor -> list of items), updated in place
    '''
    # Literal substring match: the product name is data, not a regular expression, and rows
    # with no product text must count as "no match" instead of producing a NaN mask.
    sells_it = FMinfo['TYPES OF PRODUCTS AVAILABLE'].str.contains(highest, regex=False, na=False)
    thebiz = FMinfo.loc[sells_it, 'BUSINESS NAME'].tolist()
    thisout['where_available'] = thebiz

    # nothing to put on the shopping list when no vendor row mentions the product
    if not thebiz:
        return

    # place in shopping list: prefer a vendor we are already visiting
    for vendor in thebiz:
        if vendor in wheretoshop:
            wheretoshop[vendor].append(thisout['ingredient'])
            return
    wheretoshop[thebiz[0]] = [thisout['ingredient']]
        
def handle_pantry(thisout, curaisles, FMinfo):
    '''Flag baking and spice ingredients on the info dict.

    thisout   -- info dict; 'baking', 'spices' and 'spice_businesses' may be updated
    curaisles -- set of lower-cased aisle names for the ingredient, or None
    FMinfo    -- vendor table with 'aisles' and 'BUSINESS NAME' columns

    Returns True when the ingredient is in the baking or the spices and seasonings aisle.
    '''
    # unshelved ingredients are never pantry items
    if curaisles is None:
        return False

    is_baking = 'baking' in curaisles
    is_spice = 'spices and seasonings' in curaisles

    if is_baking:
        thisout['baking'] = True

    if is_spice:
        # list every vendor filed under the spice aisle
        spice_rows = FMinfo.loc[FMinfo['aisles'] == 'spices and seasonings']
        spice_vendors = spice_rows['BUSINESS NAME'].tolist()
        thisout['spices'] = True
        thisout['spice_businesses'] = spice_vendors

    return is_baking or is_spice


def _aisle_set(name, aisledict):
    '''Return the set of lower-cased aisles recorded for ``name``, or None when it has no entry.'''
    entry = aisledict.get(name)
    if entry is None:
        return None
    # multi-shelved items store their aisles ';' separated in the first element
    return set(entry[0].lower().split(';'))


def _same_aisle(first, second):
    '''True when both aisle sets are missing, or both exist and share at least one aisle.'''
    if first is None or second is None:
        return first is None and second is None
    return bool(first.intersection(second))


def handle_matching(thisout, opposite, atFM, FMinfo, item, combineding, curaisles, aisledict):
    ''' Finding replacement for when it is not available at FM

    thisout     -- info dict; 'match' becomes a one-element list (or the string 'No Match') and
                   'store_hasreplacement' the vendors selling the replacement
    opposite    -- candidate replacements as (word, score) pairs, walked in the given order
    atFM        -- market product names
    FMinfo      -- vendor table with 'TYPES OF PRODUCTS AVAILABLE' and 'BUSINESS NAME' columns
    item        -- ignored; kept so existing callers keep working
    combineding -- ingredients already in play; a chosen replacement is appended to it
    curaisles   -- set of aisles of the ingredient being replaced, or None when unshelved
    aisledict   -- mapping ingredient -> sequence whose first element is a ';' separated aisle string
    '''
    # fw_forcache is memoised, so it needs a hashable choices argument; build it once
    fm_products = tuple(atFM)
    item = []

    #loop through all possibilities
    for opp in opposite:
        candidate = opp[0]

        # only candidates shelved like the original are worth the (slow) fuzzy lookup
        if not _same_aisle(_aisle_set(candidate, aisledict), curaisles):
            continue

        if candidate in atFM: #first see if exact match, again cause FW is slow
            ophighest = (candidate, 100)
        else:
            ophighest = fw_forcache(candidate, fm_products)
        if ophighest is None:
            # presumably only happens when there are no market products to match against
            continue

        if ophighest[1] >= 90 and ophighest[0] not in combineding:
            # Double check that FW didn't put us in a different aisle. The same rule as above is
            # applied, which also covers a matched product without an aisle entry and an unshelved
            # original; both used to raise (unbound variable / intersection with None).
            if _same_aisle(_aisle_set(ophighest[0], aisledict), curaisles):
                item.append(ophighest[0])
                break

    #once we have found the replacement, put it into our dicts
    if item:
        thisout['match'] = item
        # literal substring match; rows without product text count as "no match"
        sells_it = FMinfo['TYPES OF PRODUCTS AVAILABLE'].str.contains(item[0], regex=False, na=False)
        thisout['store_hasreplacement'] = FMinfo.loc[sells_it, 'BUSINESS NAME'].tolist()
        combineding.append(item[0])
    else:
        thisout['match'] = 'No Match'

        
def handle_shoppinglist(thisout, wheretoshop):
    ''' updates the shopping list dict with items

    thisout     -- info dict; reads 'store_hasreplacement' (vendor list) and 'match' (replacement list)
    wheretoshop -- shopping list dict (vendor -> list of items), updated in place

    The replacement is added to the first listed vendor that is already on the shopping list,
    otherwise a new entry is created for the first vendor. Nothing is added when no vendor is known.
    '''
    vendors = thisout['store_hasreplacement']
    # an empty vendor list used to raise IndexError on vendors[0]
    if not vendors:
        return

    replacement = thisout['match'][0]
    for vendor in vendors:
        if vendor in wheretoshop:
            wheretoshop[vendor].append(replacement)
            return
    wheretoshop[vendors[0]] = [replacement]


def handle_tryfresh(thisout, curaisles, aisledict, opposite):
    '''In the case that something is usually packaged but COULD be made from scratch, provide that info

    thisout   -- info dict; 'try_fresh' is set to a produce item when one is found
    curaisles -- set of lower-cased aisle names of the ingredient (must not be None)
    aisledict -- mapping ingredient -> sequence whose first element is the aisle string
    opposite  -- similar ingredients as (word, score) pairs
    '''
    if 'pasta and rice' not in curaisles and 'canned and jarred' not in curaisles:
        return

    for opp in opposite:
        # similar words are not guaranteed to have an aisle entry; direct indexing raised KeyError
        a = aisledict.get(opp[0])
        if a and a[0] == 'Produce':
            # NOTE(review): there is no break, so the LAST produce item in `opposite` wins;
            # confirm whether the first one was intended.
            thisout['try_fresh'] = opp[0]

                
def handle_notvalid(thisout, FMinfo, curaisles, wheretoshop):
    '''if something is not available, find a vendor that might be able to help

    thisout     -- info dict; 'similar_vendor' is set to the vendors of a matching aisle
    FMinfo      -- vendor table with 'aisles' and 'BUSINESS NAME' columns
    curaisles   -- set of aisle names of the ingredient, or None (then nothing happens)
    wheretoshop -- shopping list dict (vendor -> list of items); an 'Ask about <ingredient>'
                   entry is added for every aisle that has vendors
    '''
    #if there is no aisle associated with the item there is nobody to point to
    if curaisles is None:
        return

    suggestion = 'Ask about ' + thisout['ingredient']

    # find a similar aisle at the farmers market and point to it
    for shelf in curaisles:
        matchaisle = FMinfo.loc[FMinfo['aisles'] == shelf]
        if len(matchaisle) == 0:
            continue

        thebiz = matchaisle['BUSINESS NAME'].tolist()
        thisout['similar_vendor'] = thebiz

        for vendor in thebiz:
            if vendor in wheretoshop:
                wheretoshop[vendor].append(suggestion)
                break
        else:
            # Must be a list like every other shopping list entry: a bare string breaks
            # later .append calls and is split into characters by ', '.join.
            wheretoshop[thebiz[0]] = [suggestion]
                    

@lru_cache(maxsize=5000)
def fw_forcache(opp, relevant_atFM):
    '''Memoised fuzzy lookup of ``opp`` among the market products.

    Both arguments must be hashable for the cache to work, hence callers pass the
    products as a tuple. Returns whatever ``process.extractOne`` returns.
    '''
    return process.extractOne(opp, relevant_atFM)




In [5]:

def get_results(ingredients, cur_rec):
    '''Run the whole pipeline for one recipe.

    ingredients -- comma separated ingredient string
    cur_rec     -- one-element list with the full recipe text

    Returns ``(allout, wheretoshop)``: the per-ingredient info dicts (with validation
    scores filled in) and the shopping list dict.
    '''
    (w2vm, aisledict, noise, atFM, FMinfo,
     ingvect, ingfeatures, fulling,
     recvect, recfeatures, recdoc) = load_data()

    # clean -> match against the market -> score the replacements
    cleaned = removenoise(ingredients, noise)
    allout, wheretoshop = rulesofsimilarity(cleaned, w2vm, aisledict, atFM, FMinfo)
    validationstep(
        allout, fulling, ingvect, ingfeatures,
        recvect, recfeatures, recdoc, cur_rec,
        FMinfo, wheretoshop,
    )

    return allout, wheretoshop




In [6]:
# Function call: fetch the recipe behind `theurl` and run the full pipeline on it.
ingredients, cur_rec = request_comparison(theurl)
if cur_rec is not None:
    allout, wheretoshop = get_results(ingredients, cur_rec)
else:
    # Define empty results so the output cell below reports nothing instead of
    # failing with NameError (or silently reusing results from an earlier run).
    allout, wheretoshop = [], {}
    print('Woopsie doodle')

In [7]:
#Output -- equivalent is written in HTML in production

for ing in allout:
    if ing['where_available'] is not None:
        print(f'The farmers market DOES have {ing["ingredient"]}\n')
    else:
        print(f'The farmers market does not have {ing["ingredient"]}' )
        # cos_sim stays None when validation never scored the item; treat that as "no match"
        cos_sim = ing['cos_sim'] if ing['cos_sim'] is not None else 0
        if cos_sim >= 0.37:
            print(f'but our algorithm thinks {ing["match"][0]} is a solid replacement!\n')
        elif cos_sim > 0:
            print(f'but our algorithm thinks {ing["match"][0]} is the closest ingredient available')
            # similar_vendor is only filled in when a vendor shares an aisle with the ingredient
            vendors = ing['similar_vendor']
            if vendors:
                print(f'However, we are smart enough to tell that is not a great replacement, so you can also try asking at these ' +
                    f'these stores, who often have items similar to {ing["ingredient"]}: {vendors[0]}\n' )
            else:
                print()
        else:
            print(f'Unfortunately we couldnt find a match for {ing["ingredient"]}')

    if ing['baking']:
        print('Baking handling')
    if ing['spices']:
        print('Spice handling')

    if ing['try_fresh'] is not None:
        print('Fresh handling')


print('Based on these suggestions, inputting all the similar ingredients (use your descretion) for this grocery list\n')
for place in wheretoshop:
    items = wheretoshop[place]
    # tolerate a single string entry; joining a bare string would split it into characters
    if isinstance(items, str):
        items = [items]
    print(place + ": " + ', '.join(items))

The farmers market DOES have chicken breast

The farmers market does not have bell pepper
but our algorithm thinks lettuce is the closest ingredient available
However, we are smart enough to tell that is not a great replacement, so you can also try asking at these these stores, who often have items similar to bell pepper: Alewife Farm

The farmers market DOES have broccoli florets

The farmers market DOES have mushroom

The farmers market DOES have oil

The farmers market DOES have garlic

The farmers market DOES have ginger

The farmers market does not have sesame oil
Unfortunately we couldnt find a match for sesame oil
The farmers market does not have reduced sodium soy sauce
Unfortunately we couldnt find a match for reduced sodium soy sauce
The farmers market DOES have chicken broth

The farmers market DOES have flour

Based on these suggestions, inputting all the similar ingredients (use your descretion) for this grocery list

Abundance Acres Farm: chicken breast
Campo Rosso Farm: 

In [8]:
#testing block
def verify_working():
    '''Regression check: run the pipeline on the stored test recipe and compare with stored results.

    Reads the inputs and the expected outputs from test_data/ and raises AssertionError when
    get_results produces something different.
    '''
    # context managers close the fixture files, which the bare open() calls never did
    with open('test_data/ingredients.csv', 'r') as fh:
        ingredients = fh.read()
    with open('test_data/fullrec.txt', 'r') as fh:
        full_rec = [fh.read()]
    # NOTE: unpickling executes arbitrary code from the file; only use fixtures you created yourself
    with open("test_data/allout.pkl", 'rb') as fh:
        expected_allout = pickle.load(fh)
    with open("test_data/wheretoshop.pkl", 'rb') as fh:
        expected_wheretoshop = pickle.load(fh)

    allout, wheretoshop = get_results(ingredients, full_rec)
    assert allout == expected_allout
    assert wheretoshop == expected_wheretoshop
    
    

# Run the regression check against the pickled fixtures in test_data/.
verify_working()
# Uncomment the two lines below to overwrite the expected-output fixtures that
# verify_working() loads with the current `wheretoshop` / `allout`.
# pickle.dump(wheretoshop,open("test_data/wheretoshop.pkl","wb"))
# pickle.dump(allout,open("test_data/allout.pkl","wb"))
