In [120]:
import requests
from sklearn.manifold import TSNE
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import numpy as np
from fuzzywuzzy import fuzz, process

from functools import lru_cache


In [299]:
#User input!

theurl = 'https://tasty.co/recipe/chicken-veggie-stir-fry'
#theurl = 'https://damndelicious.net/2019/08/06/easy-chicken-tacos/'
#theurl =  'https://www.thewholesomedish.com/the-best-classic-burger'

In [310]:
#function definiton block
@lru_cache()
def load_data():
    w2vm = pickle.load(open("generated_data/model_w2v.pkl", 'rb')) #
    aisledict = pickle.load(open("generated_data/ingredient_aisle.pkl", 'rb')) #
    noise = pickle.load(open("generated_data/noiselist.pkl", 'rb')) #
    atFM = pickle.load(open("generated_data/FMproducts.pkl", 'rb')) #
    FMinfo = pickle.load(open("generated_data//FMfull.pkl", 'rb')) #
    ingvect = pickle.load(open("generated_data/tfidfvect_ingredients.pkl", 'rb')) #
    fullnningredients = pickle.load(open("generated_data/cleaned_ingredients.pkl", 'rb'))
    fulling = []
    fulling.extend([', '.join(n) for n in fullnningredients])

    recvect = pickle.load(open("generated_data/tfidfvect_recipes.pkl", 'rb'))
    recdoc = pickle.load(open("generated_data/full_recipedoc.pkl", 'rb'))

    return w2vm, aisledict, noise, atFM, FMinfo, ingvect, fulling, recvect, recdoc

def request_comparison(userinput):
    mykey = open('spoonac/apikey.txt').read().strip()
    params = {'url': userinput, 'forceExtraction': 'true', 'apiKey': mykey, 'analyze': 'true'}
    response = requests.get('https://api.spoonacular.com/recipes/extract', params=params)
    rec = response.json()
    ingcomp = rec['extendedIngredients']
    ingredients = [','.join([ing['name'].lower() for ing in ingcomp if ing['name'] ])]
    
    cur_rec = [rec['title'] + ' ' + ingredients[0] + ' ' + rec['instructions']]
    
    return ingredients[0], cur_rec

def removenoise(ingredients, noise): #call on ingredients[0]
    noise_free_ing = []
    for word in ingredients.split(','):
        checked = []
        splitit = word.split()
        checked.extend(i for i in splitit if i not in noise)
        noise_free_ing.append(' '.join(checked))
    return noise_free_ing

def rulesofsimilarity(noise_free_ing, w2vm, aisledict, atFM, FMinfo):
    output = {
        'ingredient': None,
        'where_available': None,
        'unknown': None, #unknown to word2vec
        'baking': False, #true if is baking
        'spices': False, #true if spice/seasoning
        'spice_businesses': None,
        'match': None,
        'similar_vendor': None,
        'try_fresh': None,
        'store_hasreplacement': None,
        'cos_sim': None #this is defined in the validation call
    }
    
    wheretoshop = {
    }

    allout = []

    for i in noise_free_ing:
        thisout = output.copy()
        thisout['ingredient'] = i
        
        highest = process.extractOne(i,atFM)
        if highest[1] >= 90:
            matchaisle = FMinfo.loc[FMinfo['TYPES OF PRODUCTS AVAILABLE'].str.contains(highest[0])]
            thebiz = matchaisle['BUSINESS NAME'].tolist()
            thisout['where_available'] = thebiz
        # print(f'{i} is available at {thebiz} ')
            
            found = False
            for vendor in thisout['where_available']:
                if vendor in wheretoshop:
                    wheretoshop[vendor].append(thisout['ingredient'])
                    found = True
                    break
            if not found:
                wheretoshop[thisout['where_available'][0]] = [thisout['ingredient']] 
            


        else:
            #these are unavailable ingredients
            curaisle = aisledict.get(i)
            
            #use w2v to find similar ingredients
            try:
                similar = w2vm.wv.most_similar(i, topn=100)
                opposite = w2vm.wv.most_similar(similar[0][0], topn=1000)
                #toreplace.append(i)
            # print(f'{i} is not but ')
            except KeyError:
                thisout['unknown'] = i
    #            print(f'We have never heard of {i}, sorry about that')
                continue
                
            #get rid of baking for now
            if curaisle[0] is not None and curaisle[0] == 'Baking':
                thisout['baking'] = True
            #   print(f'{i} is a baking product, which is likely in your pantry!')
                continue
                
            #deal with the seasoning issue
            if curaisle[0] is not None and curaisle[0] == 'Spices and Seasonings':
                matchaisle = FMinfo.loc[FMinfo['aisles'] == curaisle[0].lower()]
                thebiz = matchaisle['BUSINESS NAME'].tolist()
                #print(f'Dried Spices and and seasonings are rare, you may have this in your pantry, otherwise get fresh ones at: {thebiz}\n')
                thisout['spices'] = True
                thisout['spice_businesses'] = thebiz
                
                if thebiz[0] not in wheretoshop:    
                    wheretoshop[thebiz[0]] = ['SPICETIME']
                elif 'SPICETIME' not in wheretoshop[thebiz[0]]:
                    wheretoshop[thebiz[0]].append('SPICETIME')
                    
                
                continue
                
                
            # here is thing our algorithm thinks is similar and IS available
            item = []
            for opp in opposite:
                ophighest = process.extractOne(opp[0],atFM)
                if ophighest[1] >= 90:
                    item.append(opp[0])
                    if len(item) == 1:
                        break
            #print(f'Here is the item our algorithm thinks is most similar and is available: {item}\n')
            thisout['match'] = item
            matchaisle = FMinfo.loc[FMinfo['TYPES OF PRODUCTS AVAILABLE'].str.contains(ophighest[0])]
            thebiz = matchaisle['BUSINESS NAME'].tolist()
            thisout['store_hasreplacement'] = thebiz
            
            found = False
            for vendor in thisout['store_hasreplacement']:
                if vendor in wheretoshop:
                    wheretoshop[vendor].append(thisout['match'])
                    found = True
                    break
            if not found:
                wheretoshop[thisout['store_hasreplacement'][0]] = [thisout['match']] 
            
            
            
            # if it is something usually prepackaged, suggest making it fresh
            if curaisle[0] == 'Pasta and Rice' or curaisle[0] == 'Canned and Jarred':
                for sim in similar:
                    a = aisledict[sim[0]]
                    if a and a[0] == 'Produce':
                        trythis = sim[0]
                        thisout['try_fresh'] = trythis
                        continue
                    
                    
                #print(f'Canned/Jarred items are rare at the Market, but you can make this fresh using {trythis}\n')
                
                    
            if curaisle[0] is not None: 
                #if something is not available, this vendor might be able to help you
                matchaisle = FMinfo.loc[FMinfo['aisles'] == curaisle[0].lower()]
                thebiz = matchaisle['BUSINESS NAME'].tolist()
            # print(f'This list of vendors often has products similar to {i}, try asking them: {thebiz}\n')
                if len(thebiz) > 0:
                    thisout['similar_vendor'] = thebiz



        allout.append(thisout)
    return allout, wheretoshop

def validationstep(allout, fulling, ingvect, recvect, recdoc, cur_rec):

    initinglist = []
    thingstoremove = []
    thingstoadd = []
    for out in allout:
        initinglist.append(out['ingredient'])
        if out['where_available'] is None:
            thingstoremove.append(out['ingredient'])
            thingstoadd.append(out['match'][0])


    for rem, add in zip(thingstoremove, thingstoadd):
        newlist = initinglist.copy()
        newlist.remove(rem)
        newlist.append(add)

     
        #now based on ingredients alone find the most similar recipe we know
        newlistj = [', '.join(newlist)]
        ingredientfeatures = ingvect.transform(fulling)
        nsf = ingvect.transform(newlistj)
        cosine_similarities = linear_kernel(nsf, ingredientfeatures).flatten()
        related_rec_index = cosine_similarities.argsort()[-1]
        

        #now find out, based on more features, how similar these two recipes are
        recfeatures = recvect.transform(recdoc)
        currecfeat = recvect.transform(cur_rec)
        recipe_similarity = linear_kernel(currecfeat, recfeatures[related_rec_index]).flatten()
        
        for out in allout:
            if out['ingredient'] == rem:
                out['cos_sim'] = recipe_similarity
       
            
        
        

In [311]:
#function call
ingredients, cur_rec = request_comparison(theurl)
w2vm, aisledict, noise, atFM, FMinfo, ingvect, fulling, recvect, recdoc = load_data()
noise_free_ing = removenoise(ingredients, noise)
allout, wheretoshop = rulesofsimilarity(noise_free_ing, w2vm, aisledict, atFM, FMinfo)
validationstep(allout, fulling, ingvect, recvect, recdoc, cur_rec)


In [312]:
wheretoshopshop

{'Abundance Acres Farm': ['chicken breast'],
 'Furnace Creek Farm': ['SPICETIME'],
 'Alewife Farm': [['zucchini'], 'broccoli florets', 'mushroom', 'ginger'],
 'Grazin Angus Acres': ['oil'],
 "keith's farm": ['garlic'],
 'Anthony Road Wine Co.': [['rice wine vinegar'], ['shao hsing wine']],
 'Yellow Bell Farm': ['chicken broth'],
 'GrowNYC Grainstand': ['flour']}

In [307]:
#Output -- equivalent is written in HTML


for out in allout:
    print(out['ingredient'])
    if out['unknown']:
        print('This is a new ingredient to us, sorry, we are learning more all the time!')
        continue
    if out['where_available']:
        print(f'is available at {out["where_available"]}\n')
        continue
    if out['baking']:
        print('is a baking item that we suspect you have in your pantry\n')
        continue
    if out['spices']:
        print(' Dried Spices and and seasonings are rare at the farmers market' + 
              f' you may have this in your pantry, otherwise check out the fresh herbs at: {out["spice_businesses"]}')
        continue
    if out['match']:
        print(f'is not available, but our algorithm thinks {out["match"][0]} is similar and available at {out["store_hasreplacement"]}')
        
        if out['cos_sim'][0] <= 0.37: # mean-1 std cos similarity
            print(f'However, it looks like that might fiddle with your recipe too much')
            if out['try_fresh']:
                print(f'So you can try to make this from scratch using {out["try_fresh"]} which IS available \n')
            if out['similar_vendor']:
                print('So try asking at ' +
                      f'these stores, who often have items similar to {out["ingredient"]}: {out["similar_vendor"]}\n' )
            if out['try_fresh'] is None and out['similar_vendor'] is None:
                print(f'I hate to say it, but {out["ingredient"]} might require a trip to a traditional grocery store \n ')
            
        else:
            print('\n')

    
    


chicken breast
is available at ['Abundance Acres Farm', 'Feisty Acres Inc', 'flying pig farms', 'green table farms', 'Hawthorne Valley Farm', 'Norwich Meadows Farm', "Quattro's Game Farm", 'Ramble Creek Farm', 'Sawkill Farm', 'Shannon Brook Farm', 'Stony Mountain Ranch', 'Sun Fed Beef and Pork', 'Violet Hill Farm', 'Yellow Bell Farm']

bell pepper
is not available, but our algorithm thinks zucchini is similar and available at ['Alewife Farm', 'Central Valley Farm', 'Cherry Lane Farms', 'Down Home Acres', 'Element Farms Inc', 'Evolutionary Organics', 'Fishkill Farms', 'Fledging Crow Vegetables', 'gopal farms', 'Hoeffner Farms', "Lani's Farm", 'Lucky Dog Farm', 'Savoie Organic Farm', 'Solid Ground Farm', 'Stokes Farm', 'Two Guys From Woodbridge', 'VanHouten Farms', 'Willow Wisp Organic Farm']


broccoli florets
is available at ['Alewife Farm', 'Central Valley Farm', 'Cherry Lane Farms', 'Down Home Acres', 'Element Farms Inc', 'Evolutionary Organics', 'Fishkill Farms', 'Fledging Crow Vege

In [308]:
wheretoshop

NameError: name 'wheretoshop' is not defined

In [304]:
FMinfo

Unnamed: 0,BUSINESS NAME,TYPES OF PRODUCTS AVAILABLE,aisles
0,1857 Spirits,potato vodka,alcoholic beverages
1,Abundance Acres Farm,"chicken, duck, turkey, beef, pork, eggs, mapl...",meat
2,Alewife Farm,"ramps,ginger,pea greens,arugala,asparagus,char...",produce
3,Andrew's Local Honey,honey,"nut butters, jams, and honey"
4,Anthony Road Wine Co.,wine,alcoholic beverages
...,...,...,...
133,Wilklow Orchards,"vegetables, fruits, cider, jams, beef, pork",produce
134,Willow Wisp Organic Farm,"ramps,ginger,pea greens,arugala,asparagus,char...","nut butters, jams, and honey"
135,Wood Homestead,"maple syrup, honey, sorghum syrup",produce
136,Wood Thrush Farm,"vegetables, mixed greens, herbs, microgreens, ...",meat
