In [120]:
import requests
from sklearn.manifold import TSNE
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import numpy as np
from fuzzywuzzy import fuzz, process

from functools import lru_cache


In [3]:
#User input!

theurl = 'https://tasty.co/recipe/chicken-veggie-stir-fry'

In [4]:
#function definiton block
@lru_cache
def load_data():
    w2vm = pickle.load(open("bigoven/w2vmodel.pkl", 'rb'))
    aisledict = pickle.load(open("bigoven/aisleclassification.pkl", 'rb'))
    noise = pickle.load(open("bigoven/noiselist.pkl", 'rb'))
    atFM = pickle.load(open("bigoven/FMproducts.pkl", 'rb'))
    FMinfo = pickle.load(open("bigoven/FMfull.pkl", 'rb'))
    ingvect = pickle.load(open("generated_data/tfidfvect_ingredients.pkl", 'rb'))

    return w2vm, aisledict, noise, atFM, FMinfo

def request_comparison(userinput):
    mykey = open('spoonac/apikey.txt').read().strip()
    params = {'url': userinput, 'forceExtraction': 'true', 'apiKey': mykey, 'analyze': 'true'}
    response = requests.get('https://api.spoonacular.com/recipes/extract', params=params)
    rec = response.json()
    ingcomp = rec['extendedIngredients']
    ingredients = [','.join([ing['name'].lower() for ing in ingcomp if ing['name'] ])]
    return ingredients[0]

def removenoise(ingredients, noise): #call on ingredients[0]
    noise_free_ing = []
    for word in ingredients.split(','):
        checked = []
        splitit = word.split()
        checked.extend(i for i in splitit if i not in noise)
        noise_free_ing.append(' '.join(checked))
    return noise_free_ing

def rulesofsimilarity(noise_free_ing, w2vm, aisledict, atFM, FMinfo):
    output = {
        'ingredient': None,
        'where_available': None,
        'unknown': None, #unknown to word2vec
        'baking': False, #true if is baking
        'spices': False, #true if spice/seasoning
        'spice_businesses': None,
        'match': None,
        'try_fresh': None,
        'store_match': None
    }

    allout = []

    for i in noise_free_ing:
        thisout = output.copy()
        thisout['ingredient'] = i
        
        highest = process.extractOne(i,atFM)
        if highest[1] >= 90:
            matchaisle = FMinfo.loc[FMinfo['TYPES OF PRODUCTS AVAILABLE'].str.contains(highest[0])]
            thebiz = matchaisle['BUSINESS NAME'].tolist()
            thisout['where_available'] = thebiz
        # print(f'{i} is available at {thebiz} ')
            
        else:
            #these are unavailable ingredients
            curaisle = aisledict.get(i)
            
            #use w2v to find similar ingredients
            try:
                similar = w2vm.wv.most_similar(i, topn=100)
                opposite = w2vm.wv.most_similar(similar[0][0], topn=1000)
                #toreplace.append(i)
            # print(f'{i} is not but ')
            except KeyError:
                thisout['unknown'] = i
    #            print(f'We have never heard of {i}, sorry about that')
                continue
                
            #get rid of baking for now
            if curaisle[0] is not None and curaisle[0] == 'Baking':
                thisout['baking'] = True
            #   print(f'{i} is a baking product, which is likely in your pantry!')
                continue
                
            #deal with the seasoning issue
            if curaisle[0] is not None and curaisle[0] == 'Spices and Seasonings':
                matchaisle = FMinfo.loc[FMinfo['aisles'] == curaisle[0].lower()]
                thebiz = matchaisle['BUSINESS NAME'].tolist()
                #print(f'Dried Spices and and seasonings are rare, you may have this in your pantry, otherwise get fresh ones at: {thebiz}\n')
                thisout['spices'] = True
                thisout['spice_businesses'] = thebiz
                continue
                
            # here is thing our algorithm thinks is similar and IS available
            item = []
            for opp in opposite:
                ophighest = process.extractOne(opp[0],atFM)
                if ophighest[1] >= 90:
                    item.append(opp[0])
                    if len(item) == 1:
                        break
            #print(f'Here is the item our algorithm thinks is most similar and is available: {item}\n')
            thisout['match'] = item
            
            # if it is something usually prepackaged, suggest making it fresh
            if curaisle[0] == 'Pasta and Rice' or curaisle[0] == 'Canned and Jarred':
                for sim in similar:
                    a = aisledict[sim[0]]
                    if a and a[0] == 'Produce':
                        trythis = sim[0]
                        thisout['try_fresh'] = trythis
                        continue
                #print(f'Canned/Jarred items are rare at the Market, but you can make this fresh using {trythis}\n')
                
                    
            if curaisle[0] is not None: 
                #if something is not available, this vendor might be able to help you
                matchaisle = FMinfo.loc[FMinfo['aisles'] == curaisle[0].lower()]
                thebiz = matchaisle['BUSINESS NAME'].tolist()
            # print(f'This list of vendors often has products similar to {i}, try asking them: {thebiz}\n')
                thisout['store_match'] = thebiz



        allout.append(thisout)
    return allout    

In [5]:
#function call
ingredients = request_comparison(theurl)
w2vm, aisledict, noise, atFM, FMinfo = load_data()
noise_free_ing = removenoise(ingredients, noise)
allout = rulesofsimilarity(noise_free_ing, w2vm, aisledict, atFM, FMinfo)


In [8]:
#Output -- equivalent is written in HTML


for out in allout:
    print(out['ingredient'])
    if out['unknown']:
        print('This is a new ingredient to us, sorry, we are learning more all the time!')
        continue
    if out['where_available']:
        print(f'is available at {out["where_available"]}\n')
        continue
    if out['baking']:
        print('is a baking item that we suspect you have in your pantry\n')
        continue
    if out['spices']:
        print(' Dried Spices and and seasonings are rare at the farmers market' + 
              f' you may have this in your pantry, otherwise check out the fresh herbs at: {out["spice_businesses"]}')
        continue
    if out['match']:
        print(f'is not available, but our algorithm thinks {out["match"]} is similar and available')
        if out['try_fresh']:
            print(f'Or you can try to make this from scratch using {out["try_fresh"]} which is available')
        
        if out['store_match']:
            print('Or, sometimes vendors do not update us on what they have today, try asking at ' +
                  f'these stores, who often have items similar to {out["ingredient"]}: {out["store_match"]}\n' )

    
    
    


chicken breast
is available at ['Abundance Acres Farm', 'Feisty Acres Inc', 'flying pig farms', 'green table farms', 'Hawthorne Valley Farm', 'Norwich Meadows Farm', "Quattro's Game Farm", 'Ramble Creek Farm', 'Sawkill Farm', 'Shannon Brook Farm', 'Stony Mountain Ranch', 'Sun Fed Beef and Pork', 'Violet Hill Farm', 'Yellow Bell Farm']

bell pepper
is not available, but our algorithm thinks ['zucchini'] is similar and available
Or, sometimes vendors do not update us on what they have today, try asking at these stores, who often have items similar to bell pepper: ['Alewife Farm', 'Blue Oyster Cultivation', 'breezy hill orchard', 'Brooklyn Grange Rooftop Farm', 'Bulich Mushroom Co.', 'Campo Rosso Farm', 'caradonna farms', 'Cherry Lane Farms', "D'Attolico's Organic", 'Down Home Acres', 'Eckerton Hill Farm', 'Element Farms Inc', 'Evolutionary Organics', 'Feisty Acres Inc', 'Fishkill Farms', 'Fledging Crow Vegetables', 'Garden of Eve Organic Farm', 'gopal farms', 'Gorzynski Ornery Farm', 'He

In [46]:
#grab this new ingredient list

newlist = []
for out in allout:
    if out['where_available'] or out['spices']:
        newlist.append(out['ingredient'])
    if out['match']:
        newlist.extend(out['match'])
 
print(newlist)

['chicken breast', 'zucchini', 'broccoli florets', 'mushroom', 'oil', 'garlic', 'ginger', 'rice wine vinegar', 'chili garlic sauce', 'chicken broth', 'flour']


In [48]:
#think through how well the replacement matches in the case of bell pepper / zucchini
w2vm = pickle.load(open("bigoven/w2vmodel.pkl", 'rb'))
atFM = pickle.load(open("bigoven/FMproducts.pkl", 'rb'))
similar = w2vm.wv.most_similar('bell pepper', topn=100)
opposite = w2vm.wv.most_similar(similar[0][0], topn=1000)
#ophighest = process.extractOne(opposite[0][0],atFM)
item = []
for opp in opposite:
    ophighest = process.extractOne(opp[0],atFM)
    if ophighest[1] >= 90:
        item.append(opp[0])
        if len(item) == 1:
            break
            
print(item)
w2vm.similarity('bell pepper', 'zucchini')

['zucchini']


  app.launch_new_instance()


0.77954465

In [49]:
nning = pickle.load(open("generated_data/cleaned_ingredients.pkl", 'rb'))


In [50]:
nning

[['garam masala',
  'coriander leaves',
  'ginger root',
  'turmeric',
  'water',
  'coriander',
  'water',
  'cumin',
  'pepper',
  'rice',
  'tea leaves',
  'chana dal',
  'salt',
  'ghee',
  'cumin seed',
  'onion'],
 ['coarse polentacornmeal',
  'fl oz milk',
  'half stick butter',
  'gran luchito chilli paste',
  'pint water',
  'salt',
  'parmesan',
  'ear sweetcorn use small tin',
  'chorizo sausage',
  'king prawns',
  'scallion',
  'handful coriander',
  'egg',
  'lime',
  'olive oil',
  'salt',
  'pepper'],
 ['pasta',
  'shrimp',
  'butter',
  'parsley',
  'pepper flakes',
  'pepper',
  'kosher salt',
  'wine',
  'garlic',
  'parmesan cheese',
  'lemon'],
 ['pepper',
  'egg',
  'hush puppies',
  'grain',
  'onion',
  'salt',
  'salt',
  'fish',
  'buttermilk',
  'baking soda',
  'baking powder',
  'corn flour',
  'cayenne',
  'fish'],
 ['shrimp',
  'habanero pepper',
  'jicama',
  'bell pepper',
  'bell pepper',
  'bell pepper',
  'purple onion',
  'tomato',
  'lime',
  'cila

In [85]:
#put the full ingredient list and new ing list into correct format
senting = []
senting.extend([', '.join(n) for n in nning])
newlistj = [', '.join(newlist)]

In [103]:
#of all the recipes we have seen, which one is most similar based on ingredients 
ingvect = pickle.load(open("generated_data/tfidfvect_ingredients.pkl", 'rb'))

ingredientfeatures = ingvect.transform(senting)
nsf = ingvect.transform(newlistj)
cosine_similarities = linear_kernel(nsf, ingredientfeatures).flatten()
related_docs_indices = cosine_similarities.argsort()[:-5:-1]
related_docs_indices

array([ 6794, 18672, 39654, 19037])

In [91]:
#now get the full recipe

mykey = open('spoonac/apikey.txt').read().strip()
params = {'url': theurl, 'forceExtraction': 'true', 'apiKey': mykey, 'analyze': 'true'}
response = requests.get('https://api.spoonacular.com/recipes/extract', params=params)
rec = response.json()

In [99]:
#make that full recipe into a doc
rdoc = [rec['title'] + ingredients + rec['instructions']]
rdoc

['Chicken & Veggie Stir-Fry Recipe by Tastychicken breast,salt,bell pepper,broccoli florets,mushroom,oil,garlic,ginger,sesame oil,reduced sodium soy sauce,brown sugar,chicken broth,flourIn a large pan on medium-high heat, add 1 tablespoon of oil.  Once the oil is hot, add chicken, season with salt and pepper, and sauté until cooked through and browned. Remove cooked chicken from pan and set aside.\n\nIn the same pan, heat 1 tablespoon of oil and add mushrooms. When the mushrooms start to soften, add broccoli florets and stir-fry until the broccoli is tender. Remove cooked mushrooms and broccoli from the pan and set aside.\n\nAdd 1 tablespoon of oil to the pan and sauté garlic and ginger until fragrant. Add the remaining sauce ingredients and stir until smooth.\n\nReturn the chicken and vegetables to the saucy pan, stir until heated through.\n\nServe with hot rice or noodles.\n\nEnjoy!']

In [104]:
related_docs_indices[0]

6794

In [115]:
recvect = pickle.load(open("generated_data/tfidfvect_recipes.pkl", 'rb'))
recdoc = pickle.load(open("generated_data/full_recipedoc.pkl", 'rb'))


recfeatures = recvect.transform(recdoc)
currec = recvect.transform(rdoc)

cosine_similarities = linear_kernel(currec, recfeatures[related_docs_indices[0]]).flatten()
print(cosine_similarities)


[0.46344]


In [118]:
recdoc[related_docs_indices[0]]

'Chicken, Broccoli and Snow Pea Stir Fry chicken breast broccoli florets mushroom snow peas soy sauce chicken broth sesame oil brown rice In a large skillet with cooking spray, cook chicken cubes over medium heat until cooked through (approx. 7-10 mins).\r\n\r\nRemove from pan and cook broccoli, mushrooms, and snow peas in the same pan. Cook until veggies begin to soften (approx. 6-8 minutes). \r\n\r\nAdd the chicken broth, soy sauce, sesame oil and chicken cubes to the pan. Serve over a bed of rice.'