# Preliminaries

### Import dependencies

In [0]:
import pandas as pd
from pandas.core.common import flatten
import sklearn.metrics
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk
from nltk.corpus import stopwords
from nltk import SnowballStemmer as snow

### Load file as a dataframe

In [0]:
file = '/content/gf_recipe_data_220k.pkl'
df = pd.read_pickle(file)

In [0]:
df.shape
df = df.iloc[:30000]

### Clean the dataframe

In [0]:
# df = df.set_index('id')

In [4]:
print(df.shape)
df.head()

(30000, 2)


Unnamed: 0_level_0,name,ingredients
id,Unnamed: 1_level_1,Unnamed: 2_level_1
137739,arriba baked winter squash mexican style,"['winter squash', 'mexican seasoning', 'mixed ..."
31490,a bit different breakfast pizza,"['prepared pizza crust', 'sausage patty', 'egg..."
112140,all in the kitchen chili,"['ground beef', 'yellow onions', 'diced tomato..."
59389,alouette potatoes,"['spreadable cheese with garlic and herbs', 'n..."
44061,amish tomato ketchup for canning,"['tomato juice', 'apple cider vinegar', 'sugar..."


## TF-IDF Work

### Prework

In [0]:
# create an instance of a stemmer
stemmer = nltk.SnowballStemmer("english")

In [6]:
# run through an example of a snowball stemmer
# note -- stem might not be an actual word whereas, lemma is an actual language word
# note -- snowball more agressive than porter, less than lancaster

stemmer.stem('banana'), stemmer.stem('tomatoes'), stemmer.stem('active')

('banana', 'tomato', 'activ')

Create a function that tokenizes the recipe text.

In [0]:
def tokenize(text):
    """This function takes an input of text and returns word tokens"""
    pattern = "([a-zA-Z]+(?:'[a-z]+)?)"
    tokens = nltk.regexp_tokenize(text, pattern)
    stemmer = snow("english")
    stems = []
    for item in tokens:
        stems.append(stemmer.stem(item))
    return stems

Use a TFIDF vectorizer

In [0]:
# remove stop words: ex: 'and', tokenize function, and stemm
tfidf = sklearn.feature_extraction.text.TfidfVectorizer(stop_words='english', tokenizer=tokenize)

In [9]:
response = tfidf.fit_transform(df.ingredients)

  'stop_words.' % sorted(inconsistent))


In [0]:
df_tfidf = pd.DataFrame(response.toarray(), columns=tfidf.get_feature_names(), index=df.index)

In [11]:
df_tfidf.shape

(30000, 2303)

In [0]:
df_vectors = df_tfidf.join(df)

In [13]:
# # Uncomment to create updated data frame for the cosine similarity table
df_vectors = df_vectors.drop(['name', 'ingredients'], axis=1)
df_vectors

Unnamed: 0_level_0,absinth,absolut,accent,achiot,acid,acini,acorn,activ,ad,adobo,adzuki,agar,agav,age,aguardient,ahi,aid,aioli,aji,ajinomoto,ajmoh,ajwain,albacor,alcaparrado,alcohol,ale,aleppo,alfalfa,alfredo,allig,allspic,almond,alo,alpha,alphabet,alum,aluminum,amaranth,amaretti,amarettini,...,wip,wishbon,wonton,wood,wooden,worcestershir,worchestershir,worm,worthington,wrap,wrapper,wyler,xanthan,xylitol,y,yakisoba,yam,yeast,yellow,yellowfin,yellowtail,yoghurt,yogurt,yolk,york,yoshida,young,yucca,yukon,yvett,za'atar,zatarain,zatarian,zero,zest,zesti,zinfandel,ziploc,ziti,zucchini
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
137739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
31490,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
112140,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.208836,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
59389,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.208137,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
44061,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151167,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
184122,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
385205,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
168390,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.224951,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Create a cosine similarity table

In [14]:
cosine_similarity_array = sklearn.metrics.pairwise.cosine_similarity(df_vectors)
df_similarity = pd.DataFrame(cosine_similarity_array, columns=df_vectors.index).set_index(df_vectors.index)
df_similarity.head()

id,137739,31490,112140,59389,44061,5289,25274,67888,70971,109439,42198,67547,107517,39959,43026,23933,8559,23850,76808,49262,48156,54272,47366,37073,44123,59952,93249,54100,67664,87098,25775,33165,38798,22123,90921,58224,33606,94710,35173,52804,...,181533,257909,113235,462825,225972,355321,252008,357996,292320,65434,264604,189024,353148,76017,121664,467417,517285,533995,128756,9497,36252,163933,85430,135112,32485,99372,322434,71025,191978,418281,222603,434980,254173,32725,160384,151167,184122,385205,168390,294499
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
137739,1.0,0.013867,0.009863,0.045322,0.069467,0.0,0.080441,0.045198,0.0,0.0,0.030156,0.009853,0.011782,0.0,0.025031,0.016311,0.022827,0.160737,0.053643,0.066728,0.0,0.011862,0.009434,0.0,0.022238,0.032213,0.107731,0.011907,0.0,0.0,0.013137,0.00853,0.0,0.073647,0.060478,0.015019,0.092144,0.033427,0.092399,0.109345,...,0.103847,0.0,0.030233,0.030384,0.031207,0.0,0.0,0.013759,0.077778,0.008221,0.035838,0.044228,0.026826,0.0,0.024888,0.052052,0.074236,0.071698,0.0,0.009829,0.0,0.044938,0.015561,0.022701,0.025126,0.016582,0.046025,0.03141,0.042776,0.05779,0.009306,0.075312,0.0,0.050031,0.02413,0.049446,0.013443,0.109328,0.053614,0.072653
31490,0.013867,1.0,0.026955,0.058699,0.038149,0.048748,0.0,0.023207,0.0,0.145752,0.04303,0.021336,0.012795,0.0,0.073707,0.017713,0.0,0.032202,0.044922,0.112311,0.012327,0.051296,0.035966,0.0,0.016706,0.050202,0.0,0.094811,0.057635,0.0,0.085371,0.095053,0.036503,0.024358,0.0,0.192092,0.040272,0.051561,0.043889,0.030524,...,0.0,0.0,0.013284,0.036901,0.048136,0.014789,0.010938,0.020187,0.024892,0.026675,0.0314,0.023727,0.053094,0.027949,0.077173,0.073821,0.031245,0.060281,0.065232,0.063872,0.025402,0.0,0.0,0.019859,0.063505,0.029012,0.013761,0.028655,0.032301,0.047785,0.040757,0.142589,0.068922,0.023499,0.031681,0.037338,0.0,0.035263,0.021072,0.02157
112140,0.009863,0.026955,1.0,0.062575,0.142969,0.0,0.0,0.194475,0.0,0.0,0.034432,0.160169,0.009101,0.0,0.071533,0.012599,0.103378,0.07827,0.088365,0.017881,0.254673,0.430797,0.252577,0.0,0.060822,0.224783,0.0,0.103521,0.0,0.225129,0.129049,0.058771,0.041733,0.174528,0.008208,0.181848,0.142418,0.066604,0.057146,0.071495,...,0.0,0.512227,0.213204,0.29301,0.225711,0.101068,0.147,0.332864,0.075058,0.162414,0.024565,0.074099,0.14315,0.137965,0.108292,0.118071,0.046801,0.067314,0.124086,0.05685,0.080372,0.114669,0.0,0.12395,0.015635,0.020636,0.0,0.161815,0.013879,0.024365,0.028991,0.130052,0.148892,0.008382,0.01654,0.157426,0.025503,0.027936,0.106933,0.078062
59389,0.045322,0.058699,0.062575,1.0,0.135763,0.0,0.112377,0.094459,0.0,0.0,0.0,0.040694,0.009071,0.054293,0.039129,0.012557,0.017573,0.288399,0.096169,0.070967,0.026216,0.149513,0.066006,0.166567,0.047091,0.083902,0.0,0.036502,0.0,0.037112,0.055611,0.141181,0.014464,0.059201,0.019567,0.145748,0.118504,0.109949,0.031114,0.062949,...,0.0,0.071917,0.036295,0.056914,0.081193,0.010484,0.139657,0.134577,0.101045,0.202339,0.076314,0.157738,0.177242,0.019814,0.045786,0.259971,0.066099,0.132595,0.030599,0.164966,0.018008,0.077402,0.023235,0.014079,0.015583,0.020567,0.127763,0.279792,0.210055,0.101242,0.038799,0.062812,0.016912,0.122172,0.111706,0.100075,0.0598,0.076804,0.194447,0.030129
44061,0.069467,0.038149,0.142969,0.135763,1.0,0.237481,0.07265,0.22701,0.0,0.0,0.037341,0.101126,0.043722,0.0,0.06907,0.022505,0.062988,0.071106,0.057074,0.13461,0.015661,0.104065,0.091436,0.0,0.13423,0.075958,0.066214,0.138048,0.056757,0.038939,0.036142,0.123851,0.0,0.076624,0.03616,0.154141,0.204373,0.092128,0.0,0.112568,...,0.0,0.08337,0.182278,0.058368,0.124693,0.0,0.044943,0.094275,0.012522,0.059077,0.143989,0.233899,0.197999,0.051997,0.018131,0.205955,0.104069,0.10965,0.050205,0.063034,0.047257,0.21215,0.130699,0.036946,0.110043,0.0,0.108997,0.09571,0.064358,0.170369,0.07037,0.040822,0.0,0.120104,0.115648,0.203964,0.130723,0.17476,0.149497,0.053182


# User interaction

### Stem the users input

In [0]:
user_response = 'acorn'
user_response_stem = stemmer.stem(user_response)
# user_response_stem

### Get the primary recipes 
(Those recipies that have a similarity greater than 0 (not 1) because of the tf-idf method.)

In [0]:
df_primary_recipes = df_tfidf[df_tfidf.get(user_response_stem) > 0]
# df_primary_recipes.index

In [0]:
# df_primary_recipes = df_primary_recipes.sample(2)

In [147]:
# Make a list of the ids of the primary recipes

list_primary_recipes = df_primary_recipes.index.tolist()
print(f'There are {len(list_primary_recipes)} primary recipies.')

There are 84 primary recipies.


### Get the secondary recipies

In [0]:
# lookup each primary recipe against all other recipes in the similarity matrix
# list_secondary = []

# for id_ in list_primary_recipes:    
#     for column_id in df_similarity.columns:
        
#         # check if the similarity is between values   
#           if df_similarity.loc[id_,  column_id] > .5 \
#             and df_similarity.loc[id_, column_id] < .99:
            
      

#             #for the print statement
#             primary = df.loc[id_, 'name']
#             similar = df.loc[column_id, 'name']
#             value = df_similarity.loc[id_,  column_id]
            
#             print(f"Has {user_response}: {primary} id: {id_} \nSimilar recipie: {similar} id: {column_id} value: {value}\n")

            

In [0]:
#### WORK IN THIS CELL
#######################
#######################

# lookup each primary recipe against all other recipes in the similarity matrix

list_secondary = []

for id_ in list_primary_recipes:   
    for column_id in df_similarity.columns:
      
      # check if the similarity is between values   
        if df_similarity.loc[id_,  column_id] > .6 \
          and df_similarity.loc[id_, column_id] <= .99: #do not want the same recipe
          
          value = df_similarity.loc[id_,  column_id]
          list_secondary.append([id_, column_id, value])



In [149]:
pri_sec_values_df = pd.DataFrame(list_secondary, columns=['primary', 'secondary', 'cs_value'])
pri_sec_values_df = pri_sec_values_df.sort_values('cs_value')
pri_sec_values_df

Unnamed: 0,primary,secondary,cs_value
146,325146,142269,0.600058
28,142269,325146,0.600058
43,31538,152166,0.600241
82,170494,3805,0.600519
164,3805,170494,0.600519
...,...,...,...
84,170494,46873,0.923846
285,365452,8562,0.930873
238,8562,365452,0.930873
102,78296,81357,0.989639


In [151]:
pri_sec_values_df_sample = pri_sec_values_df.tail(5)
pri_sec_values_df_sample

Unnamed: 0,primary,secondary,cs_value
84,170494,46873,0.923846
285,365452,8562,0.930873
238,8562,365452,0.930873
102,78296,81357,0.989639
101,81357,78296,0.989639


In [152]:
for i,r in pri_sec_values_df_sample.iterrows():
  primary = pri_sec_values_df_sample.loc[i, 'primary']
  similar = pri_sec_values_df_sample.loc[i, 'secondary']
  value = pri_sec_values_df_sample.loc[i,  'cs_value']
  
  print(f"\nhttps://www.food.com/recipe/{similar} \nwith value: {value}\n ")



https://www.food.com/recipe/46873 
with value: 0.923846230216271
 

https://www.food.com/recipe/8562 
with value: 0.9308732497778829
 

https://www.food.com/recipe/365452 
with value: 0.9308732497778829
 

https://www.food.com/recipe/81357 
with value: 0.9896386687570717
 

https://www.food.com/recipe/78296 
with value: 0.9896386687570717
 


In [60]:
### Goal: to have a diverse set

farthest = pri_sec_values_df.loc[pri_sec_values_df.cs_value.idxmin(), 'secondary']

closest = pri_sec_values_df.loc[pri_sec_values_df.cs_value.idxmax(), 'secondary']

farthest

132856

In [61]:
print(f'https://www.food.com/recipe/{farthest}\nhttps://www.food.com/recipe/{closest}')

https://www.food.com/recipe/132856
https://www.food.com/recipe/31538


## Multiple entries


In [212]:
user_response = 'strawberries chocolate blueberry'
user_response_list = user_response.split(' ')

user_response_stem_list = []

for item in user_response_list:
  user_response_stem = stemmer.stem(item)
  user_response_stem_list.append(user_response_stem)

user_response_stem_list

['strawberri', 'chocol', 'blueberri']

In [0]:
df_recipes_1 = df_vectors[df_vectors.get(user_response_stem_list[0]) > 0]
df_recipes_2 = df_vectors[df_vectors.get(user_response_stem_list[1]) > 0]
df_recipes_3 = df_vectors[df_vectors.get(user_response_stem_list[2]) > 0]



In [0]:
df_recipes_a = df_recipes_1.merge(df_recipes_2, on='id')
df_recipes_both = df_recipes_a.merge(df_recipes_3, on='id')
df_recipes_both_list = df_recipes_both.index.tolist()

In [209]:
df_recipes_both_list

[317896, 314440]