# Simple similarity search

## Data reading

In [3]:
import pandas as pd

# Load the raw recipes table; the path is relative to the notebook's working directory.
data = pd.read_csv('../data/RAW_recipes.csv')
# Preview the first rows to check which columns are available.
data.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8


The data was cleaned with the following pipeline:
```py
def clean_text(text):
    """Normalize raw text into a single space-separated string of lemmas.

    The text is lowercased, stripped of every character listed in
    ``string.punctuation``, split on whitespace, filtered against the
    ``stopwords.words('english')`` list, and each surviving token is passed
    through ``WordNetLemmatizer().lemmatize`` before being re-joined.

    Args:
        text: The string to clean.

    Returns:
        The cleaned tokens joined with single spaces (empty string when no
        token survives).
    """
    # Lowercase, then delete all punctuation characters in one translate pass.
    punctuation_table = str.maketrans('', '', string.punctuation)
    normalized = text.lower().translate(punctuation_table)

    # Whitespace tokenization followed by stopword removal.
    english_stopwords = set(stopwords.words('english'))
    kept = [token for token in normalized.split() if token not in english_stopwords]

    # Lemmatize what is left and glue the tokens back together.
    lemmatize = WordNetLemmatizer().lemmatize
    return ' '.join(lemmatize(token) for token in kept)
```

## First query

query: chicken with creamy sauce
result:

### TFIDF + FAISS

In [20]:
# Hard-coded row labels of the ten hits for the query above (TF-IDF + FAISS,
# per the section heading). The search that produced them is not shown here;
# presumably listed in rank order - TODO confirm.
results = [43291, 44435, 81669, 184585, 62393, 98489, 62915, 46373, 62733, 61581]
# Look the hits up by index label to display the full recipe rows.
data.loc[results]

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
43291,chicken breast hot spicy,18607,30,30139,2002-01-31,"['30-minutes-or-less', 'time-to-make', 'course...","[442.2, 46.0, 8.0, 75.0, 68.0, 67.0, 2.0]",8,"['squeeze the lime juice into a bowl , add hot...",chicken breast,"['chicken breasts', 'hot sauce', 'mustard', 'b...",7
44435,chicken in creamy pan sauce,287625,30,333250,2008-02-22,"['30-minutes-or-less', 'time-to-make', 'course...","[276.0, 23.0, 0.0, 12.0, 59.0, 35.0, 1.0]",18,['place chicken inside a resealable plastic ba...,i got this recipe from kraft foods magazine an...,"['boneless skinless chicken breast', 'flour', ...",7
81669,fajita salad with creamy cilantro lime sauce,115346,30,97825,2005-04-05,"['30-minutes-or-less', 'time-to-make', 'course...","[531.3, 29.0, 36.0, 34.0, 83.0, 19.0, 16.0]",12,['combine all ingredients for cilantro-lime sa...,great for summer salad recipe from cooking light,"['olive oil', 'ground cumin', 'paprika', 'chil...",22
184585,sharon s creamy chile chicken,187102,25,165623,2006-09-20,"['30-minutes-or-less', 'time-to-make', 'course...","[413.4, 33.0, 7.0, 29.0, 68.0, 61.0, 6.0]",10,"['cook chicken in butter', 'set aside , keepin...",this is just dreamy creamy ... thanks sharon g!!,"['boneless skinless chicken breasts', 'butter'...",9
62393,creamy buffalo chicken pasta,229231,35,487279,2007-05-21,"['60-minutes-or-less', 'time-to-make', 'main-i...","[385.2, 43.0, 9.0, 18.0, 52.0, 33.0, 2.0]",9,['cut chicken breast into bite-size pieces and...,"your chicken, your hotness and your bleu chees...","['chicken breast', 'paprika', 'garlic powder',...",10
98489,grilled chicken alfredo,503258,30,2891694,2013-07-06,"['30-minutes-or-less', 'time-to-make', 'main-i...","[770.6, 39.0, 17.0, 28.0, 93.0, 51.0, 29.0]",6,['marinate chicken in italian dressing for at ...,grilled chicken on top of a bed of fettuchini ...,"['cream cheese', 'milk', 'ground pepper', 'gar...",9
62915,creamy curry chicken,252894,31,587294,2007-09-13,"['curries', '60-minutes-or-less', 'time-to-mak...","[417.7, 48.0, 19.0, 20.0, 46.0, 85.0, 3.0]",21,"['directions:', 'for the chicken marinade , mi...",creamy curry sauce with chicken breast pieces....,"['boneless skinless chicken breast half', 'yog...",17
46373,chicken with creamy green onion sauce,291657,20,644191,2008-03-13,"['30-minutes-or-less', 'time-to-make', 'course...","[513.8, 47.0, 13.0, 23.0, 98.0, 63.0, 2.0]",10,"['melt butter over med', 'high heat', 'stir in...","this creamy, smooth and tangy sauce is a hit i...","['chicken breasts', 'butter', 'all-purpose flo...",9
62733,creamy chipotle chili sauce,217439,30,104018,2007-03-18,"['30-minutes-or-less', 'time-to-make', 'course...","[92.0, 12.0, 6.0, 5.0, 1.0, 11.0, 1.0]",2,"['mix all of the ingredients in a small bowl',...",creamy chipotle chili sauce,"['mayonnaise', 'sour cream', 'chipotle chiles ...",6
61581,cream cheese sauce,181346,20,341170,2006-08-10,"['30-minutes-or-less', 'time-to-make', 'course...","[137.2, 20.0, 3.0, 5.0, 4.0, 39.0, 0.0]",4,['melt together cream cheese and butter in a s...,a warm creamy sauce that's perfect over pasta ...,"['cream cheese', 'butter', 'milk', 'garlic']",4


### BM25

In [17]:
# Hard-coded row labels of the four hits for the same query under BM25
# (per the section heading). The search that produced them is not shown here;
# presumably listed in rank order - TODO confirm.
results_bm25 = [46373, 78734, 76112, 62393]
# Look the hits up by index label to display the full recipe rows.
data.loc[results_bm25]

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
46373,chicken with creamy green onion sauce,291657,20,644191,2008-03-13,"['30-minutes-or-less', 'time-to-make', 'course...","[513.8, 47.0, 13.0, 23.0, 98.0, 63.0, 2.0]",10,"['melt butter over med', 'high heat', 'stir in...","this creamy, smooth and tangy sauce is a hit i...","['chicken breasts', 'butter', 'all-purpose flo...",9
78734,easy yummy creamy chicken tetrazzini,469300,30,1726293,2011-12-01,"['30-minutes-or-less', 'time-to-make', 'course...","[351.2, 27.0, 6.0, 31.0, 47.0, 34.0, 7.0]",9,"['cook noodles as directed on box', 'partially...","easy, creamy and yummy!","['cream of mushroom soup', 'chicken breasts', ...",7
76112,easy creamy lemon chicken,124344,20,219874,2005-06-01,"['30-minutes-or-less', 'time-to-make', 'course...","[398.6, 17.0, 4.0, 47.0, 116.0, 15.0, 4.0]",5,"['cook chicken breasts on grill', ""if you don'...",a variation of the campbell's soup creamy lemo...,"['boneless skinless chicken breasts', 'cream o...",5
62393,creamy buffalo chicken pasta,229231,35,487279,2007-05-21,"['60-minutes-or-less', 'time-to-make', 'main-i...","[385.2, 43.0, 9.0, 18.0, 52.0, 33.0, 2.0]",9,['cut chicken breast into bite-size pieces and...,"your chicken, your hotness and your bleu chees...","['chicken breast', 'paprika', 'garlic powder',...",10


## Second query

query: poultry meat in mediterranean style
(in Polish: drób w stylu śródziemnomorskim)

In [21]:
""" 
Results:
                                                     name      id  ...  n_ingredients                                           combined
161580                                      please ignore  409347  ...              2      please ignore construction please ignore none
116548  kariokor nyama ya kuchoma  barbecued meat rub ...  307071  ...              7  kariokor nyama ya kuchoma barbecued meat rub k...
77685                        easy pot roast   asian style   58794  ...              8  easy pot roast asian style mom used make dish ...
190335                       smothered nutria cajun style  418543  ...              8  smothered nutria cajun style nutria also calle...
223958  waldorf style chicken saladwaldorf style chick...   24924  ...              7  waldorf style chicken saladwaldorf style chick...
74198                dzik  yucatecan style salpicn de res  248709  ...             14  dzik yucatecan style salpicn de re incredibly ...
20776               beef style seasoning or chicken style  486189  ...              9  beef style seasoning chicken style asked recip...
163451                                        pork madras  205317  ...             14  pork madras fairly simple tasty version indian...
230024                                    yummy meat cake  415408  ...              7  yummy meat cake meat cake combined meatloaf st...
162876                pork barbecue  north carolina style  174305  ...             11  pork barbecue north carolina style pork barbec..."""

# Row labels copied from the pasted search output in the string above (same
# ten labels, same order), so the full rows can be shown from `data`.
results_tfidf = [161580, 116548, 77685, 190335, 223958, 74198, 20776, 163451, 230024, 162876]
# Look the hits up by index label to display the full recipe rows.
data.loc[results_tfidf]

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
161580,please ignore,409347,7,35635,2010-01-19,"['15-minutes-or-less', 'time-to-make', 'course...","[73.5, 7.0, 1.0, 2.0, 12.0, 7.0, 0.0]",1,['none'],"under construction, please ignore","['egg', 'water']",2
116548,kariokor nyama ya kuchoma barbecued meat rub ...,307071,5,461834,2008-06-03,"['15-minutes-or-less', 'time-to-make', 'course...","[161.1, 1.0, 105.0, 2358.0, 5.0, 1.0, 13.0]",2,"['thoroughly combine all ingredients', 'rub on...","barbecued meat, nairobi market style (kenya)\r...","['coarse salt', 'coarse black pepper', 'crushe...",7
77685,easy pot roast asian style,58794,225,59307,2003-04-08,"['weeknight', 'time-to-make', 'course', 'main-...","[932.0, 109.0, 2.0, 28.0, 127.0, 132.0, 1.0]",9,['season meat with a little soy sauce and garl...,my mom used to make this dish all the time. it...,"['pot roast', 'oil', 'soy sauce', 'garlic powd...",8
190335,smothered nutria cajun style,418543,60,64642,2010-03-30,"['weeknight', '60-minutes-or-less', 'time-to-m...","[187.7, 14.0, 31.0, 13.0, 14.0, 8.0, 6.0]",7,"['heat oil in stockpot until very hot', 'sprin...",the nutria (also called ragondin) is a fur bea...,"['vegetable oil', 'nutria', 'cajun seasoning',...",8
223958,waldorf style chicken saladwaldorf style chick...,24924,70,6164,2002-04-11,"['weeknight', 'time-to-make', 'course', 'main-...","[69.5, 5.0, 28.0, 1.0, 2.0, 1.0, 3.0]",3,"['mix together all ingredients', 'cover and re...","mix, refrigerate and eat...!","['chicken', 'mayonnaise', 'apple', 'raisins', ...",7
74198,dzik yucatecan style salpicn de res,248709,140,128945,2007-08-25,"['time-to-make', 'course', 'main-ingredient', ...","[576.2, 52.0, 20.0, 6.0, 103.0, 50.0, 5.0]",12,['the meat: bring 3 cups water to a boil in a ...,this is incredibly delicious and very authenti...,"['flank steaks', 'garlic clove', 'bay leaves',...",14
20776,beef style seasoning or chicken style,486189,5,37449,2012-08-30,"['15-minutes-or-less', 'time-to-make', 'prepar...","[1126.6, 39.0, 6.0, 299.0, 267.0, 16.0, 50.0]",3,['mix all together and keep in a jar with a ti...,i was asked for a recipe for beef style season...,"['nutritional yeast', 'onion powder', 'salt', ...",9
163451,pork madras,205317,150,330621,2007-01-14,"['curries', 'time-to-make', 'course', 'main-in...","[844.0, 104.0, 33.0, 20.0, 86.0, 174.0, 5.0]",11,['mix together all the spices and add a few sp...,a fairly simple and tasty version of the india...,"['country-style pork ribs', 'chili powder', 'g...",14
230024,yummy meat cake,415408,55,524700,2010-03-04,"['60-minutes-or-less', 'time-to-make', 'course...","[622.8, 57.0, 7.0, 45.0, 56.0, 47.0, 14.0]",5,"['mix flour & baking powder in a bowl', 'add o...",meat & cake combined meatloaf style,"['ground beef', 'egg', 'onion soup mix', 'oil'...",7
162876,pork barbecue north carolina style,174305,390,289066,2006-06-21,"['course', 'main-ingredient', 'cuisine', 'nort...","[566.7, 56.0, 44.0, 32.0, 82.0, 62.0, 5.0]",12,"['put roast in roasting pan', 'combine remaini...","pork barbecue, north carolina style","['boston butt', 'onion', 'water', 'vinegar', '...",11
