# Term Project II

The main task is to build a personal food recipe recommendation web-application. The followings are its main features

- The user can search for recipe by name.
- The user can search for recipe by ingredients.
- The user receive the suggestion recipes she/he maybe interested in.

In [1]:
import pandas as pd

from sklearn import preprocessing
from sklearn import neighbors
from sklearn import tree
from sklearn import naive_bayes
from sklearn import model_selection
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfVectorizer

from scipy import sparse

import numpy as np
from numpy import hstack

import multiprocessing as mp

import string

import json
import re

import nltk
from nltk import ngrams
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.metrics.pairwise import cosine_similarity

from spellchecker import SpellChecker
import os
from pathlib import Path

## Sample data

In [2]:
food = pd.read_csv('food.csv')
food.drop('Unnamed: 0',axis='columns',inplace=True)
food.drop('Ingredients',axis='columns',inplace=True)
food.head()

Unnamed: 0,Title,Instructions,Image_Name,Cleaned_Ingredients
0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher..."
1,Crispy Salt and Pepper Potatoes,Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"['2 large egg whites', '1 pound new potatoes (..."
2,Thanksgiving Mac and Cheese,Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"['1 cup evaporated milk', '1 cup whole milk', ..."
3,Italian Sausage and Bread Stuffing,Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"['1 (¾- to 1-pound) round Italian loaf, cut in..."
4,Newton's Law,Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"['1 teaspoon dark brown sugar', '1 teaspoon ho..."


In [5]:
title = food.iloc[0]['Title']
print(title)

Miso-Butter Roast Chicken With Acorn Squash Panzanella


In [4]:
check = food.iloc[0]["Cleaned_Ingredients"]
check = check.strip('][').split(',')
for i in range(1,len(check),5):
    print(check[i])

 '2¾ tsp. kosher salt
 '1 Tbsp. finely chopped rosemary'
 'Pinch of crushed red pepper flakes'
 cored
 '3 Tbsp. apple cider vinegar'
 '¼ cup dry white wine'


In [5]:
instru = ingre = food.iloc[0]['Instructions'].split('.')
for i in range(1,len(instru),5) :
    print(instru[i])

 salt, and tie legs together with kitchen twine

Combine sage, rosemary, and 6 Tbsp
 salt to remaining herb butter in bowl; season with black pepper and toss to combine
 Mix miso and 3 Tbsp
) Let chicken rest in skillet at least 5 minutes, then transfer to a plate; reserve skillet
 Transfer to a serving dish
) Add wine and cook, stirring often and scraping up any browned bits with a wooden spoon, until bits are loosened and wine is reduced by about half (you should be able to smell the wine), about 2 minutes

Serve chicken with gravy and squash panzanella alongside


In [6]:
image_name = food.iloc[0]['Image_Name']
print(image_name)

miso-butter-roast-chicken-acorn-squash-panzanella


![somehting](./food_image/miso-butter-roast-chicken-acorn-squash-panzanella.JPG)

# Spell Checker Feature

In [7]:
context = Path("eng-simple_wikipedia_2021_300K-sentences.txt").read_text("utf-8")
context = re.sub('[^A-Za-z]'," ",context)
context = " ".join(context.split())
context = context.lower()

list_of_word = context.split(" ")

In [10]:
spell = SpellChecker()

spell.word_frequency.load_words(list_of_word)

word = "Miso Buter Roats Chickken Wiht Acon Squah Panzanella"

misspelled = spell.unknown(word.split(" "))

if len(misspelled) != 0 :
    for word in misspelled:
        print(spell.correction(word),end=' ')
#         candidate_list = [i for i in spell.candidates(word)]
#         print(candidate_list)
#         print(spell.candidates(word))
else:
    print("no mispelling")

squad outer roads con chicken manzanilla 

# Search Feature

# Prepre Search Feature

- tf-idf
- bm25
- elastic search

In [11]:
vectorizer = TfidfVectorizer(ngram_range=(1,3))
X = vectorizer.fit_transform(food["Title"].astype('U'))

## Prepare TF-IDF

In [19]:
def serach_by_tf_idf(query="sweet"):
    query_vec = vectorizer.transform([query])
    results = cosine_similarity(X,query_vec).reshape((-1,))
    return results.argsort()[-5:][::-1]

In [20]:
serach_by_tf_idf()

array([ 7713,  7987,  8899, 11648, 13142], dtype=int64)

## Prepare BM25

In [21]:
class BM25(object):
    def __init__(self, b=0.75, k1=1.6):
        self.vectorizer = TfidfVectorizer(norm=None, smooth_idf=False,ngram_range=(1,3))
        self.b = b
        self.k1 = k1

    def fit(self, X):
        self.vectorizer.fit(X)
        y = super(TfidfVectorizer, self.vectorizer).transform(X)
        self.avdl = y.sum(1).mean()

    def transform(self, q, X):
        b, k1, avdl = self.b, self.k1, self.avdl

        X = super(TfidfVectorizer, self.vectorizer).transform(X)
        len_X = X.sum(1).A1
        q, = super(TfidfVectorizer, self.vectorizer).transform([q])
        assert sparse.isspmatrix_csr(q)

        X = X.tocsc()[:, q.indices]
        denom = X + (k1 * (1 - b + b * len_X / avdl))[:, None]
        idf = self.vectorizer._tfidf.idf_[None, q.indices] - 1.
        numer = X.multiply(np.broadcast_to(idf, X.shape)) * (k1 + 1)                                                          
        return (numer / denom).sum(1).A1

In [22]:
bm25 = BM25()
bm25.fit(food["Title"].astype('U'))

In [23]:
def search_by_bm25(query):
    result = bm25.transform(query,food["Title"].astype('U'))
    return result.argsort()[-5:][::-1]

## Search by Food Title ( by name) Feature

In [24]:
for i in serach_by_tf_idf("sweet english tea") :
    print(food.iloc[i]['Title'])

Sweet Tea
English Cookies
English Pancakes
Sweet Peach Tea
Mint Tea


In [25]:
for i in search_by_bm25("taco kabub") :
    print(food.iloc[i]['Title'])

Fish Taco Platter
Guacamole Taquero: Taco-Shop Guacamole
Guajillo-Braised Beef Short Rib Taco
Butternut Squash, Kale, and Crunchy Pepitas Taco
Mushroom, Rajas, and Corn Taco with Queso Fresco


## Search By Ingredient Feature

### Check answer

In [26]:
ingre = food[food.Title == "Crock Pot Ribs"]["Cleaned_Ingredients"]
print(ingre)

3883    ['1 3–4 pound country style pork ribs, also kn...
Name: Cleaned_Ingredients, dtype: object


In [27]:
check = food.iloc[3883]["Cleaned_Ingredients"].split(',')
check[0] = check[0].replace('[','')
check[-1] = check[-1].replace(']','')
for i in check :
    print(i)

'1 3–4 pound country style pork ribs
 also known as baby back ribs (about 2 racks)'
 '1 teaspoon salt'
 '1 cup bbq sauce plus additional for serving'


In [28]:
bm25_ingr = BM25()
bm25_ingr.fit(food["Cleaned_Ingredients"].astype('U'))

In [29]:
vectorizer = TfidfVectorizer(ngram_range=(1,3))
X_ingr = vectorizer.fit_transform(food["Cleaned_Ingredients"].astype('U'))

In [30]:
def serach_ingre_tf_idf(query):
    query_vec = vectorizer.transform([query])
    results = cosine_similarity(X_ingr,query_vec).reshape((-1,))
    return results.argsort()[-5:][::-1]

In [31]:
def search_ingre_bm25(query):
    result = bm25.transform(query,food["Cleaned_Ingredients"].astype('U'))
    return result.argsort()[-5:][::-1]

In [32]:
for i in serach_ingre_tf_idf("pork") :
    print(food.iloc[i]['Title'])

Cassoulet in the Style of Toulouse (Cassoulet de Toulouse)
James Beard's Roasted Spareribs
Porchetta
Carnitas: Braised and Fried Pork
Slow-Cooked Carnitas Tacos


In [33]:
for i in search_ingre_bm25("pork") :
    print(food.iloc[i]['Title'])

James Beard's Roasted Spareribs
Chicharrón Casera
Dan Barber's Pork Belly
Porchetta
Grilled Pork Tenderloin with Molasses and Mustard


In [43]:
# Return data

In [34]:
food_ingredients = food 

In [35]:
    food_ingredients = food_ingredients.rename(columns={'Unnamed: 0' : 'id'})
#     food_ingredients.drop('Ingredients',axis='columns',inplace=True)
    food_ingredients["Cleaned_Ingredients"] = food_ingredients["Cleaned_Ingredients"] .map(lambda x: str(x).lstrip('[').rstrip(']'))

In [36]:
food_ingredients

Unnamed: 0,Title,Instructions,Image_Name,Cleaned_Ingredients
0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"'1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher ..."
1,Crispy Salt and Pepper Potatoes,Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"'2 large egg whites', '1 pound new potatoes (a..."
2,Thanksgiving Mac and Cheese,Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"'1 cup evaporated milk', '1 cup whole milk', '..."
3,Italian Sausage and Bread Stuffing,Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"'1 (¾- to 1-pound) round Italian loaf, cut int..."
4,Newton's Law,Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"'1 teaspoon dark brown sugar', '1 teaspoon hot..."
...,...,...,...,...
13496,Brownie Pudding Cake,Preheat the oven to 350°F. Into a bowl sift to...,brownie-pudding-cake-14408,"'1 cup all-purpose flour', '2/3 cup unsweetene..."
13497,Israeli Couscous with Roasted Butternut Squash...,Preheat oven to 475°F.\nHalve lemons and scoop...,israeli-couscous-with-roasted-butternut-squash...,"'1 preserved lemon', '1 1/2 pound butternut sq..."
13498,Rice with Soy-Glazed Bonito Flakes and Sesame ...,"If using katsuo bushi flakes from package, moi...",rice-with-soy-glazed-bonito-flakes-and-sesame-...,'Leftover katsuo bushi (dried bonito flakes) f...
13499,Spanakopita,Melt 1 tablespoon butter in a 12-inch heavy sk...,spanakopita-107344,'1 stick (1/2 cup) plus 1 tablespoon unsalted ...


In [39]:
return_data = []
for i in range(len(food_ingredients)):
    data = {
        'id':i,
        'Title':food_ingredients.iloc[i].to_dict()["Title"],
        'Instructions':[x for x in str(food_ingredients.iloc[i].to_dict()["Instructions"]).split('.')],
        'Image_Name':food_ingredients.iloc[i].to_dict()["Image_Name"],
        'Ingredients':[x for x in food_ingredients.iloc[i].to_dict()["Cleaned_Ingredients"].split(',')],

    }
    return_data.append(json.dumps(data)) 
# return_data[:5]

In [40]:
return_data[0]

'{"id": 0, "Title": "Miso-Butter Roast Chicken With Acorn Squash Panzanella", "Instructions": ["Pat chicken dry with paper towels, season all over with 2 tsp", " salt, and tie legs together with kitchen twine", " Let sit at room temperature 1 hour", "\\nMeanwhile, halve squash and scoop out seeds", " Run a vegetable peeler along ridges of squash halves to remove skin", " Cut each half into \\u00bd\\"-thick wedges; arrange on a rimmed baking sheet", "\\nCombine sage, rosemary, and 6 Tbsp", " melted butter in a large bowl; pour half of mixture over squash on baking sheet", " Sprinkle squash with allspice, red pepper flakes, and \\u00bd tsp", " salt and season with black pepper; toss to coat", "\\nAdd bread, apples, oil, and \\u00bc tsp", " salt to remaining herb butter in bowl; season with black pepper and toss to combine", " Set aside", "\\nPlace onion and vinegar in a small bowl; season with salt and toss to coat", " Let sit, tossing occasionally, until ready to serve", "\\nPlace a rac

# Try search by tf idf and spell checking 


In [41]:
import ast

In [42]:
def search_by_name_tfidf(query="Misoo butterr chickken"):
    misspelled = spell.unknown(query.split(" "))
    if len(misspelled)==0:
        searched_data = serach_by_tf_idf(query)[:10]
        for i in searched_data:
            print(ast.literal_eval(return_data[i])["Title"])
    else:
        print("did you mean",end=" ")
        for word in misspelled:
            print(spell.correction(word),end=" ")
       

In [43]:
search_by_name_tfidf()

did you mean butter chicken miso 

# search by ingredient with tfidf with spell checking

In [46]:
def search_by_ingre_tfidf(query="chicken"):
    misspelled = spell.unknown(query.split(" "))
    if len(misspelled)==0:
#         vectorizer = TfidfVectorizer(ngram_range=(1,3))
#         X_ingr = vectorizer.fit_transform(food["Cleaned_Ingredients"].astype('U'))
        searched_data = serach_ingre_tf_idf(query)[:2]
        for i in searched_data:
            print(return_data[i])
    else:
        print("did you mean",end=" ")
        for word in misspelled:
            print(spell.correction(word),end=" ")

In [47]:
search_by_ingre_tfidf()

{"id": 2599, "Title": "Fully Salted Roast Chicken", "Instructions": ["Pat dry chicken with paper towels", " Holding a 5-finger pinch of salt and positioning your hand at least 10\" above chicken, evenly rain salt down over chicken", " Repeat with several more pinches of salt until both inside and outside of chicken are covered with salt (be generous and make sure to get inside all the nooks and crannies)", " Place chicken on a wire rack set inside a rimmed baking sheet and chill uncovered at least 8 hours and up to 1 day", "\nLet sit at room temperature 1 hour", "\nPreheat oven to 425\u00b0F", " Roast chicken until an instant-read thermometer inserted into the thickest part of chicken thigh registers 165\u00b0F, 40\u201355 minutes", " Transfer chicken to a cutting board and let rest 15 minutes before carving", ""], "Image_Name": "fully-salted-roast-chicken", "Ingredients": ["'1 (3 1/2\u20134-lb.) chicken'", " 'Kosher salt'"]}
{"id": 1398, "Title": "Lentil and Chicken Soup with Sweet Po

# SEARCH by Elasticsearch

In [167]:
from elasticsearch import Elasticsearch


In [48]:
es = Elasticsearch(HOST="http://localhost", PORT=9200)
es = Elasticsearch()

# The Suggestion System using machine learning