# Recipe and Ingredient Recommendation system

In [None]:
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns

# Import dataset

In [None]:
data = pd.read_csv('https://raw.githubusercontent.com/shaansubbaiah/allrecipes-scraper/main/export/scraped-07-05-21.csv')
data.head()

In [None]:
import sys

# NOTE(review): machine-specific absolute path — this cell only works where this
# folder exists; consider a path relative to the notebook.
sys.path.insert(0, "C:/Users/melen/Documents/DS Bootcamp/_functions_")
# Star import from a local helper module. Presumably the source of
# percentage_nullValues used in the next cells (it is not defined in this
# notebook) — TODO confirm and import the needed names explicitly.
from Functions_EDA import *

In [None]:
data.columns

In [None]:
data.info()

In [None]:
percentage_nullValues(data)

In [None]:
data_ = data.copy()
data_.head()

In [None]:
data_.columns

In [None]:
data_ = data_.drop(['calories', 'carbohydrates_g',
       'sugars_g', 'fat_g', 'saturated_fat_g', 'cholesterol_mg', 'protein_g',
       'dietary_fiber_g', 'sodium_mg', 'calories_from_fat', 'calcium_mg',
       'iron_mg', 'magnesium_mg', 'potassium_mg', 'zinc_mg', 'phosphorus_mg',
       'vitamin_a_iu_IU', 'niacin_equivalents_mg', 'vitamin_b6_mg',
       'vitamin_c_mg', 'folate_mcg', 'thiamin_mg', 'riboflavin_mg',
       'vitamin_e_iu_IU', 'vitamin_k_mcg', 'biotin_mcg', 'vitamin_b12_mcg',
       'mono_fat_g', 'poly_fat_g', 'trans_fatty_acid_g',
       'omega_3_fatty_acid_g', 'omega_6_fatty_acid_g'], axis=1)
data_

In [None]:
percentage_nullValues(data_)

In [None]:
data_ = data_.drop(["cook","prep","total","author"], axis=1)

In [None]:
percentage_nullValues(data_)

## Save dataset to csv

In [None]:
data_.to_csv(
    r"C:\Users\melen\Documents\DS Bootcamp\data_recipes.csv",
    index=None,
    header=True)

In [None]:
data_ = pd.read_csv(r"C:\Users\melen\Documents\DS Bootcamp\data_recipes.csv")

In [None]:
data_.info()

In [None]:
data_.tail()

Checking duplicates - some dishes have several recipes under the same name; however, I will keep all of them.

# EDA & Data Cleaning

In [None]:
import sys

# NOTE(review): machine-specific absolute path — this cell only works where this
# folder exists; consider a path relative to the notebook.
sys.path.insert(0, "C:/Users/melen/Documents/DS Bootcamp/_functions_")
# Star import from a local helper module. Presumably the source of
# text_preprocessing and text_preprocessing_updated used further down (neither is
# defined in this notebook) — TODO confirm and import the needed names explicitly.
from fx_NLP import *

## Name

In [None]:
data_.name.duplicated().sum()

In [None]:
data_.name.value_counts()

In [None]:
data_[data_["name"] == "Roasted Cauliflower Soup"]

In [None]:
data_[data_["name"] == "Turkey Sloppy Joes"]

In [None]:
from nltk.corpus import stopwords

stop_words = stopwords.words('english')
# NLTK's English stop words are all lowercase, while recipe names contain capitalised
# words (e.g. a leading "The"), so the comparison is done on the lowercased token.
# A set makes each membership test O(1) instead of scanning the whole list.
stop_word_set = set(stop_words)
data_['name_cl'] = data_['name'].apply(
    lambda x: ' '.join(word for word in x.split() if word.lower() not in stop_word_set))

## Category

In [None]:
data_.category.value_counts()

Replacing the two numerical values with "uncategorized"

In [None]:
data_[(data_["category"] == "515") | (data_["category"] == "251")]

Changing the categories that include a few recipes to "uncategorized"

In [None]:
data_["category"] = data_["category"].str.replace("515", "uncategorized")

In [None]:
data_.category.value_counts()

In [None]:
data_["category"] = data_["category"].str.replace("251", "uncategorized")

In [None]:
data_.category.value_counts()

In [None]:
data_["category"] = data_["category"].str.replace("-", " ")

In [None]:
data_.columns

## Yield

In [None]:
data_["yield"].value_counts().tail(25)

In [None]:
data_["servings"].value_counts().head(25)

In [None]:
data_["yield"].value_counts().head(25)

In [None]:
data_["yield"] = data_["yield"].str.replace("- ", " ")
data_["yield"].value_counts().tail(25)

In [None]:
data_["yield"] = data_["yield"].str.strip()
data_["yield"].value_counts().head(25)

In [None]:
data_.columns

In [None]:
data_["yield"].str.contains(pat = "sandwich").value_counts()

Ingredients, Directions & Summary are all text values. Text cleaning and standardising to be applied.

## Lemmatizer Function

In [None]:
# Import WordNetLemmatizer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
# Import Counter
from collections import Counter

In [None]:
def lemmatizer_func(text, x):
    """Return the ``x`` most frequent lemmas of ``text`` as ``(lemma, count)`` pairs.

    The text is tokenized with NLTK's ``word_tokenize``; each token is lowercased,
    kept only if it is purely alphabetic and not an English stop word, and then
    reduced to its WordNet lemma before being counted.

    Parameters
    ----------
    text : str
        Raw text to analyse.
    x : int
        Number of most common lemmas to return.

    Returns
    -------
    list of tuple
        ``(lemma, count)`` pairs as produced by ``Counter.most_common(x)``.
    """
    tokens = word_tokenize(text)
    stop_set = set(stopwords.words('english'))
    lemmatize = WordNetLemmatizer().lemmatize

    # Single pass over the tokens: normalise, filter, lemmatize and count.
    counts = Counter()
    for raw_token in tokens:
        token = raw_token.lower()
        if token.isalpha() and token not in stop_set:
            counts[lemmatize(token)] += 1

    return counts.most_common(x)

## Summary

In [None]:
data_.summary.head()

In [None]:
text_summary = ". ".join(txt for txt in data_.summary.astype(str))
text_summary

In [None]:
top_100_summary = lemmatizer_func(text_summary,100)

In [None]:
stawp_words_summ = [
    "also", "recipe", "make", "like", "way", "hot", "warm", "really", "better",
    "well", "little", "even", "get","used","try","go","using","work","rich"
]

In [None]:
text_summary_up = text_preprocessing_updated(text_summary,stawp_words_summ)

### Top 100 key words used in summaries

In [None]:
# The section heading and the variable name both promise the top 100 key words,
# so request 100 (the call previously asked for 50).
top_100_summary = lemmatizer_func(text_summary_up, 100)
top_100_summary

data_['summary_prep'] = data_['summary'].apply(lambda x: text_preprocessing(x, 'english'))
data_

## Ingredients

In [None]:
data_.ingredients.head()

In [None]:
replacers = {
    "\u2009": "",
    "[0-9]": " ",
    "  ": " ",
    ",":" ",
    "  ": " ",
    "   ":" ",
    "     ":" ",
    " ; ":"  ",
    "[$&+,:=?@#<>.^*()]": " ",
    "[¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞↉]+": " ",
    " ;" : "; ",
}

data_["ingr_clean"] = data_.ingredients.replace(replacers, regex=True).str.strip()

In [None]:
replacer = {
    "\u2009": "",
    "[$&+,:=?@#<>.^*()]": " ",
    "  ": " ",
    " ; ":"; "
}

data_["ingr_results"] = data_.ingredients.replace(replacer, regex=True).str.strip()

In [None]:
data_["ingr_results"].head()

In [None]:
from nltk import pos_tag

In [None]:
data_['POS Tagged Text'] = data_['ingr_clean'].apply(lambda item:item.strip().split()).apply(pos_tag)
data_['Just Nouns Text'] = data_['POS Tagged Text'].apply(lambda item:[w for w,t in item if t=='NN'])
print(data_['Just Nouns Text'])

In [None]:
data_['liststring'] = data_['Just Nouns Text'].apply(lambda x: ', '.join(map(str, x)))
data_['liststring'].head()

In [None]:
stawp_words_ingr = [
    "cups", "cup", "tablespoons", "teaspoon", "tablespoon", "ounce", "chopped",
    "ground", "taste", "white", "fresh", "pound", "sliced", "package",
    "packed", "pack", "minced", "diced", "red", "dried", "green", "large",
    "cut", "shredded", "baking", "juice", "peeled", "extract", "drained",
    "vanilla", "pinch", "optional", "grated", "brown", "finely", "divided",
    "slice", "softened", "dry", "needed", "small", "large", "medium", "big",
    "leaf", "piece", "frozen", "thinly", "melted", "mix", "freshly", "inch",
    "beaten", "can", "boneless", "can", "hot", "allpurpose", "cans", "sliced",
    "rinsed", "heavy", "spray", "seed", "cooking", "cubed", "rinsed", "thawed",
    "italian", "package", "halved", "unsalted", "sweet", "cube", "sweet",
    "breast", "half", "whole", "piece", "chip", "flake", "teaspoon", "powder",
    "ounces", "slice", "bell", "crushed", "piece", "kosher", "bunch",
    "crumbled", "stalk", "paste", "pitted", "teaspoons", "confectioner", "jar",
    "soda", "deveined", "juiced", "cooked", "container", "unsweetened",
    "sweet", "vegetable", "sour", "box", "sauce", "elbow", "butt", "sheet",
    "temperature", "soup", "flake", "soup", "confectioner", "half", "head",
    "condensed", "prepared", "fluid", "uncooked", "light","frying","food","baby"
]

In [None]:
data_['ingr_cleaner'] = data_['liststring'].apply(lambda x : text_preprocessing_updated(x, stawp_words_ingr))
data_['ingr_cleaner']

In [None]:
data_['ingr_cleaner'].head()

In [None]:
text_ingr = "  ".join(txt for txt in data_.ingr_cleaner.astype(str))

### Handling ngrams

In [None]:
from itertools import islice

from nltk.util import ngrams

n = 2
sentence = text_ingr
# With n = 2 these are bigrams (adjacent word pairs), not unigrams.
ingr_ngrams = ngrams(sentence.split(), n)

# Preview only: the generator covers the ingredient words of every recipe, so
# printing it in full floods the notebook output.
for item in islice(ingr_ngrams, 20):
    print(item)

In [None]:
from nltk import bigrams

# Flatten every recipe's adjacent word pairs into one list, in recipe order.
ingr_bigr = []
for ingr_text in data_['ingr_cleaner']:
    ingr_bigr.extend(bigrams(ingr_text.split()))

# Tally the pairs and keep the 100 most frequent as (bigram, count) tuples.
c = Counter(ingr_bigr)
top = c.most_common(100)
print(*top)

In [None]:
top_dict = dict(top)

In [None]:
df_dict = pd.DataFrame(list(top_dict.items()),columns = ['bigram','count']) 
df_dict

In [None]:
ingr_bigr_df = pd.DataFrame.from_records(top, columns=['bigram', 'count'])
ingr_bigr_df

In [None]:
import nltk
from nltk.collocations import BigramAssocMeasures, BigramCollocationFinder

bigram_assoc_measures = BigramAssocMeasures()

# 1. Split the concatenated ingredient text into words
text = text_ingr.split()

# 2. Minimum number of occurrences a bigram needs to be kept, and how many of the
#    best-scoring bigrams to return
minimum_number_of_bigrams = 2
top_bigrams_to_return = 200

# 3. Collect the bigrams contained in the text
finder = BigramCollocationFinder.from_words(text)

# 4. Keep only bigrams that appear at least `minimum_number_of_bigrams` times
finder.apply_freq_filter(minimum_number_of_bigrams)

# 5. Return the best bigrams ranked by pointwise mutual information (PMI)
finder.nbest(bigram_assoc_measures.pmi, top_bigrams_to_return)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

word_vectorizer = CountVectorizer(ngram_range=(2, 3), analyzer='word')
sparse_matrix = word_vectorizer.fit_transform(data_['ingr_cleaner'])
# Column totals in one sparse operation; the builtin sum() added the matrix up one
# sparse row at a time.
frequencies = np.asarray(sparse_matrix.sum(axis=0)).ravel()
# get_feature_names() was removed in scikit-learn 1.2; use its replacement and fall
# back only on releases that predate get_feature_names_out().
feature_names = (word_vectorizer.get_feature_names_out()
                 if hasattr(word_vectorizer, "get_feature_names_out")
                 else word_vectorizer.get_feature_names())
df_2 = pd.DataFrame(frequencies,
                    index=feature_names,
                    columns=['frequency'])

### Top 50 ngrams (2,3) - Ingredients

In [None]:
# 50 most frequent ingredient n-grams, with the n-gram text moved out of the index
# into an "ingredients" column.
ingr_ngram_df = (
    df_2.sort_values(by="frequency", ascending=False)
    .head(50)
    .reset_index()
    .rename(columns={"index": "ingredients"})
)
ingr_ngram_df.head()

### Top 50 ngrams(3,5) - Recipe names

In [None]:
word_vectorizer = CountVectorizer(ngram_range=(3, 5), analyzer='word')
sparse_matrix = word_vectorizer.fit_transform(data_['name_cl'])
# Column totals in one sparse operation; the builtin sum() added the matrix up one
# sparse row at a time.
frequencies = np.asarray(sparse_matrix.sum(axis=0)).ravel()
# get_feature_names() was removed in scikit-learn 1.2; use its replacement and fall
# back only on releases that predate get_feature_names_out().
feature_names = (word_vectorizer.get_feature_names_out()
                 if hasattr(word_vectorizer, "get_feature_names_out")
                 else word_vectorizer.get_feature_names())
df_3 = pd.DataFrame(frequencies,
                    index=feature_names,
                    columns=['frequency'])

In [None]:
# 50 most frequent recipe-name n-grams, with the n-gram text moved out of the index
# into a "recipe name" column.
name_ngram_df = (
    df_3.sort_values(by="frequency", ascending=False)
    .head(50)
    .reset_index()
    .rename(columns={"index": "recipe name"})
)
name_ngram_df.head(20)

### Lemmatize column

In [None]:
import nltk

w_tokenizer = nltk.tokenize.WhitespaceTokenizer()
lemmatizer = nltk.stem.WordNetLemmatizer()

def lemmatize_text(text):
    """Split ``text`` on whitespace and return the list of WordNet lemmas.

    Relies on the notebook-level ``w_tokenizer`` and ``lemmatizer`` objects.
    """
    lemmas = []
    for word in w_tokenizer.tokenize(text):
        lemmas.append(lemmatizer.lemmatize(word))
    return lemmas

data_['text_lemmatized'] = data_['ingr_cleaner'].apply(lemmatize_text)

In [None]:
data_['text_lemmatized'].head()

In [None]:
pd.value_counts(np.hstack(data_['text_lemmatized'])).head(20)

### Top 50 ingredients

In [None]:
# Series.explode() takes no column name; the stray 'Cast' argument was being read as
# its ignore_index flag. Each word of every ingredient string becomes one row, which
# is then counted.
top_50_ingred = (
    data_['ingr_cleaner']
    .str.split(' ')
    .explode()
    .value_counts()
    .rename_axis('ingredients')
    .reset_index(name='counts')
    .head(50)
)
top_50_ingred

In [None]:
# Number of distinct ingredient words. Series.explode() takes no column name; the
# stray 'Cast' argument was being read as its ignore_index flag.
data_['ingr_cleaner'].str.split(' ').explode().value_counts().rename_axis('ingredients').reset_index(
        name='counts').shape

## Directions

In [None]:
data_.directions

In [None]:
text_dir = "  ".join(txt for txt in data_.directions.astype(str))
text_dir

top_100_dir = lemmatizer_func(text_dir,100)
top_100_dir

## url

In [None]:
data_.url.head(20)

## Rating

In [None]:
data_.info()

Replacing "uncategorized" with "0.0" and converting all values to float.

In [None]:
data_["rating"].value_counts()

In [None]:
data_["rating"].unique()

In [None]:
data_["rating"]

In [None]:
data_["rating"].unique()

In [None]:
data_["rating"] = data_["rating"].astype("float")
data_["rating"]

In [None]:
data_.info()

## Rating count

In [None]:
data_.rating_count.value_counts()

In [None]:
data_.rating_count.unique()

In [None]:
data_["rating_count"] = data_["rating_count"].astype("int")
data_["rating_count"]

In [None]:
data_.info()

## Review count

In [None]:
data_.review_count.value_counts()

In [None]:
data_["review_count"] = data_["review_count"].astype("int")
data_["review_count"]

## Servings

In [None]:
data_["servings"] = data_["servings"].astype("int")
data_["servings"]

In [None]:
data_.info()

In [None]:
data_.category.value_counts()

## Save clean dataset to csv

In [None]:
data_.to_csv(
    r"C:\Users\melen\Documents\DS Bootcamp\data_recipes_clean.csv",
    index=None,
    header=True)

In [None]:
data_ = pd.read_csv(
    r"C:\Users\melen\Documents\DS Bootcamp\data_recipes_clean.csv"
)

# Problem understanding

## What type of data science problem is this?

- What do we need to learn about the data?
- What type of exploratory data analysis do we need to conduct?
- Where is our data coming from?
- What is the current state of our data?
- Is this a supervised or unsupervised learning problem?
- Is this a regression, classification, or clustering problem?
- What biases could our data contain?
- What type of data cleaning do we need to do?
- What type of feature engineering could be useful?
- What algorithms or types of models have been proven to solve similar problems well?
- What evaluation metric are we using for our model?
- What is our training and testing plan?
- How can we tweak the model to make it more accurate, increase the ROC/AUC, decrease log-loss, etc. ?
- Have we optimized the various parameters of the algorithm? Try grid search here.

By conducting a preliminary EDA on each of the columns that are relevant to the project, I learned what information the data offers and how it is structured. The data was in a good state and didn't require much cleaning.

The aim of this project is to create a recommender system for recipes. The data comes from one source (allrecipes.com), therefore it doesn't offer a variety of options and may be biased. 

Some of the columns had minor data inconsistencies, which I have been able to address. In terms of feature engineering, I will have to encode all categorical (text) values. These represent the biggest part of the data, as they include the recipe name, the ingredients, the summary and the instructions (directions).

The following are the **primary goals** of this project:

1) Create a filter to find recipes by ingredients

2) Produce a model capable of recommending recipes based on other recipes

In [None]:
data_.head()

In [None]:
data_.info()

# Data Visualisation

## Recipe names - wordcloud

In [None]:
#!pip install wordcloud

In [None]:
from wordcloud import WordCloud

In [None]:
data_.name.head()

In [None]:
recipes = data_['name_cl']

In [None]:
word_cloud = WordCloud(background_color='white', width=600,
                       height=400).generate(" ".join(recipes))

plt.imshow(word_cloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
from PIL import Image

In [None]:
import requests

In [None]:
book =  np.array(Image.open("book.png"))
plt.imshow(book)

In [None]:
wordcloud = WordCloud(width=3000,
                      height=2000,
                      random_state=1,
                      background_color='white',
                      colormap='Set2',
                      collocations=False,
                      mask=book).generate(" ".join(recipes))

In [None]:
plt.figure(figsize=(25, 50))

word_cloud = WordCloud(background_color="#f9cfd0",
                       colormap='Set2',
                       collocations=False,
                       width=900,
                       height=450,
                       max_font_size=150).generate(" ".join(recipes))

plt.imshow(word_cloud, interpolation='bilinear')
plt.axis("off")
plt.show()

## Categories - bar plot

In [None]:
data_.columns

In [None]:
data_.category.value_counts()

In [None]:
categories = data_.category.value_counts().rename_axis('unique_values').reset_index(name='counts')
categories

In [None]:
categories.plot(kind="bar")
plt.show()

In [None]:
x_axis = categories.counts
y_axis = categories["unique_values"]
y_axis

In [None]:
x_axis

In [None]:
plt.barh(y_axis, x_axis)
plt.title('Categories')
plt.show()

In [None]:
import plotly.graph_objs as go
import plotly.express as px
import matplotlib.pyplot as plt

In [None]:
food_cat = [
    go.Bar(
        y= categories.counts,
        x= categories["unique_values"],
        marker=dict(colorscale='Jet', color="#206638")
    )
]

layout = go.Layout(
    title='Recipe Categories',
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    bargap=0.05)

fig = go.Figure(data=food_cat, layout=layout)
fig.update_layout(title_x=0.5)

In [None]:
categories.counts

### Bar Plot

In [None]:
fig, ax = plt.subplots(figsize=(50, 30))

sns.set_color_codes("muted")

sns.barplot(y=categories.counts, x=categories["unique_values"],

data= categories)
plt.tick_params(axis='both', which='major', labelsize=35)
plt.xticks(rotation=70)
plt.savefig("filename.png", transparent=True)

## Summary 

In [None]:
data_.columns

In [None]:
summary = data_.summary.to_list()
type(summary)

In [None]:
all_words = data_['summary'].str.split(expand=True).unstack().value_counts()

In [None]:
import matplotlib.pyplot as plt 
#!pip install pip
#!pip install matplotlib

In [None]:
# Skip the two most frequent tokens and plot the next 48. The same slice drives the
# bar colours, so every bar has exactly one colour value (the colour array used to
# be sliced 2:100 against 48 bars).
top_words = all_words.iloc[2:50]

# Named summary_word_bars rather than `data`, which holds the raw recipes DataFrame
# loaded at the top of the notebook.
summary_word_bars = [go.Bar(
            x = top_words.index.values,
            y = top_words.values,
            marker= dict(colorscale='Jet',
                         color = top_words.values
                        ))]

layout = go.Layout(
    title='Recipe Summary - Top 50 Words'
)

fig = go.Figure(data=summary_word_bars, layout=layout)

# No visible import binds `py` (plotly.offline), so py.iplot would fail unless a star
# import happens to provide it; Figure.show() renders the figure without it.
fig.show()

In [None]:
sorted_rating = data_.sort_values(by=['rating'], ascending=False).head(100)
sorted_rating

In [None]:
text = " ".join(txt for txt in sorted_rating.summary.astype(str))
text

In [None]:
word_cloud = WordCloud(background_color='white', 
                       colormap='Set2',      
                       width=600,
                       height=400).generate(text)

plt.imshow(word_cloud, interpolation='bilinear')
plt.axis("off")
plt.title('Top 100 Recipes - Summary');
plt.show()

## Rating

In [None]:
data_.rating.head()

In [None]:
sorted_rating_count = data_[["rating","rating_count"]].sort_values(by=['rating'], ascending=False)
sorted_rating_count.head()

In [None]:
sorted_rating_count = data_[["rating","rating_count"]].sort_values(by=['rating'], ascending=False).astype("int")
sorted_rating_count.head()

In [None]:
fig = px.pie(sorted_rating_count,
             values="rating_count",
             names="rating")
fig.show()

In [None]:
fig = plt.figure(figsize = (10, 5))
 
# creating the bar plot
plt.bar(sorted_rating_count.rating, sorted_rating_count.rating_count, color ='maroon',
        width = 0.4)

plt.show()

In [None]:
fig = sns.barplot(x="rating", y="rating_count", data= sorted_rating_count, palette='pastel',edgecolor="none",linewidth=0)
plt.xlabel("Rating")
plt.margins(x=0.03)
plt.ylabel("Rating Count")
plt.savefig("rating.png", transparent=True)

In [None]:
# Rating by recipe type? 

## Ingredients

In [None]:
fig, ax = plt.subplots(figsize=(50, 30))

sns.set_color_codes("muted")

splot = sns.barplot(x="ingredients", y="counts", data= top_50_ingred).set(title='50 Most Used Ingredients')
plt.xticks(rotation=70)
plt.tick_params(axis='both', which='major', labelsize=35)
plt.savefig("ingredients.png", transparent=True)

In [None]:
fig, ax = plt.subplots(figsize=(10, 20))

sns.set_color_codes("muted")

sns.barplot(y="ingredients", x="frequency",

data= ingr_ngram_df,

orient = "h").set(title='50 Most Used Ingredients')

## Reviews + Rating

In [None]:
most_popular = data_[["name","review_count","rating"]].sort_values(by=['review_count'], ascending=False)

In [None]:
most_popular.shape

In [None]:
most_popular = most_popular.loc[most_popular['review_count'] > 50]
most_popular = most_popular.loc[most_popular['rating'] > 3.5]

In [None]:
#rating vs. number of ratings

In [None]:
fig = px.scatter(most_popular,
                 y="review_count",
                 x="rating",
                 size="review_count",
                 color="rating",
                 log_x=True,
                 size_max=20,
                 labels={
                     "review_count": "Number of Reviews",
                     "rating": "Rating"
                 })

fig.update_layout({
    "plot_bgcolor": "rgba(0, 0, 0, 0)",
    "paper_bgcolor": "rgba(0, 0, 0, 0)",
})

fig.update_layout(title_text='Review Count vs Rating', title_x=0.5)

fig.show()

#Are the most reviewed recipes the highest rated?

In [None]:
data_.head()

## Filtered Dataframe

In [None]:
data_['ingr_cleaner'].isnull().sum()

In [None]:
filtered_df = data_[data_['ingr_cleaner'].notnull()]

In [None]:
filtered_df.head(1)

In [None]:
filtered_df = filtered_df.reset_index(drop=True)
filtered_df.head(2)

In [None]:
filtered_df.columns

In [None]:
filtered_df = filtered_df.drop(['name_cl',
                               'ingr_clean',
                               'POS Tagged Text',
                               'Just Nouns Text',
                               'liststring',
                               'ingr_cleaner',
                               'text_lemmatized',
                               "ingredients",
                               "summary"],
                               axis=1)

In [None]:
filtered_df.columns

In [None]:
filtered_df_ord = filtered_df[[
    "category", "name", "ingr_results", 'servings', 'yield', 'directions',
    'rating', 'rating_count', 'review_count','url']]
filtered_df_ord.columns = [
    "Category", "Recipe Name", "Ingredients", 'Servings', 'Yield', 'Directions',
    'Rating', 'Rating Count', 'Review Count','URL']
filtered_df_ord

In [None]:
filtered_df_ord[filtered_df_ord['Category'].str.contains("snack")]

In [None]:
filtered_df_ord[filtered_df_ord['Recipe Name'].str.contains("Auber")]

# KNN

## Vectorising Ingredients & Creating a sparse_matrix

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn import metrics

In [None]:
tfidf = TfidfVectorizer()

sparse_matrix = tfidf.fit_transform(data_["ingr_cleaner"].values.astype('U'))

sparse_matrix

In [None]:
# Densify only the row being inspected instead of the whole TF-IDF matrix.
sparse_matrix[100].todense()

In [None]:
recipe_to_assess = sparse_matrix[666]
recipe_to_assess

### DF from sparse_matrix

In [None]:
sparse_matrix

In [None]:
doc_term_matrix = sparse_matrix.todense()
# get_feature_names() was removed in scikit-learn 1.2; use its replacement and fall
# back only on releases that predate get_feature_names_out().
feature_names = (tfidf.get_feature_names_out()
                 if hasattr(tfidf, "get_feature_names_out")
                 else tfidf.get_feature_names())
# One row per recipe, one column per ingredient term, TF-IDF weights as values.
df_ = pd.DataFrame(doc_term_matrix, columns=feature_names)
df_

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
similarity = cosine_similarity(df_.T, df_.T)

### Compute Cosine Similarity

In [None]:
# get_feature_names() was removed in scikit-learn 1.2; use its replacement and fall
# back only on releases that predate get_feature_names_out().
feature_names = (tfidf.get_feature_names_out()
                 if hasattr(tfidf, "get_feature_names_out")
                 else tfidf.get_feature_names())
# Term-by-term cosine similarity, labelled with the ingredient terms on both axes.
sim_df = pd.DataFrame(similarity, columns=feature_names, index=feature_names)
sim_df

In [None]:
sim_df.loc['zucchini',:].reset_index().sort_values(by="zucchini",ascending=False).head(5)

In [None]:
tomato_df = sim_df.loc['tomato',:].reset_index()
tomato_df.sort_values(by= tomato_df.columns[1], ascending= False).head(5)

## KNN Model #0

In [None]:
from sklearn.neighbors import NearestNeighbors

kNN = NearestNeighbors(n_neighbors=6, metric='cosine')
kNN
kNN.fit(sparse_matrix)

In [None]:
neighbour = kNN.kneighbors(recipe_to_assess,
                           return_distance=False)

In [None]:
neighbour

In [None]:
data_.loc[666,'name']

In [None]:
data_.loc[5112, "ingredients"]

In [None]:
data_.loc[neighbour[0], "ingredients"]

In [None]:
data_.loc[neighbour[0],'name']

### First recommendation!

In [None]:
print("If you like {}, you should try:\n\n{}".format(
    (data_.loc[666, 'name']),
    (data_.loc[neighbour[0], 'name'])))

## KNN Model #1

In [None]:
kNN = NearestNeighbors(n_neighbors=4, metric='cosine')
kNN
kNN.fit(sparse_matrix)

In [None]:
recipe_to_assess = sparse_matrix[11]

In [None]:
data_.loc[11, 'name']

In [None]:
neighbour = kNN.kneighbors(recipe_to_assess,
                           return_distance=False)

In [None]:
neighbour

In [None]:
# 5958 is presumably one of the indices returned by kNN above — TODO confirm. Those
# indices are row positions in data_ (sparse_matrix was built from data_), whereas
# filtered_df_ord dropped rows and reset its index, so it must not be used here.
data_.loc[5958, 'name']

In [None]:
# Same row as the name lookup; kNN indices are positions in data_, not in the
# filtered and re-indexed filtered_df_ord.
data_.loc[5958, "ingr_results"]

In [None]:
# kNN was fitted on sparse_matrix, which has one row per row of data_; filtered_df_ord
# dropped rows and reset its index, so the neighbour indices only line up with data_.
data_.loc[neighbour[0], "ingr_results"]

In [None]:
# kNN was fitted on sparse_matrix, which has one row per row of data_; filtered_df_ord
# dropped rows and reset its index, so the neighbour indices only line up with data_.
data_.loc[neighbour[0], 'name']

In [None]:
def print_recipe_recom(x):
    """Print the names of the recipes most similar to the recipe at row ``x``.

    Uses the notebook-level ``kNN`` model and TF-IDF ``sparse_matrix``. The number
    of suggestions is the model's ``n_neighbors`` minus one, because the query
    recipe itself is left out.

    Parameters
    ----------
    x : int
        Row index of the query recipe in ``data_`` / ``sparse_matrix``.
    """
    recipe_to_assess = sparse_matrix[x]
    neighbour = kNN.kneighbors(recipe_to_assess, return_distance=False)[0]
    # Drop the query by index rather than by position: recipes with identical
    # ingredient vectors tie at distance 0 and can be returned ahead of the query,
    # in which case slicing off the first entry would keep the query itself.
    recommended = [i for i in neighbour if i != x][:len(neighbour) - 1]
    print("If you like {}, you should try:\n\n{}".format(
        (data_.loc[x, 'name']), (data_.loc[recommended, 'name'])))

In [None]:
print_recipe_recom(22)

In [None]:
data_.loc[neighbour[0][1:],'name']

In [None]:
pd.set_option('display.max_colwidth', None)

In [None]:
data = [(data_.loc[2, 'name']),(data_.loc[2, 'category']),(data_.loc[2, 'ingr_results']),
        (data_.loc[2, 'directions']), (data_.loc[2, 'servings']),
        (data_.loc[2, 'yield']), (data_.loc[2, 'rating']), (data_.loc[2, 'rating_count'])]
headers = ["Recipe", "Category","Ingredients", "Directions", "Servings", "Yield","Rating","Rating Count"]
pd.DataFrame(data, headers)

In [None]:
data_n = [(data_.loc[neighbour[0][1:], 'name']),
          (data_.loc[neighbour[0][1:], 'category']),
          (data_.loc[neighbour[0][1:], 'servings']),
          (data_.loc[neighbour[0][1:], 'yield']),
          (data_.loc[neighbour[0][1:], 'rating']),
          (data_.loc[neighbour[0][1:], 'rating_count']),
          (data_.loc[neighbour[0][1:], 'ingr_results']),
          (data_.loc[neighbour[0][1:], 'directions'])]
headers = [
    "Recipe", "Category", "Servings", "Yield",
    "Rating", "Rating Count", "Ingredients", "Directions"
]
pd.DataFrame(data_n, headers)

## Function to find recommendations by recipe ID

In [None]:
def print_recipe_recom_format(x):
    """Return the recipes most similar to the recipe at row ``x`` as a table.

    Uses the notebook-level ``kNN`` model and TF-IDF ``sparse_matrix``. The number
    of suggestions is the model's ``n_neighbors`` minus one, because the query
    recipe itself is left out.

    Parameters
    ----------
    x : int
        Row index of the query recipe in ``data_`` / ``sparse_matrix``.

    Returns
    -------
    pandas.DataFrame
        One row per field listed in ``headers`` and one column per suggested recipe.
    """
    recipe_to_assess = sparse_matrix[x]
    neighbour = kNN.kneighbors(recipe_to_assess, return_distance=False)[0]
    # Drop the query by index rather than by position: recipes with identical
    # ingredient vectors tie at distance 0 and can be returned ahead of the query.
    recommended = [i for i in neighbour if i != x][:len(neighbour) - 1]
    # data_ columns, in the same order as the display headers below.
    columns = [
        'name', 'category', 'servings', 'yield', 'rating', 'rating_count',
        'ingr_results', 'directions'
    ]
    headers = [
        "Recipe", "Category", "Servings", "Yield", "Rating", "Rating Count",
        "Ingredients", "Directions"
    ]
    data_n = [data_.loc[recommended, column] for column in columns]
    return pd.DataFrame(data_n, headers)

In [None]:
print_recipe_recom_format(88)

In [None]:
def print_recipe_recom_format_(x):
    """Print a lead-in line and return the recipes most similar to row ``x``.

    Same as ``print_recipe_recom_format`` but the table also carries each recipe's
    URL. Uses the notebook-level ``kNN`` model and TF-IDF ``sparse_matrix``; the
    number of suggestions is the model's ``n_neighbors`` minus one.

    Parameters
    ----------
    x : int
        Row index of the query recipe in ``data_`` / ``sparse_matrix``.

    Returns
    -------
    pandas.DataFrame
        One row per field listed in ``headers`` and one column per suggested recipe.
    """
    recipe_to_assess = sparse_matrix[x]
    neighbour = kNN.kneighbors(recipe_to_assess, return_distance=False)[0]
    # Drop the query by index rather than by position: recipes with identical
    # ingredient vectors tie at distance 0 and can be returned ahead of the query.
    recommended = [i for i in neighbour if i != x][:len(neighbour) - 1]
    # data_ columns, in the same order as the display headers below.
    columns = [
        'name', 'category', 'servings', 'yield', 'rating', 'rating_count',
        'ingr_results', 'directions', 'url'
    ]
    headers = [
        "Recipe", "Category", "Servings", "Yield", "Rating", "Rating Count",
        "Ingredients", "Directions", "Url"]
    data_n = [data_.loc[recommended, column] for column in columns]
    print("If you like {}, you should try:\n\n".format((data_.loc[x, 'name'])))
    return pd.DataFrame(data_n, headers)

In [None]:
print_recipe_recom_format_(199)

In [None]:
def print_recipe_recom_n_and_index(y,x):
    """Fit a cosine kNN with ``y`` neighbours and return recipes similar to row ``x``.

    The model is fitted on the notebook-level TF-IDF ``sparse_matrix`` on every call.

    Parameters
    ----------
    y : int
        Number of neighbours to retrieve, counting the query recipe itself, so
        ``y - 1`` suggestions are returned.
    x : int
        Row index of the query recipe in ``data_`` / ``sparse_matrix``.

    Returns
    -------
    pandas.DataFrame
        One row per field listed in ``headers`` and one column per suggested recipe.
    """
    from sklearn.neighbors import NearestNeighbors

    kNN = NearestNeighbors(n_neighbors=y, metric='cosine')
    kNN.fit(sparse_matrix)
    recipe_to_assess = sparse_matrix[x]
    neighbour = kNN.kneighbors(recipe_to_assess, return_distance=False)[0]
    # Drop the query by index rather than by position: recipes with identical
    # ingredient vectors tie at distance 0 and can be returned ahead of the query.
    recommended = [i for i in neighbour if i != x][:len(neighbour) - 1]
    # data_ columns, in the same order as the display headers below.
    columns = [
        'name', 'category', 'servings', 'yield', 'rating', 'rating_count',
        'ingr_results', 'directions', 'url'
    ]
    headers = [
        "Recipe", "Category", "Servings", "Yield", "Rating", "Rating Count",
        "Ingredients", "Directions", "Url"]
    data_n = [data_.loc[recommended, column] for column in columns]
    print("If you like {}, you should try:\n\n".format((data_.loc[x, 'name'])))
    return pd.DataFrame(data_n, headers)

In [None]:
print_recipe_recom_n_and_index(5,55)

In [None]:
def print_recipe_recom_n_and_index_up(y, x):
    """Self-contained recommender: vectorise, fit kNN, return recipes similar to row ``x``.

    Unlike ``print_recipe_recom_n_and_index`` this does not rely on a notebook-level
    matrix: it builds the TF-IDF matrix from ``data_["ingr_cleaner"]`` and fits a
    cosine kNN on every call.

    Parameters
    ----------
    y : int
        Number of neighbours to retrieve, counting the query recipe itself, so
        ``y - 1`` suggestions are returned.
    x : int
        Row index of the query recipe in ``data_``.

    Returns
    -------
    pandas.DataFrame
        One row per field listed in ``headers`` and one column per suggested recipe.
    """
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.neighbors import NearestNeighbors

    tfidf = TfidfVectorizer()

    # astype('U') turns missing values into the string 'nan' so the vectorizer
    # accepts every row.
    sparse_matrix = tfidf.fit_transform(
        data_["ingr_cleaner"].values.astype('U'))

    kNN = NearestNeighbors(n_neighbors=y, metric='cosine')
    kNN.fit(sparse_matrix)
    recipe_to_assess = sparse_matrix[x]
    neighbour = kNN.kneighbors(recipe_to_assess, return_distance=False)[0]
    # Drop the query by index rather than by position: recipes with identical
    # ingredient vectors tie at distance 0 and can be returned ahead of the query.
    recommended = [i for i in neighbour if i != x][:len(neighbour) - 1]
    # data_ columns, in the same order as the display headers below.
    columns = [
        'name', 'category', 'servings', 'yield', 'rating', 'rating_count',
        'ingr_results', 'directions', 'url'
    ]
    headers = [
        "Recipe", "Category", "Servings", "Yield", "Rating", "Rating Count",
        "Ingredients", "Directions", "Url"
    ]
    data_n = [data_.loc[recommended, column] for column in columns]
    print("If you like {}, you should try:\n\n".format((data_.loc[x, 'name'])))
    return pd.DataFrame(data_n, headers)

In [None]:
print_recipe_recom_n_and_index_up(5,55)

### Function to find similar recipes - user input

In [None]:
def recipe_reco_input():
    """Prompt for search words and return recipes similar to the first matching recipe.

    The words typed by the user are matched (case-insensitively, any word) against
    recipe names only; the first matching row of ``data_`` becomes the query. A
    TF-IDF matrix of ``data_["ingr_cleaner"]`` and a 4-neighbour cosine kNN are built
    on every call, so three suggestions are returned.

    Returns
    -------
    pandas.DataFrame
        One row per field listed in ``headers`` and one column per suggested recipe;
        an empty frame with the same rows when the input is blank or nothing matches.
    """
    import re

    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.neighbors import NearestNeighbors

    headers = [
        "Recipe", "Category", "Servings", "Yield", "Rating", "Rating Count",
        "Ingredients", "Directions", "Url"
    ]

    input_ingr = input("Which recipe or ingredient are you looking for? ")
    # Escape each word so characters such as "(" or "+" are matched literally
    # instead of being interpreted as regex syntax.
    strings = [re.escape(word) for word in input_ingr.split()]
    if not strings:
        # An empty pattern would match every recipe and silently pick the first row.
        print("Please enter at least one word to search for.")
        return pd.DataFrame(index=headers)
    mask = data_[data_['name'].str.contains('|'.join(strings), case=False, na=False)]
    if mask.empty:
        # Previously this case raised IndexError on mask.index[0].
        print("No recipe found for {}.".format(input_ingr))
        return pd.DataFrame(index=headers)
    mask_index = mask.index[0]

    tfidf = TfidfVectorizer()

    # astype('U') turns missing values into the string 'nan' so the vectorizer
    # accepts every row.
    sparse_matrix = tfidf.fit_transform(
        data_["ingr_cleaner"].values.astype('U'))

    kNN = NearestNeighbors(n_neighbors=4, metric='cosine')
    kNN.fit(sparse_matrix)
    recipe_to_assess = sparse_matrix[mask_index]
    neighbour = kNN.kneighbors(recipe_to_assess, return_distance=False)[0]
    # Drop the query by index rather than by position: recipes with identical
    # ingredient vectors tie at distance 0 and can be returned ahead of the query.
    recommended = [i for i in neighbour if i != mask_index][:len(neighbour) - 1]
    # data_ columns, in the same order as the display headers.
    columns = [
        'name', 'category', 'servings', 'yield', 'rating', 'rating_count',
        'ingr_results', 'directions', 'url'
    ]
    data_n = [data_.loc[recommended, column] for column in columns]
    print("If you like {}, you should try:\n\n".format(input_ingr))
    return pd.DataFrame(data_n, headers)

In [None]:
recipe_reco_input()

### def recipe_ingredient_recom_input

In [None]:
def recipe_ingredient_recom_input():
    """Ask for a recipe or ingredient and recommend similar recipes.

    The words typed by the user are matched (case-insensitively, any word)
    against both ``data_['name']`` and ``data_['ingr_cleaner']``. One of the
    matching recipes is picked at random as the query; all recipes are TF-IDF
    vectorised on ``ingr_cleaner`` and the recipes closest to the query by
    cosine distance are returned.

    Returns:
        pandas.DataFrame: one row per field (``Recipe``, ``Category``, ...)
        and one column per recommended recipe (up to three), labelled with
        the recipe's index in ``data_``.

    Raises:
        ValueError: if the input is blank or nothing in the data set matches.
    """
    import re

    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.neighbors import NearestNeighbors

    input_ingr = input("Which recipe or ingredient are you looking for? ")
    strings = input_ingr.split()
    if not strings:
        # An empty pattern would silently match every recipe.
        raise ValueError("Type at least one word to search for.")

    # Escape the words so that characters such as '+' or '(' are matched
    # literally instead of being parsed as (possibly invalid) regex syntax.
    pattern = '|'.join(re.escape(word) for word in strings)
    hits = (data_['name'].str.contains(pattern, case=False, na=False)
            | data_['ingr_cleaner'].str.contains(pattern, case=False, na=False))
    hit_positions = hits.to_numpy().nonzero()[0]
    if len(hit_positions) == 0:
        raise ValueError(
            "No recipe name or ingredient matches {!r}.".format(input_ingr))
    # Sample a row *position* (not an index label): the rows of the TF-IDF
    # matrix are positional.
    query_pos = int(pd.Series(hit_positions).sample().iloc[0])

    tfidf = TfidfVectorizer()
    sparse_matrix = tfidf.fit_transform(
        data_["ingr_cleaner"].values.astype('U'))

    kNN = NearestNeighbors(n_neighbors=min(4, sparse_matrix.shape[0]),
                           metric='cosine')
    kNN.fit(sparse_matrix)
    neighbour = kNN.kneighbors(sparse_matrix[query_pos], return_distance=False)
    # Remove the query recipe explicitly: when several recipes share the same
    # ingredient text it is not guaranteed to be the first neighbour returned.
    similar = [pos for pos in neighbour[0] if pos != query_pos][:3]
    recommended = data_.iloc[similar]

    fields = ['name', 'category', 'servings', 'yield', 'rating',
              'rating_count', 'ingr_results', 'directions', 'url']
    data_n = [recommended[field] for field in fields]
    headers = [
        "Recipe", "Category", "Servings", "Yield", "Rating", "Rating Count",
        "Ingredients", "Directions", "Url"
    ]
    print("\nIf you like {}, you should try:\n\n".format(input_ingr))
    return pd.DataFrame(data_n, headers)

In [None]:
# Interactive: prompts for a search text and displays the returned table
recipe_ingredient_recom_input()

In [None]:
import dataframe_image as dfi
# Run the interactive recommender once and keep the table it returns
data_dfdata_df = recipe_ingredient_recom_input()

# Export that table to "table.png" in the current working directory
dfi.export(data_dfdata_df, "table.png")

In [None]:
# NOTE(review): `recipeing_reco_input` is not defined in the visible part of
# this notebook; presumably an earlier name of `recipe_ingredient_recom_input`
# — confirm, otherwise this cell raises NameError on a fresh kernel.
recipeing_reco_input()

## Search by ingredients

### Find any recipe containing any of these ingredients

In [None]:
# Match recipes mentioning at least one of the ingredients as a whole word.
# The alternation must not contain stray spaces: they are part of the pattern
# and made each branch require a literal leading/trailing blank.
pattern = r'\b(?:butter|eggs|milk|lemon)\b'
mask = data_["ingr_results"].str.contains(pattern, case=False, na=False)
# sample() raises if asked for more rows than matched, so cap the sample size
data_[mask].sample(min(3, int(mask.sum())))

### Find any recipe containing all of these ingredients

In [None]:
# One lookahead per ingredient: every ingredient must appear somewhere in the
# text, in any order ('butter.*eggs.*milk.*lemon' only matched that exact
# order). (?s) lets '.' cross line breaks inside the ingredient list.
pattern = r'(?s)(?=.*butter)(?=.*eggs)(?=.*milk)(?=.*lemon)'
mask = data_["ingr_results"].str.contains(pattern, case=False, na=False)
# sample() raises if asked for more rows than matched, so cap the sample size
data_[mask].sample(min(3, int(mask.sum())))

In [None]:
#input pattern
#find pattern in data_["ingr_results"]
#return index of 1 sample
#vectorise ingredients
#return neighbours
#get results

In [None]:
# Ingredients that must all appear in a recipe's ingredient list
ing1, ing2, ing3 = "butter", "eggs", "milk"
# Recipes whose `ingr_results` text contains every one of the three
# ingredients (case-insensitive; rows with missing text never match)
mask = data_.loc[
    data_['ingr_results'].str.contains(ing1, case=False, na=False)
    & data_['ingr_results'].str.contains(ing2, case=False, na=False)
    & data_['ingr_results'].str.contains(ing3, case=False, na=False)
]

In [None]:
# TF-IDF matrix of the filtered recipes only. Cast to unicode like every other
# vectorising cell: `mask` was filtered on `ingr_results`, so `ingr_cleaner`
# may still hold NaN, which TfidfVectorizer rejects.
sparse_matrix = tfidf.fit_transform(mask.ingr_cleaner.values.astype('U'))
sparse_matrix

In [None]:
# Index labels of the rows currently held in `mask`
mask.index

In [None]:
# Print every index label of `mask`, one per line
for x in mask.index:
    print(x)

In [None]:
# Index label of the first row in `mask` (IndexError if `mask` is empty)
first_index = mask.index[0]

In [None]:
# Keep only the row of `mask` whose index label is `first_index`
df9 = mask.filter(items = [first_index], axis=0)
df9

In [None]:
def recipe_recommendation_sys(ing1,ing2,ing3):
    """Recommend recipes similar to the first one containing three ingredients.

    The first recipe whose ``ingr_cleaner`` text contains ``ing1``, ``ing2``
    and ``ing3`` (case-insensitive; each argument is used as a regular
    expression) becomes the query. All recipes are TF-IDF vectorised on
    ``ingr_cleaner`` and the recipes closest to the query by cosine distance
    are returned.

    Args:
        ing1, ing2, ing3: ingredient patterns that must all be present.

    Returns:
        pandas.DataFrame: one row per field (``Recipe``, ``Category``, ...)
        and one column per recommended recipe (up to three), labelled with
        the recipe's index in ``data_``.

    Raises:
        IndexError: if no recipe contains all three ingredients.
    """
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.neighbors import NearestNeighbors

    hits = (data_['ingr_cleaner'].str.contains(ing1, case=False, na=False)
            & data_['ingr_cleaner'].str.contains(ing2, case=False, na=False)
            & data_['ingr_cleaner'].str.contains(ing3, case=False, na=False))
    hit_positions = hits.to_numpy().nonzero()[0]
    if len(hit_positions) == 0:
        raise IndexError(
            "No recipe contains {}, {} and {}.".format(ing1, ing2, ing3))
    # Row position (not index label): the rows of the TF-IDF matrix are
    # positional, which only matches the labels for a default RangeIndex.
    first_pos = int(hit_positions[0])

    tfidf = TfidfVectorizer()
    sparse_matrix = tfidf.fit_transform(data_["ingr_cleaner"].values.astype('U'))

    kNN = NearestNeighbors(n_neighbors=min(4, sparse_matrix.shape[0]),
                           metric='cosine')
    kNN.fit(sparse_matrix)
    neighbour = kNN.kneighbors(sparse_matrix[first_pos], return_distance=False)
    # Remove the query recipe explicitly: when several recipes share the same
    # ingredient text it is not guaranteed to be the first neighbour returned.
    similar = [pos for pos in neighbour[0] if pos != first_pos][:3]
    recommended = data_.iloc[similar]

    fields = ['name', 'category', 'servings', 'yield', 'rating',
              'rating_count', 'ingr_results', 'directions', 'url']
    data_n = [recommended[field] for field in fields]
    headers = [
        "Recipe", "Category", "Servings", "Yield", "Rating", "Rating Count",
        "Ingredients", "Directions", "Url"]
    print("If you like {}, {} and {}, you should try:\n\n".format(ing1,ing2,ing3))
    return pd.DataFrame(data_n, headers)

In [None]:
# Recommend recipes close to the first one containing egg, lemon and tomato
recipe_recommendation_sys("egg","lemon","tomato")

In [None]:
# Step-by-step version of the recommender: pick the three ingredients ...
ing1_, ing2_, ing3_ = "lemon", "tomato", "milk"
# ... and keep the recipes whose cleaned ingredient text contains all of them
# (case-insensitive; rows with missing text never match)
mask = data_.loc[
    data_['ingr_cleaner'].str.contains(ing1_, case=False, na=False)
    & data_['ingr_cleaner'].str.contains(ing2_, case=False, na=False)
    & data_['ingr_cleaner'].str.contains(ing3_, case=False, na=False)
]
mask

In [None]:
# Index label of the first matching recipe (IndexError if `mask` is empty)
first_index = mask.index[0]

# One-row frame holding just that recipe
mask_df = mask.filter(items=[first_index], axis=0)
mask_df

In [None]:
# Vectorise the cleaned ingredient text of every recipe.
# NOTE(review): relies on TfidfVectorizer having been imported at notebook
# level by an earlier cell — confirm it survives Restart & Run All.
tfidf = TfidfVectorizer()

sparse_matrix = tfidf.fit_transform(data_["ingr_cleaner"].values.astype('U'))
sparse_matrix

In [None]:
# Cosine-distance nearest-neighbour model over the TF-IDF rows
kNN = NearestNeighbors(n_neighbors=4, metric='cosine')

kNN.fit(sparse_matrix)
# Matrix row used as the query; `first_index` is an index label used here as a
# row position — assumes `data_` has a default RangeIndex, TODO confirm
ingredients_to_assess = sparse_matrix[first_index]

In [None]:
# Row positions of the 4 nearest recipes to the query (distances not returned)
neighbour = kNN.kneighbors(ingredients_to_assess, return_distance=False)

In [None]:
def print_recipe_recom_n_and_index_up(y,x):
    """Recommend the recipes closest to recipe ``x``.

    Args:
        y: number of neighbours requested from the model; the first neighbour
            returned is discarded, so ``y - 1`` recipes are reported.
        x: index of the recipe to find neighbours for.

    Returns:
        pandas.DataFrame: one row per field (``Recipe``, ``Category``, ...)
        and one column per recommended recipe.
    """
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.neighbors import NearestNeighbors

    # TF-IDF weights of every recipe's cleaned ingredient text
    ingredient_texts = data_["ingr_cleaner"].values.astype('U')
    weights = TfidfVectorizer().fit_transform(ingredient_texts)

    # Cosine-distance neighbours of row x, without the first hit
    finder = NearestNeighbors(n_neighbors=y, metric='cosine').fit(weights)
    nearest = finder.kneighbors(weights[x], return_distance=False)
    picks = nearest[0][1:]

    source_columns = ('name', 'category', 'servings', 'yield', 'rating',
                      'rating_count', 'ingr_results', 'directions', 'url')
    data_n = [data_.loc[picks, column] for column in source_columns]
    headers = [
        "Recipe", "Category", "Servings", "Yield", "Rating", "Rating Count",
        "Ingredients", "Directions", "Url"]

    liked = data_.loc[x, 'name']
    print("If you like {}, you should try:\n\n".format(liked))
    return pd.DataFrame(data_n, headers)

In [None]:
# n_neighbors=2 and the first neighbour is dropped: one recommendation for recipe 55
print_recipe_recom_n_and_index_up(2,55)

In [None]:
def recipe_recommendation_sys2(ing1,ing2,ing3):
    """Recommend recipes among those that contain all three ingredients.

    Only recipes whose ``ingr_cleaner`` text contains ``ing1``, ``ing2`` and
    ``ing3`` (case-insensitive; each argument is used as a regular expression)
    are considered. One of them is picked at random as the query, the subset
    is TF-IDF vectorised, and the other recipes of the subset closest to the
    query by cosine distance are returned.

    Args:
        ing1, ing2, ing3: ingredient patterns that must all be present.

    Returns:
        pandas.DataFrame: one row per field (``Recipe``, ``Category``, ...)
        and one column per recommended recipe (up to three, fewer when the
        subset is small), labelled with the recipe's index in ``data_``.

    Raises:
        ValueError: if no recipe contains all three ingredients.
    """
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.neighbors import NearestNeighbors

    mask = data_[(data_['ingr_cleaner'].str.contains(ing1, case=False, na=False)) &
                 (data_['ingr_cleaner'].str.contains(ing2, case=False, na=False)) &
                 (data_['ingr_cleaner'].str.contains(ing3, case=False, na=False))]
    if mask.empty:
        raise ValueError(
            "No recipe contains {}, {} and {}.".format(ing1, ing2, ing3))

    # Pick a random row *position* inside the subset. (An Index has no
    # .sample(), and the matrix below is indexed by position within `mask`.)
    random_pos = int(pd.Series(range(len(mask))).sample(n=1).iloc[0])

    tfidf = TfidfVectorizer()
    sparse_matrix = tfidf.fit_transform(mask["ingr_cleaner"].values.astype('U'))

    # kneighbors() rejects n_neighbors larger than the number of fitted rows
    kNN = NearestNeighbors(n_neighbors=min(4, len(mask)), metric='cosine')
    kNN.fit(sparse_matrix)
    neighbour = kNN.kneighbors(sparse_matrix[random_pos], return_distance=False)
    # Drop the query recipe itself, wherever it appears among the neighbours
    similar = [pos for pos in neighbour[0] if pos != random_pos][:3]
    # Neighbour positions refer to rows of `mask`, not of `data_`
    recommended = mask.iloc[similar]

    fields = ['name', 'category', 'servings', 'yield', 'rating',
              'rating_count', 'ingr_results', 'directions', 'url']
    data_n = [recommended[field] for field in fields]
    headers = [
        "Recipe", "Category", "Servings", "Yield", "Rating", "Rating Count",
        "Ingredients", "Directions", "Url"]
    print("If you like {}, {} and {}, you should try:\n\n".format(ing1,ing2,ing3))
    return pd.DataFrame(data_n, headers)

In [None]:
# Note: this calls recipe_recommendation_sys, not the recipe_recommendation_sys2 variant
recipe_recommendation_sys("egg","lemon","pomegranate")

### Recipe filter function

In [None]:
def recipe_filter(ing1,ing2,ing3):
    """Return a random sample of recipes containing all three ingredients.

    A recipe matches when its ``ingr_cleaner`` text contains ``ing1``,
    ``ing2`` and ``ing3`` (case-insensitive; each argument is used as a
    regular expression).

    Args:
        ing1, ing2, ing3: ingredient patterns that must all be present.

    Returns:
        pandas.DataFrame: one row per field (``Recipe``, ``Category``, ...)
        and one column per sampled recipe, labelled with the recipe's index
        in ``data_``. Three recipes are sampled, or fewer when fewer match.
    """
    hits = (data_['ingr_cleaner'].str.contains(ing1, case=False, na=False)
            & data_['ingr_cleaner'].str.contains(ing2, case=False, na=False)
            & data_['ingr_cleaner'].str.contains(ing3, case=False, na=False))
    mask = data_[hits]

    # sample() raises when asked for more rows than available, so cap n
    mask_df = mask.sample(n=min(3, len(mask)))

    fields = ['name', 'category', 'servings', 'yield', 'rating',
              'rating_count', 'ingr_results', 'directions', 'url']
    data_n = [mask_df[field] for field in fields]
    headers = [
        "Recipe", "Category", "Servings", "Yield", "Rating", "Rating Count",
        "Ingredients", "Directions", "Url"]
    print("If you like {}, {} and {}, you should try:\n\n".format(ing1,ing2,ing3))
    return pd.DataFrame(data_n, headers)

In [None]:
# Random sample of recipes containing potato, tomato and carrot
recipe_filter("potato","tomato","carrot")

### Function to filter recipes by ingredients - user input

In [None]:
def recipe_filter_sys():
    """Ask for ingredients and return the first recipes containing any of them.

    The typed text is split on whitespace, commas and semicolons. A recipe
    matches when its ``ingr_cleaner`` text contains at least one of the words
    (case-insensitive, words matched literally). Blank input matches nothing.

    Returns:
        pandas.DataFrame: one row per field (``Recipe``, ``Category``, ...)
        and one column per recipe (the first four matches at most), labelled
        with the recipe's index in ``data_``. Ingredients and directions are
        wrapped at 100 characters.
    """
    import re

    input_ingr = input("What are your ingredients? ")

    # Split into words. Iterating over the raw string would loop over single
    # characters and test the whole input once per character.
    words = [word for word in re.split(r'[\s,;]+', input_ingr) if word]
    if words:
        pattern = '|'.join(re.escape(word) for word in words)
        hits = data_['ingr_cleaner'].str.contains(pattern, case=False, na=False)
        mask = data_.loc[hits]
    else:
        mask = data_.iloc[0:0]

    mask_df = mask[:4]

    data_n = [mask_df['name'], mask_df['category'], mask_df['servings'],
              mask_df['yield'], mask_df['rating'], mask_df['rating_count'],
              mask_df['ingr_results'].str.wrap(100),
              mask_df['directions'].str.wrap(100), mask_df['url']]
    headers = [
        "Recipe", "Category", "Servings", "Yield", "Rating", "Rating Count",
        "Ingredients", "Directions", "Url"
    ]
    print("\nIf you like {}, you should try:\n\n".format(input_ingr))
    return pd.DataFrame(data_n, headers)

In [None]:
# Interactive: prompts for ingredients and displays the returned table
recipe_filter_sys()

In [None]:
import dataframe_image as dfi
# Run the interactive filter once and keep the table it returns
data_dfda_ta_df = recipe_filter_sys()

# Export that table to "tablef.png" in the current working directory
dfi.export(data_dfda_ta_df, "tablef.png")

# Clustering with KMeans

In [None]:
# Import the necessary modules
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer


# Extract the ingredients column from the dataframe
ingredients = data_["ingr_cleaner"]

# Convert the ingredients column to a matrix of numerical values
tfidf = TfidfVectorizer()

sparse_matrix = tfidf.fit_transform(ingredients.values.astype('U'))

# Create a KMeans clustering model with 5 clusters.
# random_state pins the centroid initialisation so cluster ids (and the plots
# built on them) are identical on every run; n_init is set explicitly because
# its default differs between scikit-learn versions.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)

# Fit the model and get the cluster label of each recipe in a single pass
cluster_labels = kmeans.fit_predict(sparse_matrix)

# Add the cluster labels as a new column in the dataframe
data_['cluster'] = cluster_labels

# Keep the first three recipes of every cluster as a quick preview
clusters = data_.groupby('cluster').head(3)

In [None]:
fig, ax = plt.subplots(figsize=(10, 15))

sns.set_color_codes("muted")

# Cluster ids are nominal labels, so averaging them per category (what
# barplot does) is meaningless; count the recipes of each category per
# cluster instead, and draw on the axes created above.
sns.countplot(data=data_, y='category', hue='cluster', ax=ax)

ax.set(title='Recipe Categories by cluster',
       xlabel='Number of recipes', ylabel='Category');

In [None]:
# Bar heights must be numbers: plot how many recipes fall in each cluster
# (passing the category strings as heights drew one bar per recipe).
cluster_sizes = data_['cluster'].value_counts().sort_index()
plt.bar(cluster_sizes.index, cluster_sizes.values)
plt.xlabel('Cluster')
plt.ylabel('Number of recipes')
plt.title('Recipes per cluster');

# App

In [None]:
import pickle
import requests
import json

In [None]:
# Persist the fitted nearest-neighbour model. The context manager closes (and
# flushes) the file even if pickling fails.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(kNN, model_file)


In [None]:
from flask import Flask,render_template,url_for,request
import pickle
import joblib

app = Flask(__name__)

@app.route('/')
def home():
    """Render the ``home.html`` template for the root URL."""
    page = render_template('home.html')
    return page

@app.route('/predict',methods=['POST'])
def predict():
    """Recommend recipes similar to the one named in the submitted form.

    The search text is read from the ``message`` form field. Its words are
    matched (case-insensitively, any word) against the recipe names; the first
    match becomes the query and the recipes closest to it by cosine distance
    over TF-IDF ingredient vectors are rendered with ``result.html``.

    The template receives ``prediction``: a DataFrame with one row per field
    (``Recipe``, ``Category``, ...) and one column per recommended recipe
    (no columns when nothing matched).
    NOTE(review): ``result.html`` is not visible here — confirm it renders a
    DataFrame (e.g. ``{{ prediction.to_html() | safe }}``).
    """
    import re

    import pandas as pd
    # Imported before first use: a function-level import makes the name local
    # to the whole function, so referencing it above the import statement
    # raises UnboundLocalError.
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.neighbors import NearestNeighbors

    data_ = pd.read_csv(r"C:\Users\melen\Documents\DS Bootcamp\data_recipes_clean.csv")
    # The query comes from the POSTed form; input() would block the server
    # waiting on its console.
    input_ingr = request.form['message']
    strings = input_ingr.split()

    similar = []
    if strings:
        # Escape the words so user text is matched literally, not as regex
        pattern = '|'.join(re.escape(word) for word in strings)
        hits = data_['name'].str.contains(pattern, case=False, na=False)
        hit_positions = hits.to_numpy().nonzero()[0]
        if len(hit_positions) > 0:
            # Row position (not index label) of the first matching recipe
            mask_index = int(hit_positions[0])

            tfidf = TfidfVectorizer()
            sparse_matrix = tfidf.fit_transform(
                data_["ingr_cleaner"].values.astype('U'))

            kNN = NearestNeighbors(n_neighbors=min(4, sparse_matrix.shape[0]),
                                   metric='cosine')
            kNN.fit(sparse_matrix)
            neighbour = kNN.kneighbors(sparse_matrix[mask_index],
                                       return_distance=False)
            # Drop the query recipe itself, wherever it appears
            similar = [pos for pos in neighbour[0] if pos != mask_index][:3]

    recommended = data_.iloc[similar]
    fields = ['name', 'category', 'servings', 'yield', 'rating',
              'rating_count', 'ingr_results', 'directions', 'url']
    data_n = [recommended[field] for field in fields]
    headers = [
        "Recipe", "Category", "Servings", "Yield", "Rating", "Rating Count",
        "Ingredients", "Directions", "Url"
    ]
    # NearestNeighbors has no predict(); the neighbours found above are the
    # result. The model is refitted per request, so it is not re-pickled here.
    my_prediction = pd.DataFrame(data_n, headers)
    return render_template('result.html', prediction = my_prediction)

# Start Flask's development server when this code runs as the main module
if __name__ == '__main__':
    app.run(debug=True)

In [None]:
from flask import Flask, render_template, url_for, request
import pandas as pd
import pickle
import joblib

app = Flask(__name__)


@app.route('/')
def home():
    """Render the ``home.html`` template for the root URL."""
    page = render_template('home.html')
    return page


@app.route('/predict', methods=['POST'])
def predict():
    """Render the first recipes that contain any of the submitted ingredients.

    The ingredient text is read from the ``message`` form field and split on
    whitespace, commas and semicolons. A recipe matches when its
    ``ingr_cleaner`` text contains at least one of the words
    (case-insensitive, words matched literally).

    The template receives ``prediction``: a DataFrame with one row per field
    (``Recipe``, ``Category``, ...) and one column per recipe (the first three
    matches at most, no columns when nothing matched).
    NOTE(review): ``result.html`` is not visible here — confirm it renders a
    DataFrame (e.g. ``{{ prediction.to_html() | safe }}``).
    """
    import re

    # The query comes from the POSTed form; input() would block the server
    # waiting on its console.
    input_ingr = request.form['message']

    # Split into words. Iterating over the raw string would loop over single
    # characters and test the whole input once per character.
    words = [word for word in re.split(r'[\s,;]+', input_ingr) if word]
    if words:
        pattern = '|'.join(re.escape(word) for word in words)
        hits = data_['ingr_cleaner'].str.contains(pattern, case=False, na=False)
        mask = data_.loc[hits]
    else:
        mask = data_.iloc[0:0]

    mask_df = mask[:3]

    data_n = [mask_df['name'], mask_df['category'], mask_df['servings'],
              mask_df['yield'], mask_df['rating'], mask_df['rating_count'],
              mask_df['ingr_results'], mask_df['directions'], mask_df['url']]
    headers = [
        "Recipe", "Category", "Servings", "Yield", "Rating", "Rating Count",
        "Ingredients", "Directions", "Url"
    ]
    # A view must return a response, not a DataFrame: hand the table to the
    # template instead.
    my_prediction = pd.DataFrame(data_n, headers)
    return render_template('result.html', prediction=my_prediction)


# Start Flask's development server when this code runs as the main module
if __name__ == '__main__':
    app.run(debug=True)

In [None]:
# NOTE(review): presumably meant to stop long text cells from being truncated;
# the documented value for "no limit" is None — confirm 0 behaves as intended.
pd.set_option('display.max_colwidth', 0)

In [None]:
import dataframe_image as dfi
# Run the interactive filter once and keep the table it returns
data_dfda_ta_df = recipe_filter_sys()

# Export that table to "tofu_rice.png" in the current working directory
dfi.export(data_dfda_ta_df, "tofu_rice.png")

In [None]:
# Interactive: prompts for ingredients and displays the returned table
recipe_filter_sys()

In [None]:
# Interactive: prompts for a search text and displays the returned table
recipe_ingredient_recom_input()

In [None]:
# Same interactive call again (the query recipe is sampled at random, so results can differ)
recipe_ingredient_recom_input()