# Culinary Canvas

In [85]:
import pandas as pd
import string
from functools import reduce
from operator import add
import ast
import re
import spacy


## Data Pre-processing

In [86]:
df=pd.read_csv('/kaggle/input/food-ingredients-and-recipe-dataset-with-images/Food Ingredients and Recipe Dataset with Image Name Mapping.csv',index_col=0)

In [87]:
print("DataFrame Info:")
print(df.info())

print("\nMissing Values:")
print(df.isnull().sum())


DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
Index: 13501 entries, 0 to 13500
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Title                13496 non-null  object
 1   Ingredients          13501 non-null  object
 2   Instructions         13493 non-null  object
 3   Image_Name           13501 non-null  object
 4   Cleaned_Ingredients  13501 non-null  object
dtypes: object(5)
memory usage: 632.9+ KB
None

Missing Values:
Title                  5
Ingredients            0
Instructions           8
Image_Name             0
Cleaned_Ingredients    0
dtype: int64


Dropping the null and missing values

In [90]:
df.dropna(inplace=True)

# Confirm that missing values have been dropped
print("DataFrame Info after dropping missing values:")
print(df.info())

DataFrame Info after dropping missing values:
<class 'pandas.core.frame.DataFrame'>
Index: 13493 entries, 0 to 13500
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Title                13493 non-null  object
 1   Ingredients          13493 non-null  object
 2   Instructions         13493 non-null  object
 3   Image_Name           13493 non-null  object
 4   Cleaned_Ingredients  13493 non-null  object
dtypes: object(5)
memory usage: 632.5+ KB
None


In [91]:
df.shape

(13493, 5)

Identifies rows in the DataFrame where 'Cleaned_Ingredients', 'Title', or 'Instructions' contain only punctuation or numbers.
Counts the number of rows to be dropped in each category.
Generates a unique set of indices corresponding to these rows.
Drops the identified rows from the DataFrame.
Resets the index of the updated DataFrame.

In [92]:
nc_ingred_index = [index for i, index in zip(df['Cleaned_Ingredients'], df.index) if all(j.isdigit() or j in string.punctuation for j in i)]
nc_title_index = [index for i, index in zip(df['Title'], df.index) if all(j.isdigit() or j in string.punctuation for j in i)]
nc_instr_index = [index for i, index in zip(df['Instructions'], df.index) if all(j.isdigit() or j in string.punctuation for j in i)]

# Checking number of rows in each category that are only punc/nums
index_list = [nc_ingred_index, nc_title_index, nc_instr_index]
num_to_drop = [len(x) for x in index_list]

# Generating unique indices for index_list and dropping from DataFrame
inds_to_drop = set(reduce(add, index_list))
print(len(inds_to_drop))

# Dropping rows with indices from inds_to_drop and resetting index
new_df = df.drop(index=inds_to_drop).reset_index(drop=True)
print(new_df.shape)

6
(13487, 5)


Removing all recipes having length < 20

In [93]:
# Removing recipes which have too little instructions
empty_instr_ind = [index for i, index in zip(new_df['Cleaned_Ingredients'], new_df.index) if len(i) < 20]
new_df = new_df.drop(index = empty_instr_ind).reset_index(drop=True)

print(new_df.shape)
new_df.isna().sum()

(13480, 5)


Title                  0
Ingredients            0
Instructions           0
Image_Name             0
Cleaned_Ingredients    0
dtype: int64

## Tokenizing

#### Ensuring Cleaned_Ingredients is a list and Title is typecasted to string

In [94]:
new_df['Cleaned_Ingredients'] = new_df['Cleaned_Ingredients'].apply(ast.literal_eval)
new_df['Title']=new_df['Title'].astype(str)

In [95]:
new_df['Title'].head(5)

0    Miso-Butter Roast Chicken With Acorn Squash Pa...
1                      Crispy Salt and Pepper Potatoes
2                          Thanksgiving Mac and Cheese
3                   Italian Sausage and Bread Stuffing
4                                         Newton's Law
Name: Title, dtype: object

In [96]:
new_df.loc[0,'Cleaned_Ingredients']

['1 (3½–4-lb.) whole chicken',
 '2¾ tsp. kosher salt, divided, plus more',
 '2 small acorn squash (about 3 lb. total)',
 '2 Tbsp. finely chopped sage',
 '1 Tbsp. finely chopped rosemary',
 '6 Tbsp. unsalted butter, melted, plus 3 Tbsp. room temperature',
 '¼ tsp. ground allspice',
 'Pinch of crushed red pepper flakes',
 'Freshly ground black pepper',
 '⅓ loaf good-quality sturdy white bread, torn into 1" pieces (about 2½ cups)',
 '2 medium apples (such as Gala or Pink Lady; about 14 oz. total), cored, cut into 1" pieces',
 '2 Tbsp. extra-virgin olive oil',
 '½ small red onion, thinly sliced',
 '3 Tbsp. apple cider vinegar',
 '1 Tbsp. white miso',
 '¼ cup all-purpose flour',
 '2 Tbsp. unsalted butter, room temperature',
 '¼ cup dry white wine',
 '2 cups unsalted chicken broth',
 '2 tsp. white miso',
 'Kosher salt',
 'freshly ground pepper']

#### Joining the string separated by ; instead of , to handle csv file in further steps

In [97]:
new_df['Ingredients_Text'] = ['; '.join(ingredients) for ingredients in new_df['Cleaned_Ingredients']]
new_df['Ingredients_Text'].head()


0    1 (3½–4-lb.) whole chicken; 2¾ tsp. kosher sal...
1    2 large egg whites; 1 pound new potatoes (abou...
2    1 cup evaporated milk; 1 cup whole milk; 1 tsp...
3    1 (¾- to 1-pound) round Italian loaf, cut into...
4    1 teaspoon dark brown sugar; 1 teaspoon hot wa...
Name: Ingredients_Text, dtype: object

In [98]:
new_df['Ingredients_Count'] = [len(ingredients) for ingredients in new_df['Cleaned_Ingredients']]

#### Storing the dataframe to the csv file

In [99]:
new_df.head()
new_df.to_csv('Processed_Recipees.csv')

#### Merging the tree coumns into one and storing the text into All_text.csv

In [100]:
all_text = new_df['Title'] + ' ' + new_df['Ingredients_Text'] + ' ' + new_df['Instructions']
all_text[0]

'Miso-Butter Roast Chicken With Acorn Squash Panzanella 1 (3½–4-lb.) whole chicken; 2¾ tsp. kosher salt, divided, plus more; 2 small acorn squash (about 3 lb. total); 2 Tbsp. finely chopped sage; 1 Tbsp. finely chopped rosemary; 6 Tbsp. unsalted butter, melted, plus 3 Tbsp. room temperature; ¼ tsp. ground allspice; Pinch of crushed red pepper flakes; Freshly ground black pepper; ⅓ loaf good-quality sturdy white bread, torn into 1" pieces (about 2½ cups); 2 medium apples (such as Gala or Pink Lady; about 14 oz. total), cored, cut into 1" pieces; 2 Tbsp. extra-virgin olive oil; ½ small red onion, thinly sliced; 3 Tbsp. apple cider vinegar; 1 Tbsp. white miso; ¼ cup all-purpose flour; 2 Tbsp. unsalted butter, room temperature; ¼ cup dry white wine; 2 cups unsalted chicken broth; 2 tsp. white miso; Kosher salt; freshly ground pepper Pat chicken dry with paper towels, season all over with 2 tsp. salt, and tie legs together with kitchen twine. Let sit at room temperature 1 hour.\nMeanwhile, 

In [101]:
all_text.to_csv("All_text.csv")

#### Cleaning the text such as removing punctuations, empty spaces, new lines and generating cleaned_text.csv

In [102]:
def clean_text(documents):
    cleaned_text = []
    for doc in documents:
        doc = doc.translate(str.maketrans('', '', string.punctuation)) # Remove Punctuation
        doc = re.sub(r'\d+', '', doc) # Remove Digits
        doc = doc.replace('\n',' ') # Remove New Lines
        doc = doc.strip() # Remove Leading White Space
        doc = re.sub(' +', ' ', doc) # Remove multiple white spaces
        cleaned_text.append(doc)
    return cleaned_text

# Cleaning Text
cleaned_text = clean_text(all_text)
pd.Series(cleaned_text).to_csv('Cleaned_Text.csv')

In [103]:
all_text[2]

'Thanksgiving Mac and Cheese 1 cup evaporated milk; 1 cup whole milk; 1 tsp. garlic powder; 1 tsp. onion powder; 1 tsp. smoked paprika; ½ tsp. freshly ground black pepper; 1 tsp. kosher salt, plus more; 2 lb. extra-sharp cheddar, coarsely grated; 4 oz. full-fat cream cheese; 1 lb. elbow macaroni Place a rack in middle of oven; preheat to 400°. Bring evaporated milk and whole milk to a bare simmer in a large saucepan over medium heat. Whisk in garlic powder, onion powder, paprika, pepper, and 1 tsp. salt. Working in batches, whisk in three fourths of the cheddar, then all of the cream cheese.\nMeanwhile, bring a large pot of generously salted water to a boil (it should have a little less salt than seawater). Cook macaroni, stirring occasionally, until very al dente, about 4 minutes. Drain in a colander.\nAdd macaroni to cheese sauce in pan and mix until well coated. Evenly spread out half of macaroni mixture in a 13x9" baking dish. Sprinkle half of remaining cheddar evenly over. Layer r

In [104]:
cleaned_text[2]

'Thanksgiving Mac and Cheese cup evaporated milk cup whole milk tsp garlic powder tsp onion powder tsp smoked paprika ½ tsp freshly ground black pepper tsp kosher salt plus more lb extrasharp cheddar coarsely grated oz fullfat cream cheese lb elbow macaroni Place a rack in middle of oven preheat to ° Bring evaporated milk and whole milk to a bare simmer in a large saucepan over medium heat Whisk in garlic powder onion powder paprika pepper and tsp salt Working in batches whisk in three fourths of the cheddar then all of the cream cheese Meanwhile bring a large pot of generously salted water to a boil it should have a little less salt than seawater Cook macaroni stirring occasionally until very al dente about minutes Drain in a colander Add macaroni to cheese sauce in pan and mix until well coated Evenly spread out half of macaroni mixture in a x baking dish Sprinkle half of remaining cheddar evenly over Layer remaining macaroni mixture on top and sprinkle with remaining cheddar Bake un

### Striping down the text as much as possible. In this case that means lemmatizing words and removing stop words. The goal here is not text prediction, but similarity measures and keyword extraction, which don't require the semantic granularity that stop words and non-lemmatized words might provide.

In [105]:
# Testing Strategies and Code
nlp = spacy.load("en_core_web_sm")

' '.join([token.lemma_ for token in nlp(cleaned_text[2]) if not token.is_stop])

'Thanksgiving Mac Cheese cup evaporate milk cup milk tsp garlic powder tsp onion powder tsp smoke paprika ½ tsp freshly grind black pepper tsp kosher salt plus lb extrasharp cheddar coarsely grated oz fullfat cream cheese lb elbow macaroni Place rack middle oven preheat ° bring evaporate milk milk bare simmer large saucepan medium heat Whisk garlic powder onion powder paprika pepper tsp salt Working batch whisk fourth cheddar cream cheese bring large pot generously salt water boil little salt seawater Cook macaroni stir occasionally al dente minute Drain colander Add macaroni cheese sauce pan mix coated Evenly spread half macaroni mixture x bake dish Sprinkle half remain cheddar evenly Layer remain macaroni mixture sprinkle remain cheddar Bake cheese melt minute let cool slightly serve'

#### Stop word removal and lemmatization

In [106]:
# Tokenizing Function that lemmatizes words and removes Stop Words
def text_tokenizer(documents):
    tokenized_documents = []
    for doc in documents:
        tok_doc = ' '.join([token.lemma_ for token in nlp(doc) if not token.is_stop])
        tokenized_documents.append(tok_doc)
    return tokenized_documents

In [107]:
tokenized_text = text_tokenizer(cleaned_text)

Saving the tokenized_text to a csv so not to run a whole cell again

In [108]:
pd.Series(tokenized_text).to_csv('tokenized_text.csv')

print(tokenized_text[0])
print("\n"*3)
print(tokenized_text[2])

MisoButter Roast Chicken Acorn Squash Panzanella ½–lb chicken ¾ tsp kosher salt divide plus small acorn squash lb total Tbsp finely chop sage Tbsp finely chop rosemary Tbsp unsalted butter melt plus tbsp room temperature ¼ tsp ground allspice Pinch crushed red pepper flake freshly ground black pepper ⅓ loaf goodquality sturdy white bread tear piece ½ cup medium apple Gala Pink Lady oz total core cut piece Tbsp extravirgin olive oil ½ small red onion thinly slice Tbsp apple cider vinegar Tbsp white miso ¼ cup allpurpose flour Tbsp unsalted butter room temperature ¼ cup dry white wine cup unsalte chicken broth tsp white miso Kosher salt freshly ground pepper Pat chicken dry paper towel season tsp salt tie leg kitchen twine let sit room temperature hour halve squash scoop seed run vegetable peeler ridge squash half remove skin cut half ½thick wedge arrange rimmed baking sheet Combine sage rosemary Tbsp melt butter large bowl pour half mixture squash bake sheet Sprinkle squash allspice red

#### Transform a collection of tokenized text data into a TF-IDF (Term Frequency-Inverse Document Frequency) matrix.

In [109]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(lowercase = True,
                            ngram_range = (1,1))

text_tfidf = vectorizer.fit_transform(tokenized_text)
tfidf_words = vectorizer.get_feature_names_out()
print(text_tfidf.shape)
print(len(tfidf_words))

(13480, 17087)
17087


## Topic Modeling
- LDA
- NNMF

The ultimate goal with topic modeling is to group documents together and generate category words using TextRank. These category words can then be used to further refine the recommendation query

------

LDA and NNMF extract topic models by finding similar subgroups of text within the corpora of recipes (or other text documents). However

In [110]:
text_tfidf.shape

(13480, 17087)

In [111]:
from sklearn.decomposition import LatentDirichletAllocation as LDA

lda = LDA(n_components = 50,
          n_jobs = -1,
          max_iter = 100)
text_lda = lda.fit_transform(text_tfidf)
text_lda.shape

(13480, 50)

In [112]:
from sklearn.decomposition import NMF

nmf = NMF(alpha_W=0.0,
         init='nndsvdar',
         l1_ratio=0.0,
         max_iter = 100,
         n_components = 50,
         solver='cd')

text_nmf = nmf.fit_transform(text_tfidf)
text_nmf.shape



(13480, 50)

* Now we will see the distribution of recipess using the topics which are being defined by LDA and NMF.
* For that we will use the merged text ("Title; Ingredients; Instructions")


### Next Steps:
1. Document x Topic Matrix
2. Word x Topic Matrix

In [113]:
# variable dependencies:
text_series = pd.Series(all_text)

def docs_by_tops(top_mat, topic_range = (0,0), doc_range = (0,2)):
    for i in range(topic_range[0], topic_range[1]):
        topic_scores = pd.Series(top_mat[:,i])
        doc_index = topic_scores.sort_values(ascending = False)[doc_range[0]:doc_range[1]].index
        for j, index in enumerate(doc_index, doc_range[0]):
            print('Topic #{}'.format(i),
                  '\nDocument #{}'.format(j),
                  '\nTopic Score: {}\n\n'.format(topic_scores[index]),
                  text_series[index], '\n\n')

* By arranging them in descending order we have selected top three recipees in first three topics.

In [114]:
docs_by_tops(text_lda,(0,3),(0,3))

Topic #0 
Document #0 
Topic Score: 0.2733735753787887

 Carnation Lemon Bars 2 cups all-purpose flour; 1/2 cup powdered sugar; 1 cup butter or margarine, softened; 1 (14-ounce) can Nestlé Carnation Sweetened Condensed Milk; 4 large eggs; 2/3 cup lemon juice; 1 tablespoon all-purpose flour; 1 teaspoon baking powder; 1/4 teaspoon salt; 4 drops yellow food coloring (optional); 1 tablespoon grated lemon peel; Sifted powdered sugar (optional) Preheat oven to 350°F.
Combine flour and powdered sugar in a medium bowl. Cut in butter with pastry blender or 2 knives until mixture is crumbly. Press lightly onto bottom and halfway up sides of ungreased 13 x 9-inch baking pan.
Bake for 20 minutes.
Beat sweetened condensed milk and eggs in a large mixing bowl until fluffy. Beat in lemon juice, flour, baking powder, salt, and food coloring until blended. Fold in grated lemon peel; pour over crust.
Bake until filling is set and crust is golden brown, about 20 to 25 minutes. Cool in pan on wire rack. R

In [115]:
docs_by_tops(text_nmf,(1,2),(90000,90001))

### Exploring Topics by words

In [116]:
text_nmf.shape

(13480, 50)

In [117]:
text_tfidf.T.shape

(17087, 13480)

In [118]:
# Function for best topic words using cosine similarity
# Variable Dependency:
word_series = pd.Series(tfidf_words)

def words_by_tops(tfidf_mat, top_mat, topic_range=(0,0), n_words=10):
    topic_word_scores = tfidf_mat.T * top_mat
    for i in range(topic_range[0],topic_range[1]):
        word_scores = pd.Series(topic_word_scores[:,i])
        word_index = word_scores.sort_values(ascending = False)[:n_words].index
        print('\nTopic #{}'.format(i))
        for index in word_index:
            print(word_series[index],'\t\t', word_scores[index])

* By using this function, we caliberated first three topic using the given TF-IDF words. (We can see that first three topic are being comprised of these top 10 words.)

In [119]:
# Keywords using LDA
words_by_tops(text_tfidf, text_lda, (0,3), 10)


Topic #0
cup 		 2.6761464011241487
tablespoon 		 1.9153241295859236
minute 		 1.8303345140301757
teaspoon 		 1.794969653826666
salt 		 1.7436765583345135
oil 		 1.6405221937667827
sugar 		 1.467669644698339
heat 		 1.462129701243681
pepper 		 1.4406853043219052
add 		 1.439887435694879

Topic #1
cup 		 2.5601955533846326
tablespoon 		 1.9361926254602957
teaspoon 		 1.7996265589225018
minute 		 1.7443376257875722
salt 		 1.6499777738118737
oil 		 1.624739119363484
heat 		 1.4489368003534737
add 		 1.4328838427163475
sugar 		 1.4198739652186674
pepper 		 1.4084511063074716

Topic #2
cup 		 37.062515755800064
sugar 		 30.75009586179475
juice 		 29.719782796292673
ice 		 29.59276562206275
syrup 		 23.84585064479922
lemon 		 22.688997180983552
glass 		 19.14403806384986
water 		 17.647766589647773
lime 		 17.46957038693417
strawberry 		 16.888589058804264


* Same thing as above but here we have used topics from NMF algorithm

In [120]:
# Words using NMF
words_by_tops(text_tfidf, text_nmf, (0,3), 10)


Topic #0
heat 		 4.104488316723269
oil 		 4.007459847705226
minute 		 3.8923718444262603
skillet 		 3.871123879347345
cook 		 3.764503505046831
cup 		 3.454891520331051
tablespoon 		 3.440286495844668
add 		 3.2294137777070864
salt 		 3.024061532401517
pepper 		 2.914679761557094

Topic #1
butter 		 12.908190965002872
cup 		 11.92429351495226
flour 		 10.70435331569814
sugar 		 9.140337916737984
bake 		 9.118674334943886
teaspoon 		 8.088851570850123
egg 		 7.930745508120486
minute 		 6.540562561995593
pan 		 6.288009276628132
tablespoon 		 6.251170540360929

Topic #2
dough 		 15.836945059393653
flour 		 8.128335033611355
cup 		 4.947073039256024
sugar 		 4.288315522981629
bake 		 4.1939206815302565
roll 		 4.085252466583574
butter 		 4.0084020528967965
minute 		 3.6704939235056666
sheet 		 3.4590364848784834
teaspoon 		 3.333943978839933


* By comparing these two topics we discovered that NMF topics are highly effective.
* So from now, we will use score of NMF algorithm

### Keyword Extraction of Topics Using TextRank

The purpose of using TextRank to extract keywords 

------------

Consider using a smaller corpora size, so as to more quickly code. Then run the entire copora.
Using the time module to test out corpora sizes.

Using TextRank to summarize the topics by extracting words involves many variables:
- Deciding how many of the top documents each the topic should be summarized?
    - Arbitrarily: top 100, evaluate, then only decrease from there.
- Should TextRank then be performed once over the selected topic corpora, or should it be run individually and then scores added to make an aggregated rank?
    - For the sake of simplicity it should probably only be run over the entire corpora
- How many top ranked words should be used?
    - Check out the top ranks words first, then decide.
    - Arbitrarily choosing the top 20 ranked
- Then once keywords are decided, to how many documents should those words be assigned the extract?
    - I think it depends on the number of documents used to find the categorical keywords.
- Once we're satisfied with a TextRank strategy, we need to this about which method to use to extract topics (NNMF, LDA) as well as whether we'll NNMF, LDA, or TextRank to extract keywords.

In [121]:
# # Pulling the top one-hundred documents ranked in similarity among Topic #1
# text_index = pd.Series(text_nmf[:,1]).sort_values(ascending = False)[:100].index
# text_4summary = pd.Series(cleaned_text)[text_index]

# # Manually Creating a list of recipe stop
# recipe_stopwords = ['cup','cups','ingredient','ingredients','teaspoon','tablespoon','oven']


* We defined some constants, which we will be using for retrieval of recipees.

In [123]:
# import numpy as np
N_topics = 50             # Number of Topics to Extract from corpora
N_top_docs = 200          # Number of top documents within each topic to extract keywords
N_top_words = 25          # Number of keywords to extract from each topic
N_docs_categorized = 2000 # Number of top documents within each topic to tag 
N_neighbor_window = 4     # Length of word-radius that defines the neighborhood for
                          # each word in the TextRank adjacency table

# Query Similarity Weights
w_title = 0.2
w_text = 0.3
w_categories = 0.5
w_array = np.array([w_title, w_text, w_categories])

# Recipe Stopwords: for any high volume food recipe terminology that doesn't contribute
# to the searchability of a recipe. This list must be manually created.
recipe_stopwords = ['cup','cups','ingredient','ingredients','teaspoon','teaspoons','tablespoon',
                   'tablespoons','C','F','minutes']

In [124]:
topic_transformed_matrix = text_nmf
root_text_data = pd.read_csv('Cleaned_Text.csv').iloc[:, 1].to_list()

# The Functions that are defined below:

**Topic_docs_4kwsummary**
* It returns top **200** recipees given the score of particular topic

**generate_filter_kws**
* Filters out specific parts of speech and stop words from the list of potential keywords

**generate_adjacency**
* Tabulates counts of neighbors in the neighborhood window for each unique word

**generate_wordranks**
* Runs TextRank on adjacency table

**generate_tag_list**
* Uses TextRank ranks to return actual key words for each topic in rank order

**generate_tags**
* It generates the tag list for each topic.

In [125]:
def topic_docs_4kwsummary(topic_document_scores, root_text_data):
    '''Gathers and formats the top recipes in each topic'''
    text_index = pd.Series(topic_document_scores).sort_values(ascending = False)[:N_top_docs].index
    text_4kwsummary = pd.Series(root_text_data)[text_index]
    return text_4kwsummary

def generate_filter_kws(text_list):
    '''Filters out specific parts of speech and stop words from the list of potential keywords'''
    parsed_texts = nlp(' '.join(text_list)) 
    kw_filter = set([str(word) for word in parsed_texts 
                if (word.pos_== ('NOUN' or 'ADJ' or 'VERB'))
                and word.lemma_ not in recipe_stopwords])
    kw_filter=list(kw_filter)
    kw_filter=sorted(kw_filter)
    print(np.array(kw_filter).shape)
    return kw_filter, parsed_texts

# def generate_adjacency(kw_filts, parsed_texts):
#     '''Tabulates counts of neighbors in the neighborhood window for each unique word'''
#     adjacency = pd.DataFrame(columns=kw_filts, index=kw_filts, data = 0)
#     for i, word in enumerate(parsed_texts):
#         if any ([str(word) == item for item in kw_filts]):
#             end = min(len(parsed_texts), i+N_neighbor_window+1) # Neighborhood Window Utilized Here
#             nextwords = parsed_texts[i+1:end]
#             inset = [str(x) in kw_filts for x in nextwords]
#             neighbors = [str(nextwords[i]) for i in range(len(nextwords)) if inset[i]]
#             if neighbors:
#                 adjacency.loc[str(word), neighbors] += 1
#     return adjacency

def generate_adjacency(kw_filts, parsed_texts):
    '''Tabulates counts of neighbors in the neighborhood window for each unique word'''
    kw_filts_list = kw_filts
    adjacency = pd.DataFrame(columns=kw_filts_list, index=kw_filts_list, data=0)
    
    for i, word in enumerate(parsed_texts):
        if str(word) in kw_filts_list:
            end = min(len(parsed_texts), i + N_neighbor_window + 1)  # Neighborhood Window Utilized Here
            nextwords = [str(parsed_texts[j]) for j in range(i + 1, end)]
            inset = [word in kw_filts_list for word in nextwords]
            neighbors = [nextwords[j] for j in range(len(nextwords)) if inset[j]]
            
            if neighbors:
                for neighbor in neighbors:
                    adjacency.loc[str(word), neighbor] += 1
                    # adjacency.loc[neighbor, str(word)] += 1  # Add bidirectional relationship

    return adjacency

                
# def generate_wordranks(adjacency):
#     '''Runs TextRank on adjacency table'''
#     nx_words = nx.from_numpy_matrix(adjacency.values)
#     ranks=nx.pagerank(nx_words, alpha=.85, tol=.00000001)
#     return ranks

def generate_wordranks(adjacency):
    '''Runs TextRank on adjacency table'''
    nx_words = nx.from_numpy_array(adjacency.values)
    ranks = nx.pagerank(nx_words, alpha=0.85, tol=1e-8)
    return ranks

def generate_tag_list(kw_filter,ranks):
    '''Uses TextRank ranks to return actual key words for each topic in rank order'''
    rank_values = [i for i in ranks.values()]
    ranked = pd.DataFrame(zip(rank_values, kw_filter)).sort_values(by=0,axis=0,ascending=False)
    kw_list = ranked.iloc[:N_top_words,1].to_list()
    return kw_list

# Master Function utilizing all above functions
def generate_tags(topic_document_scores, root_text_data):
    text_4kwsummary = topic_docs_4kwsummary(topic_document_scores, root_text_data)
    kw_filter, parsed_texts = generate_filter_kws(text_4kwsummary)
    adjacency = generate_adjacency(kw_filter, parsed_texts)
    ranks = generate_wordranks(adjacency)
    kw_list = generate_tag_list(kw_filter,ranks)
    return kw_list

def generate_kw_index(topic_document_scores):
    kw_index = pd.Series(topic_document_scores).sort_values(ascending = False)[:N_docs_categorized].index
    return kw_index


In [126]:
recipes=pd.read_csv('Processed_Recipees.csv')

* The loop below creates the tag list for each topic using score from the NMF algorithm and then appends that tag list to the top **2000** recipees for the same topic.

In [127]:
import networkx as nx
recipes['tag_list'] = [[] for _ in range(recipes.shape[0])]

for i in range(topic_transformed_matrix.shape[1]):
    scores = topic_transformed_matrix[:, i]
    topic_kws = generate_tags(scores, root_text_data)
    kw_index_4df = generate_kw_index(scores)
    
    # Update the 'tag_list' column
    recipes.loc[kw_index_4df, 'tag_list'] = recipes.loc[kw_index_4df, 'tag_list'].apply(lambda x: x + topic_kws)
    if i % 10 == 0:
        print(f'Topic #{i} Checkpoint')

print('done!')

(1097,)
Topic #0 Checkpoint
(845,)
(1683,)
(1062,)
(1249,)
(1002,)
(1050,)
(905,)
(1245,)
(1056,)
(1222,)
Topic #10 Checkpoint
(1243,)
(1268,)
(1261,)
(1246,)
(1045,)
(450,)
(1094,)
(1074,)
(1066,)
(1018,)
Topic #20 Checkpoint
(1434,)
(1117,)
(1049,)
(1071,)
(1236,)
(1158,)
(1141,)
(1075,)
(1077,)
(1213,)
Topic #30 Checkpoint
(996,)
(1094,)
(1070,)
(1097,)
(1141,)
(1221,)
(1238,)
(1211,)
(992,)
(807,)
Topic #40 Checkpoint
(1294,)
(1156,)
(1144,)
(1050,)
(1097,)
(1026,)
(1145,)
(1198,)
(1117,)
done!


* The numbers shown above are the shape of the tag list for the 50 topics generated by NMF algorithm.

In [128]:
recipes.head()
new=recipes
new.head()

Unnamed: 0.1,Unnamed: 0,Title,Ingredients,Instructions,Image_Name,Cleaned_Ingredients,Ingredients_Text,Ingredients_Count,tag_list
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...",1 (3½–4-lb.) whole chicken; 2¾ tsp. kosher sal...,22,"[baking, flour, butter, sugar, minutes, bowl, ..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"['2 large egg whites', '1 pound new potatoes (...",2 large egg whites; 1 pound new potatoes (abou...,7,"[potatoes, minutes, salt, oil, pepper, potato,..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"['1 cup evaporated milk', '1 cup whole milk', ...",1 cup evaporated milk; 1 cup whole milk; 1 tsp...,10,"[potatoes, minutes, salt, oil, pepper, potato,..."
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"['1 (¾- to 1-pound) round Italian loaf, cut in...","1 (¾- to 1-pound) round Italian loaf, cut into...",13,"[skillet, minutes, oil, heat, salt, Add, peppe..."
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",1 teaspoon dark brown sugar; 1 teaspoon hot wa...,6,"[ounce, juice, ice, ounces, glass, lemon, shak..."


In [129]:
new['Instructions'] = new['Instructions'].str.replace(',', ';')
new['Instructions'] = new['Instructions'].str.replace('.', ';')
new['Instructions'] = new['Instructions'].str.replace('\n', ';')
new.head()

Unnamed: 0.1,Unnamed: 0,Title,Ingredients,Instructions,Image_Name,Cleaned_Ingredients,Ingredients_Text,Ingredients_Count,tag_list
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...",Pat chicken dry with paper towels; season all ...,miso-butter-roast-chicken-acorn-squash-panzanella,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...",1 (3½–4-lb.) whole chicken; 2¾ tsp. kosher sal...,22,"[baking, flour, butter, sugar, minutes, bowl, ..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"['2 large egg whites', '1 pound new potatoes (...",2 large egg whites; 1 pound new potatoes (abou...,7,"[potatoes, minutes, salt, oil, pepper, potato,..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"['1 cup evaporated milk', '1 cup whole milk', ...",1 cup evaporated milk; 1 cup whole milk; 1 tsp...,10,"[potatoes, minutes, salt, oil, pepper, potato,..."
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle; Gen...,italian-sausage-and-bread-stuffing-240559,"['1 (¾- to 1-pound) round Italian loaf, cut in...","1 (¾- to 1-pound) round Italian loaf, cut into...",13,"[skillet, minutes, oil, heat, salt, Add, peppe..."
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",1 teaspoon dark brown sugar; 1 teaspoon hot wa...,6,"[ounce, juice, ice, ounces, glass, lemon, shak..."


In [130]:
new.reset_index(drop=True)

Unnamed: 0.1,Unnamed: 0,Title,Ingredients,Instructions,Image_Name,Cleaned_Ingredients,Ingredients_Text,Ingredients_Count,tag_list
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...",Pat chicken dry with paper towels; season all ...,miso-butter-roast-chicken-acorn-squash-panzanella,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...",1 (3½–4-lb.) whole chicken; 2¾ tsp. kosher sal...,22,"[baking, flour, butter, sugar, minutes, bowl, ..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"['2 large egg whites', '1 pound new potatoes (...",2 large egg whites; 1 pound new potatoes (abou...,7,"[potatoes, minutes, salt, oil, pepper, potato,..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"['1 cup evaporated milk', '1 cup whole milk', ...",1 cup evaporated milk; 1 cup whole milk; 1 tsp...,10,"[potatoes, minutes, salt, oil, pepper, potato,..."
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle; Gen...,italian-sausage-and-bread-stuffing-240559,"['1 (¾- to 1-pound) round Italian loaf, cut in...","1 (¾- to 1-pound) round Italian loaf, cut into...",13,"[skillet, minutes, oil, heat, salt, Add, peppe..."
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",1 teaspoon dark brown sugar; 1 teaspoon hot wa...,6,"[ounce, juice, ice, ounces, glass, lemon, shak..."
...,...,...,...,...,...,...,...,...,...
13475,13475,Brownie Pudding Cake,"['1 cup all-purpose flour', '2/3 cup unsweeten...",Preheat the oven to 350°F; Into a bowl sift to...,brownie-pudding-cake-14408,"['1 cup all-purpose flour', '2/3 cup unsweeten...",1 cup all-purpose flour; 2/3 cup unsweetened c...,13,"[baking, flour, butter, sugar, minutes, bowl, ..."
13476,13476,Israeli Couscous with Roasted Butternut Squash...,"['1 preserved lemon', '1 1/2 pound butternut s...",Preheat oven to 475°F;;Halve lemons and scoop ...,israeli-couscous-with-roasted-butternut-squash...,"['1 preserved lemon', '1 1/2 pound butternut s...",1 preserved lemon; 1 1/2 pound butternut squas...,10,"[potatoes, minutes, salt, oil, pepper, potato,..."
13477,13477,Rice with Soy-Glazed Bonito Flakes and Sesame ...,['Leftover katsuo bushi (dried bonito flakes) ...,If using katsuo bushi flakes from package; moi...,rice-with-soy-glazed-bonito-flakes-and-sesame-...,['Leftover katsuo bushi (dried bonito flakes) ...,Leftover katsuo bushi (dried bonito flakes) fr...,7,"[a, rice, minutes, water, heat, salt, oil, Add..."
13478,13478,Spanakopita,['1 stick (1/2 cup) plus 1 tablespoon unsalted...,Melt 1 tablespoon butter in a 12-inch heavy sk...,spanakopita-107344,['1 stick (1/2 cup) plus 1 tablespoon unsalted...,1 stick (1/2 cup) plus 1 tablespoon unsalted b...,6,"[sheet, minutes, baking, pastry, F, oven, salt..."


In [131]:
new.head()

Unnamed: 0.1,Unnamed: 0,Title,Ingredients,Instructions,Image_Name,Cleaned_Ingredients,Ingredients_Text,Ingredients_Count,tag_list
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...",Pat chicken dry with paper towels; season all ...,miso-butter-roast-chicken-acorn-squash-panzanella,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...",1 (3½–4-lb.) whole chicken; 2¾ tsp. kosher sal...,22,"[baking, flour, butter, sugar, minutes, bowl, ..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"['2 large egg whites', '1 pound new potatoes (...",2 large egg whites; 1 pound new potatoes (abou...,7,"[potatoes, minutes, salt, oil, pepper, potato,..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"['1 cup evaporated milk', '1 cup whole milk', ...",1 cup evaporated milk; 1 cup whole milk; 1 tsp...,10,"[potatoes, minutes, salt, oil, pepper, potato,..."
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle; Gen...,italian-sausage-and-bread-stuffing-240559,"['1 (¾- to 1-pound) round Italian loaf, cut in...","1 (¾- to 1-pound) round Italian loaf, cut into...",13,"[skillet, minutes, oil, heat, salt, Add, peppe..."
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",1 teaspoon dark brown sugar; 1 teaspoon hot wa...,6,"[ounce, juice, ice, ounces, glass, lemon, shak..."


In [132]:
new.to_csv('tagged_recipes_df.csv')

In [133]:
new.loc[:5,'tag_list']

0    [baking, flour, butter, sugar, minutes, bowl, ...
1    [potatoes, minutes, salt, oil, pepper, potato,...
2    [potatoes, minutes, salt, oil, pepper, potato,...
3    [skillet, minutes, oil, heat, salt, Add, peppe...
4    [ounce, juice, ice, ounces, glass, lemon, shak...
5    [pasta, Add, pepper, salt, minutes, oil, heat,...
Name: tag_list, dtype: object

In [134]:
new['tags'] = [' '.join(tags) for tags in new['tag_list']]

In [135]:
new.columns

Index(['Unnamed: 0', 'Title', 'Ingredients', 'Instructions', 'Image_Name',
       'Cleaned_Ingredients', 'Ingredients_Text', 'Ingredients_Count',
       'tag_list', 'tags'],
      dtype='object')

#### Remaing our colums (picture link would be replaced by actual links if the data was collected through crawling)

In [136]:
new.rename(columns={
    'Title': 'title',
    'Ingredients': 'ingredients',
    'Instructions': 'instructions',
    'Image_Name': 'picture_link',
    'Cleaned_Ingredients': 'ingredient_text',
    'Ingredients_Text': 'ingredient_count',
    'tag_list': 'tag_list',
    'tags': 'tags'
}, inplace=True)

# Rename the DataFrame itself
new.rename({'new': 'recipes'}, axis=1, inplace=True)


# Querying Algorithm
The final product presented is a search algorithm that takes in a list of ingredients or categories, and uses the query to return relavant recipes that utilize those ingredients or are similarly related to other ingredients and those recipes.

In [137]:
recipes.columns

Index(['Unnamed: 0', 'title', 'ingredients', 'instructions', 'picture_link',
       'ingredient_text', 'ingredient_count', 'Ingredients_Count', 'tag_list',
       'tags'],
      dtype='object')

In [138]:
# Creating TF-IDF Matrices and recalling text dependencies

'''import text_tokenized.csv here'''

# TF-IDF vectorizer instance
'''vectorizer = TfidfVectorizer(lowercase = True,
                            ngram_range = (1,1))'''

'''text_tfidf = vectorizer.fit_transform(tokenized_text)'''
title_tfidf = vectorizer.transform(recipes['title'])
# text_tfidf    <== Variable with recipe ingredients and instructions
tags_tfidf = vectorizer.transform(recipes['tags'])
# recipes   <== DataFrame; For indexing and printing recipes

# Query Similarity Weights
w_title = .2
w_text = .3
w_categories = .5

In [139]:
def qweight_array(query_length, qw_array = [1]):
    '''Returns descending weights for ranked query ingredients'''
    if query_length > 1:
        to_split = qw_array.pop()
        split = to_split/2
        qw_array.extend([split, split])
        return qweight_array(query_length - 1, qw_array)
    else:
        return np.array(qw_array)

def ranked_query(query):
    '''Called if query ingredients are ranked in order of importance.
    Weights and adds each ranked query ingredient vector.'''
    query = [[q] for q in query]      # place words in seperate documents
    q_vecs = [vectorizer.transform(q) for q in query] 
    qw_array = qweight_array(len(query),[1])
    q_weighted_vecs = q_vecs * qw_array
    q_final_vector = reduce(np.add,q_weighted_vecs)
    return q_final_vector

def overall_scores(query_vector):
    '''Calculates Query Similarity Scores against recipe title, instructions, and keywords.
    Then returns weighted averages of similarities for each recipe.'''
    final_scores = title_tfidf*query_vector.T*w_title
    final_scores += text_tfidf*query_vector.T*w_text
    final_scores += tags_tfidf*query_vector.T*w_categories
    return final_scores

def print_recipes(index, query, recipe_range):
    '''Prints recipes according to query similary ranks'''
    print('Search Query: {}\n'.format(query))
    for i, index in enumerate(index, recipe_range[0]):
        print('Recipe Rank: {}\t'.format(i+1),recipes.loc[index, 'title'],'\n')
        print('Ingredients:\n{}\n '.format(recipes.loc[index, 'ingredient_text']))
        print('Instructions:\n{}\n'.format(recipes.loc[index, 'instructions']))
        
def Search_Recipes(query, query_ranked=False, recipe_range=(0,3)):
    '''Master Recipe Search Function'''
    if query_ranked == True:
        q_vector = ranked_query(query)
    else:
        q_vector = vectorizer.transform([' '.join(query)])
    recipe_scores = overall_scores(q_vector)
    sorted_index = pd.Series(recipe_scores.toarray().T[0]).sort_values(ascending = False)[recipe_range[0]:recipe_range[1]].index
    return print_recipes(sorted_index, query, recipe_range)
    


### Testing the Algorithm

In [140]:
query = ['cinnamon', 'cream', 'banana']
Search_Recipes(query, query_ranked=True, recipe_range=(0,3))

Search Query: ['cinnamon', 'cream', 'banana']

Recipe Rank: 1	 Cinnamon White Hot Chocolate 

Ingredients:
['4 (3") cinnamon sticks', '1/8 teaspoon kosher salt', '2 1/2 cups whole milk, divided', '2 teaspoons cornstarch', '2 ounces high-quality white chocolate, chopped or white chocolate chips (about 1/3 cup)', '1/2 teaspoon vanilla extract', 'Unsweetened whipped cream and ground cinnamon (for serving)']
 
Instructions:
Toast cinnamon sticks in a medium dry saucepan over medium heat until fragrant; about 1 minute; Add salt and 2 cups milk and bring to a simmer; Remove from heat and let steep 10 minutes;;Whisk cornstarch and remaining 1/2 cup milk in a small bowl; Add to warm milk mixture and heat over medium; Bring to a simmer and cook; stirring constantly; until warmed through; about 1 minute; Remove from heat and add chocolate and vanilla; Let sit until chocolate is melted; about 30 seconds; then stir to combine; Discard cinnamon sticks;;Divide hot chocolate between mugs; Top with wh

In [141]:
# Test Rank
query = ['wine', 'cilantro','butter']
Search_Recipes(query, query_ranked=False, recipe_range=(0,3))

Search Query: ['wine', 'cilantro', 'butter']

Recipe Rank: 1	 Chile-Cilantro Butter 

Ingredients:
['1 finely chopped seeded Fresno chile', '1 tablespoon chopped fresh cilantro or marjoram', '1/2 teaspoon fresh lemon or lime juice', '1/2 teaspoon kosher salt', '1/2 cup (1 stick) room-temperature unsalted butter', 'Freshly ground black pepper']
 
Instructions:
Mix 1 finely chopped seeded Fresno chile; 1 tablespoon chopped fresh cilantro or marjoram; 1/2 teaspoon fresh lemon or lime juice; and 1/2 teaspoon kosher salt into 1/2 cup (1 stick) room-temperature unsalted butter; Season with freshly ground black pepper; Place on parchment or waxed paper and roll up into a log; Chill until firm; 1 hour;;Butter can be made 2 days ahead; Keep chilled;

Recipe Rank: 2	 Grilled Corn with Lime-Cilantro Butter 

Ingredients:
['8 tablespoons (1 stick) unsalted butter, softened', '1 tablespoon freshly squeezed lime juice', '1/2 teaspoon lime zest', '1/2 teaspoon sugar', '1/2 teaspoon salt', '1/4 cup ch

# Conclusions and Future Work

In summary, the project's functionality has shown impressive results. Through various experiments, particularly with adjusting the weighting, we've discovered that leveraging the recipes' original text offers significantly enhanced outcomes compared to utilizing categories generated with TextRank. This insight opens avenues for further enriching the model by integrating additional topics. The exceptional granularity achieved by LDA and NNMF in clustering recipes underscores the potential for even finer categorization. A promising direction for improvement involves adopting dense word embeddings, which promise to capture semantic similarities between words with greater nuance and accuracy. One area for refinement is expanding the scope of words that are associated with each topic or category. Currently, the model may overlook relevant words that, despite being closely related to a topic, were not directly extracted from it. Addressing this would ensure a more comprehensive search functionality through the categories, enhancing the model's overall effectiveness and user experience.


Future Implementation and Changes for this model:

-  GloVe embeddings
- LDA2Vec topic extraction
- Using Web Scraping functions to get dynamic and up-to-date recipes from the web and keep changing the data accordingly
- Create an interface and GUI for users to upload their food items and get recipesraccordinglyordingly 

In [142]:
# Test 
query = ['jelly','wine']
Search_Recipes(query, query_ranked=False, recipe_range=(0,3))

Search Query: ['jelly', 'wine']

Recipe Rank: 1	 Jalapeño Pepper Jelly 

Ingredients:
['½ cup coarsely chopped seeded jalapeños (about 3 medium)', '½ cup coarsely chopped green bell pepper', '3 cups organic cane sugar', '½ cup apple cider vinegar', '1 (3-ounce) package liquid pectin']
 
Instructions:
Sterilize three ½-pint jars and their lids or similarly sized lidded storage containers;;In a food processor; combine the jalapeños and the bell pepper and process; scraping down the sides as needed; until the peppers are very finely chopped; 15 to 20 seconds; (They will let off quite a bit of liquid; this is okay;);Transfer the contents of the food processor to a 4-quart stainless-steel or enameled Dutch oven; Add the sugar and vinegar and bring to a rolling boil over medium-high heat; stirring continuously; Boil; still stirring; for 3 minutes; then squeeze in the pectin; Return the mixture to a rolling boil; stirring continuously; and cook for 1 minute; Immediately remove from the heat a

In [143]:
query = ['pepper','apple','pork']
Search_Recipes(query, query_ranked=False, recipe_range=(0,3))

Search Query: ['pepper', 'apple', 'pork']

Recipe Rank: 1	 Roasted Pork Tenderloin with Apple Chutney 

Ingredients:
['2 1- to 1 1/4-pound trimmed pork tenderloins', '1/2 cup apple cider', '1/2 cup dry red wine', '1 cup purchased apple chutney', '2 garlic cloves, chopped', '2 tablespoons fresh thyme leaves plus sprigs for garnish', '2 tablespoons olive oil']
 
Instructions:
Rinse pork and pat dry; Place in shallow bowl; Mix cider; wine; chutney; garlic; and thyme leaves in small bowl; Pour mixture over pork; cover and marinate at room temperature 1 hour or chill up to 3 hours;;Preheat oven to 375°F; Heat oil in large ovenproof skillet over medium-high heat; Remove pork from marinade; reserving marinade; Add pork to skillet; sprinkle with salt and pepper; and cook until light brown on all sides; about 6 minutes total;;Pour reserved marinade over pork; Transfer skillet to oven; roast pork; basting occasionally; until instant-read thermometer inserted into thickest part registers 145°F; a

In [144]:
### No Category Weight
query = ['cream','banana','cinnamon']
Search_Recipes(query, query_ranked=False, recipe_range=(0,3))

Search Query: ['cream', 'banana', 'cinnamon']

Recipe Rank: 1	 Banana Bread 

Ingredients:
['1 stick butter, melted, plus softened butter for greasing', '2 cups flour', '1/2 teaspoon salt', '1 1/2 teaspoons baking powder', '1 cup sugar', '3 very ripe bananas, mashed with a fork until smooth', '2 eggs', '1 teaspoon vanilla extract', '1/2 cup chopped walnuts (optional)', '1/2 cup shredded unsweetened coconut (optional)']
 
Instructions:
Heat the oven to 350°F; Grease a 9x5-inch loaf pan with softened butter;;Whisk together the flour; salt; baking powder; and sugar in a large bowl;;Mix together the melted butter and mashed bananas in a separate bowl; Beat in the eggs and vanilla until well combined; Stir this mixture into the dry ingredients just enough to combine everything; Gently fold in the nuts and coconut if you’re using them;;Pour the batter into the prepared pan; Bake for 50 to 60 minutes or until the bread is golden brown and a toothpick inserted in the center of the bread comes 

In [145]:
### Empty Query
query = []
Search_Recipes(query, query_ranked=False, recipe_range=(0,3))

Search Query: []

Recipe Rank: 1	 Miso-Butter Roast Chicken With Acorn Squash Panzanella 

Ingredients:
['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher salt, divided, plus more', '2 small acorn squash (about 3 lb. total)', '2 Tbsp. finely chopped sage', '1 Tbsp. finely chopped rosemary', '6 Tbsp. unsalted butter, melted, plus 3 Tbsp. room temperature', '¼ tsp. ground allspice', 'Pinch of crushed red pepper flakes', 'Freshly ground black pepper', '⅓ loaf good-quality sturdy white bread, torn into 1" pieces (about 2½ cups)', '2 medium apples (such as Gala or Pink Lady; about 14 oz. total), cored, cut into 1" pieces', '2 Tbsp. extra-virgin olive oil', '½ small red onion, thinly sliced', '3 Tbsp. apple cider vinegar', '1 Tbsp. white miso', '¼ cup all-purpose flour', '2 Tbsp. unsalted butter, room temperature', '¼ cup dry white wine', '2 cups unsalted chicken broth', '2 tsp. white miso', 'Kosher salt', 'freshly ground pepper']
 
Instructions:
Pat chicken dry with paper towels; season all ove

In [146]:
### Only Category Weight
query = ['apple','blueberry']
Search_Recipes(query, query_ranked=False, recipe_range=(0,3))

Search Query: ['apple', 'blueberry']

Recipe Rank: 1	 Blues-Busting Blueberry Ice Cream 

Ingredients:
['2 1/2 cups fresh blueberries, rinsed', '1 cup sugar', 'Juice of 1 small lemon', '3 cups whole cream']
 
Instructions:
Puree the blueberries with the sugar and lemon juice in the bowl of a food processor until smooth; Pour into a large bowl; Whisk in the cream until thoroughly combined; Pour into an ice cream maker and freeze according to the manufacturer's directions; Serve in separate bowls garnished with a few fresh blueberries and a sprig of mint;

Recipe Rank: 2	 Blueberry Compote 

Ingredients:
['1/2 cup water', '1/2 cup sugar', '2 (3- by 1/2-inch) strips fresh lemon zest', '2 cups blueberries (10 oz)', '1 1/2 tablespoons fresh lemon juice']
 
Instructions:
Boil water; sugar; and zest in a 1-quart heavy saucepan; uncovered; 5 minutes; Discard zest; Stir in blueberries and simmer; stirring occasionally; until blueberries begin to burst; 3 to 5 minutes; Remove from heat and stir 

In [147]:
### Only Category Weight
query = ['japanese']
Search_Recipes(query, query_ranked=False, recipe_range=(0,3))

Search Query: ['japanese']

Recipe Rank: 1	 Japanese Turnips with Miso 

Ingredients:
['3 tablespoons white miso', '3 tablespoons unsalted butter, softened, divided', '3 pounds small (1 1/2-to 2-inch) Japanese turnips with greens', '1 1/3 cups water', '2 tablespoons mirin (Japanese sweet rice wine)']
 
Instructions:
Stir together miso and 2 tablespoon butter;;Discard turnip stems and coarsely chop leaves; Halve turnips (leave whole if tiny) and put in a 12-inch heavy skillet along with water; mirin; remaining tablespoon butter; and 1/8 teaspoon salt; Bring to a boil over medium-high heat; then boil; covered; 10 minutes;;Add greens by handfuls; turning and stirring with tongs and adding more as volume in skillet reduces; Cover and cook 1 minute; Uncover and continue boiling; stirring occasionally; until turnips are tender and liquid is reduced to a glaze; about 5 minutes; Stir in miso butter and cook 1 minute;

Recipe Rank: 2	 Steamed Japanese Rice 

Ingredients:
['2 cups Japanese short

## Peerings into the generated topics

In [148]:
recipes.tags

0        baking flour butter sugar minutes bowl salt pa...
1        potatoes minutes salt oil pepper potato heat A...
2        potatoes minutes salt oil pepper potato heat A...
3        skillet minutes oil heat salt Add pepper cook ...
4        ounce juice ice ounces glass lemon shaker cock...
                               ...                        
13475    baking flour butter sugar minutes bowl salt pa...
13476    potatoes minutes salt oil pepper potato heat A...
13477    a rice minutes water heat salt oil Add medium ...
13478    sheet minutes baking pastry F oven salt ° suga...
13479    skillet minutes oil heat salt Add pepper cook ...
Name: tags, Length: 13480, dtype: object

In [149]:
recipes.tags[13]

'chicken minutes salt oil pepper heat Add F garlic skillet ° skin oven pan olive side lemon Transfer bowl sauce Place medium cut pieces juice a beef minutes salt oil pepper Add meat heat ribs garlic sauce pot F ° oven pan cut medium Transfer roast pound water inch broth pork minutes salt oil pepper F Add heat ° garlic sauce Transfer medium pan bowl oven chops skillet Pork cut water ground sugar Place pound tsp Tbsp salt minutes pepper oil bowl Add medium garlic ground baking juice red cut Transfer lemon heat cook sugar F skillet mixture butter chicken lemon juice salt oil minutes bowl pepper sugar medium zest Add olive butter heat garlic mixture baking pan F water Transfer ground cream cut ° turkey minutes pan salt pepper F ° Add heat oil butter roasting oven medium breast water garlic bowl skin rack temperature black Place stock sauce lamb a minutes salt pepper oil garlic Add heat ground medium bowl F ° Transfer lemon olive pan juice chops mixture cut oven cloves skillet bread slices 

In [150]:
recipes.tags[12297]

'skillet minutes oil heat salt Add pepper cook medium eggs stirring garlic Transfer bowl Heat mediumhigh olive Cook butter cut mixture season add inch lemon a dough minutes flour bowl salt F ° water surface butter sugar oil baking yeast warm inch oven mixture plastic wrap let Add sheet pan fish salmon salt oil minutes pepper heat fillets lemon bowl Add sauce skin juice olive fillet side garlic medium skillet baking cut oven Place mixture a beef minutes salt oil pepper Add meat heat ribs garlic sauce pot F ° oven pan cut medium Transfer roast pound water inch broth lemon juice salt oil minutes bowl pepper sugar medium zest Add olive butter heat garlic mixture baking pan F water Transfer ground cream cut ° lamb a minutes salt pepper oil garlic Add heat ground medium bowl F ° Transfer lemon olive pan juice chops mixture cut oven cloves skillet salt ground oil pepper minutes garlic Add seeds bowl cumin medium heat yogurt chicken mixture juice black lemon olive F onion inch ° coriander oven

In [151]:
recipes.head()

Unnamed: 0.1,Unnamed: 0,title,ingredients,instructions,picture_link,ingredient_text,ingredient_count,Ingredients_Count,tag_list,tags
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...",Pat chicken dry with paper towels; season all ...,miso-butter-roast-chicken-acorn-squash-panzanella,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...",1 (3½–4-lb.) whole chicken; 2¾ tsp. kosher sal...,22,"[baking, flour, butter, sugar, minutes, bowl, ...",baking flour butter sugar minutes bowl salt pa...
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"['2 large egg whites', '1 pound new potatoes (...",2 large egg whites; 1 pound new potatoes (abou...,7,"[potatoes, minutes, salt, oil, pepper, potato,...",potatoes minutes salt oil pepper potato heat A...
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"['1 cup evaporated milk', '1 cup whole milk', ...",1 cup evaporated milk; 1 cup whole milk; 1 tsp...,10,"[potatoes, minutes, salt, oil, pepper, potato,...",potatoes minutes salt oil pepper potato heat A...
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle; Gen...,italian-sausage-and-bread-stuffing-240559,"['1 (¾- to 1-pound) round Italian loaf, cut in...","1 (¾- to 1-pound) round Italian loaf, cut into...",13,"[skillet, minutes, oil, heat, salt, Add, peppe...",skillet minutes oil heat salt Add pepper cook ...
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",1 teaspoon dark brown sugar; 1 teaspoon hot wa...,6,"[ounce, juice, ice, ounces, glass, lemon, shak...",ounce juice ice ounces glass lemon shaker cock...
