In [81]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

checkpoint = "openai-community/gpt2-large"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

prompt = "Hugging face company is"
inputs = tokenizer(prompt, return_tensors = "pt")

outputs = model.generate(**inputs, penalty_alpha = 0.6, top_k=4, max_new_tokens=100)
tokenizer.batch_decode(outputs, skip_special_tokens=True)

  from .autonotebook import tqdm as notebook_tqdm
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


KeyboardInterrupt: 

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2-large")
model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2-large")

In [None]:
input_text = "Generate a vegan Italian recipe with the following ingredients: tomatoes, basil, garlic."
input_ids = tokenizer.encode(input_text, return_tensors="pt")

outputs = model.generate(input_ids, max_length=500, num_return_sequences=1)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

# The Best Vietnamese Recipe to fit your vibe!

## Data collection

Let's scrap the data now from a website!

In [1]:
from bs4 import BeautifulSoup
import requests

url = 'https://www.recipetineats.com/category/vietnamese-recipes/'
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')

Now that we have parsed through the website HTML, let's get all of the recipe links using find_all

In [2]:
recipe_links = soup.find_all('a', class_= 'entry-image-link')
# Get the href attribute to store the link
links = []
for recipe in recipe_links:
    links.append(recipe.get('href'))
print(links)

['https://www.recipetineats.com/vietnamese-lettuce-wraps-with-peanut-sauce/', 'https://www.recipetineats.com/vietnamese-caramel-ginger-chicken/', 'https://www.recipetineats.com/vietnamese-pork-noodle-bowls/', 'https://www.recipetineats.com/vietnamese-lemongrass-pork-steaks/', 'https://www.recipetineats.com/chicken-banh-mi-vietnamese-sandwich/', 'https://www.recipetineats.com/red-vietnamese-fried-rice/', 'https://www.recipetineats.com/vietnamese-shaking-beef/', 'https://www.recipetineats.com/vietnamese-baked-chicken/', 'https://www.recipetineats.com/vietnamese-chicken-salad/', 'https://www.recipetineats.com/vietnamese-chicken-pho-soup-pho-ga/', 'https://www.recipetineats.com/vietnamese-rice-paper-rolls-spring-rolls/', 'https://www.recipetineats.com/vietnamese-pho-recipe/', 'https://www.recipetineats.com/vietnamese-top-10-best-street-food-ho-chi-minh-city/', 'https://www.recipetineats.com/vietnamese-chicken-noodle-bowl/', 'https://www.recipetineats.com/vietnamese-caramelised-pork-bowls/'

After getting all of the links for all of the recipe, let's iterate through each recipe title to see if it works

In [3]:
from urllib.parse import urljoin

for link in links:
    recipe_page = requests.get(link)

    recipe_soup = BeautifulSoup(recipe_page.content, 'html.parser')

    # Extract the recipe's title
    recipe_title = recipe_soup.find('h1').get_text()
    print(f'Recipe Title: {recipe_title}')

Recipe Title: Vietnamese Lettuce Wraps with Peanut Sauce
Recipe Title: Vietnamese Caramel Ginger Chicken
Recipe Title: Vietnamese lemongrass pork noodle bowls (bun thit nuong)
Recipe Title: Vietnamese lemongrass pork steaks
Recipe Title: Chicken Banh Mi (Vietnamese sandwich)
Recipe Title: Red Vietnamese Fried Rice
Recipe Title: Vietnamese Shaking Beef
Recipe Title: Vietnamese Baked Chicken
Recipe Title: Vietnamese Chicken Salad
Recipe Title: Vietnamese Chicken Pho soup (Pho Ga)
Recipe Title: Vietnamese Rice Paper Rolls
Recipe Title: Vietnamese Pho recipe
Recipe Title: {Pilot Travel Video!!} Top 10 BEST Street Food in Vietnam – Ho Chi Minh City
Recipe Title: Vietnamese Noodles with Lemongrass Chicken
Recipe Title: Vietnamese Caramelised Pork Bowls
Recipe Title: Banh Mi ! (Vietnamese sandwich)
Recipe Title: Pork Meatballs for Banh Mi
Recipe Title: Caramelised Vietnamese Shredded Beef
Recipe Title: Bun Cha (Vietnamese Meatballs!)
Recipe Title: Vietnamese Coconut Caramel Chicken


Now that we're able to iterate through each of the link, we can start create a pipeline to extracts the necessary information (ingredients, instructions, and title)

In [4]:
def extract_info(div_class, ul_class, extra_char_to_strip, soup):
    divs = soup.find_all('div', class_= div_class)
    list_of_info = []
    for div in divs:
        ul = div.find_next('ul', class_=ul_class)
        for li in ul.find_all('li'):
            list_of_info.append(li.get_text().strip(extra_char_to_strip))
    return list_of_info

In [5]:
from urllib.parse import urljoin
import pandas as pd
# Create a dataframe storing all of the vietnamese recipes
recipes_df = pd.DataFrame(columns=['Title', 'Ingredients', 'Instructions', 'Tags'])
for link in links:
    recipe_page = requests.get(link)

    recipe_soup = BeautifulSoup(recipe_page.content, 'html.parser')

    # Extract the recipe's title and add to df
    recipe_title = recipe_soup.find('h1').get_text()

    new_row = pd.DataFrame({'Title': [recipe_title], 'Ingredients': [extract_info('wprm-recipe-ingredient-group', 'wprm-recipe-ingredients','▢ ', recipe_soup)], 'Instructions': [extract_info('wprm-recipe-instruction-group', 'wprm-recipe-instructions','▢ ', recipe_soup)]})
    recipes_df = pd.concat([recipes_df, new_row], ignore_index=True)

In [6]:
recipes_df.head()

Unnamed: 0,Title,Ingredients,Instructions,Tags
0,Vietnamese Lettuce Wraps with Peanut Sauce,[300g / 10 oz peeled whole cooked prawns/shrim...,"[Pickle first – Put the boiling water, salt an...",
1,Vietnamese Caramel Ginger Chicken,"[1 kg / 2 lb skinless chicken thigh fillets , ...","[Toss chicken with fish sauce and chilli, then...",
2,Vietnamese lemongrass pork noodle bowls (bun t...,[1 batch lemongrass marinated pork (it’s marin...,"[Pickle – In a large bowl, dissolve the salt a...",
3,Vietnamese lemongrass pork steaks,"[500g/1 lb pork shoulder , skinless and bonele...","[Cut pork – Cut into 8 equal, thinnish slices ...",
4,Chicken Banh Mi (Vietnamese sandwich),"[2 medium carrots , peeled cut into 2-3mm / 1/...","[Pickle – In a large bowl, dissolve the salt a...",


Our df is almost done! Now, we just need to append tags for each of the food's description. To do so, we'll use NLP's library called spacy and download its pre-trained model. We'll use this model to find tags and filter out unecessary words.

In [7]:
import spacy

# Load the English NLP model
nlp = spacy.load("en_core_web_sm")

# The food description
text = ("I find it funny that pho is the dish that’s become the superstar of Vietnamese food when bun thit nuong is tastier to me! "
        "I adore the contrast of fresh vegetables and herbs with delicious grilled meats, that it’s light and healthy yet anything but dull. "
        "It’s a big bowl of delicious, and I shared the chicken version many years ago (bun ga nuong). "
        "And as soon as I cracked the pork version, I shared in immediately (just last Wednesday!). "
        "And I’m back today with the noodle bowls recipe that is made using the lemongrass pork – just like you get on the streets of Vietnam!")


# Process the text
doc = nlp(text)

# Extract tags with filtering
filtered_tags = [token.text for token in doc 
                 if token.pos_ not in ["PRON", "VERB","AUX", "ADP","SYM","PREP"]
                 and not token.is_stop
                 and not token.is_punct]

# Print extracted tags
print(filtered_tags)

['funny', 'pho', 'dish', 'superstar', 'Vietnamese', 'food', 'bun', 'nuong', 'tastier', 'contrast', 'fresh', 'vegetables', 'herbs', 'delicious', 'meats', 'light', 'healthy', 'dull', 'big', 'bowl', 'delicious', 'chicken', 'version', 'years', 'ago', 'bun', 'ga', 'nuong', 'soon', 'pork', 'version', 'immediately', 'Wednesday', 'today', 'noodle', 'bowls', 'recipe', 'lemongrass', 'pork', 'like', 'streets', 'Vietnam']


Based on this filtering, we can see that the tags found from this description are good. Let's add a column into the df called food description and apply this pipeline to append to the tags column.

In [8]:
# Add an empty column calle Description
recipes_df["Description"] = None

In [9]:
from bs4 import BeautifulSoup
import requests 
recipes_df = pd.DataFrame(columns=['Title', 'Ingredients', 'Instructions', 'Tags', 'Description'])
for link in links:
    recipe_page = requests.get(link)

    recipe_soup = BeautifulSoup(recipe_page.content, 'html.parser')

    recipe_title = recipe_soup.find('h1').get_text()
    print(recipe_title)
    h2_heading = recipe_soup.find('h2', class_ = ['has-text-align-center wp-block-heading', 'wp-block-heading has-text-align-center', 'text-align: center;'])
    paragraphs_block = []
    if h2_heading:
        for sibling in h2_heading.find_next_siblings():
            if sibling.name == "h2" or sibling.name == "h3":
                break
            if sibling.name == "p":
                paragraphs_block.append(sibling.get_text().strip())
    new_row = pd.DataFrame({'Title': [recipe_title], 'Ingredients': [' '.join(extract_info('wprm-recipe-ingredient-group', 'wprm-recipe-ingredients','▢ ', recipe_soup))], 'Instructions': [' '.join(extract_info('wprm-recipe-instruction-group', 'wprm-recipe-instructions','▢ ', recipe_soup))], 'Description': [' '.join(paragraphs_block)]})
    recipes_df = pd.concat([recipes_df, new_row], ignore_index=True)

Vietnamese Lettuce Wraps with Peanut Sauce
Vietnamese Caramel Ginger Chicken
Vietnamese lemongrass pork noodle bowls (bun thit nuong)
Vietnamese lemongrass pork steaks
Chicken Banh Mi (Vietnamese sandwich)
Red Vietnamese Fried Rice
Vietnamese Shaking Beef
Vietnamese Baked Chicken
Vietnamese Chicken Salad
Vietnamese Chicken Pho soup (Pho Ga)
Vietnamese Rice Paper Rolls
Vietnamese Pho recipe
{Pilot Travel Video!!} Top 10 BEST Street Food in Vietnam – Ho Chi Minh City
Vietnamese Noodles with Lemongrass Chicken
Vietnamese Caramelised Pork Bowls
Banh Mi ! (Vietnamese sandwich)
Pork Meatballs for Banh Mi
Caramelised Vietnamese Shredded Beef
Bun Cha (Vietnamese Meatballs!)
Vietnamese Coconut Caramel Chicken


In [10]:
# Remove the row containing the Top 10 Street food in Vietnam.
recipes_df1 = recipes_df.copy()
recipes_df1 = recipes_df1.drop([6, 7, 12])


In [11]:
recipes_df1

Unnamed: 0,Title,Ingredients,Instructions,Tags,Description
0,Vietnamese Lettuce Wraps with Peanut Sauce,300g / 10 oz peeled whole cooked prawns/shrimp...,"Pickle first – Put the boiling water, salt and...",,These lettuce wraps are not strictly Vietnames...
1,Vietnamese Caramel Ginger Chicken,"1 kg / 2 lb skinless chicken thigh fillets , c...","Toss chicken with fish sauce and chilli, then ...",,"When you see today’s recipe, you’re going to d..."
2,Vietnamese lemongrass pork noodle bowls (bun t...,1 batch lemongrass marinated pork (it’s marina...,"Pickle – In a large bowl, dissolve the salt an...",,I find it funny that pho is the dish that’s be...
3,Vietnamese lemongrass pork steaks,"500g/1 lb pork shoulder , skinless and boneles...","Cut pork – Cut into 8 equal, thinnish slices o...",,I’ve been wanting to recreate the chargrilled ...
4,Chicken Banh Mi (Vietnamese sandwich),"2 medium carrots , peeled cut into 2-3mm / 1/1...","Pickle – In a large bowl, dissolve the salt an...",,Banh Mi is a meat filled French baguette sandw...
5,Red Vietnamese Fried Rice,"30g / 2 tbsp unsalted butter 3 garlic cloves ,...",Melt most of the butter in a large non-stick s...,,Fried rice always makes for a great quick meal...
8,Vietnamese Chicken Salad,"350g/12oz cooked chicken , cut into thin baton...",Dressing: Shake Dressing ingredients in a jar....,,While this exact salad is not strictly authent...
9,Vietnamese Chicken Pho soup (Pho Ga),"1 tbsp oil , vegetable or canola (or other pla...",Char onion & ginger - Heat oil in a 6 litre / ...,,
10,Vietnamese Rice Paper Rolls,7 – 14 sheets of 22cm/8.5″ round rice paper (N...,Peanut Sauce: Combine the Peanut Dipping Sauce...,,If I took a platter of these to a gathering wi...
11,Vietnamese Pho recipe,"2 large onions , halved 150g / 5oz ginger , s...",Heat a heavy based skillet over high heat (no ...,,


We noticed that for some title like Vietnamese Chicken Pho, we're missing some information so let's complete our dataframe before we generate the tags.

Pho Ga

In [12]:
recipes_df.iloc[9]

Title                        Vietnamese Chicken Pho soup (Pho Ga)
Ingredients     1 tbsp oil , vegetable or canola (or other pla...
Instructions    Char onion & ginger - Heat oil in a 6 litre / ...
Tags                                                          NaN
Description                                                      
Name: 9, dtype: object

In [13]:
def html_parser(url):
    request = requests.get(url)
    page = BeautifulSoup(request.content, 'html.parser')
    return page

In [14]:
def get_description(url, title, num_p, df, row_index, column_name):
    page = html_parser(url)
    h2s = page.find_all('h2')
    for h2 in h2s:
        if h2.get_text() == title:
            heading = h2

    # Get the paragraph after the h2
    description_blocks = heading.find_all_next('p', limit=num_p)
    description = ' '.join(block.get_text() for block in description_blocks)
    df.loc[row_index, column_name] = description

In [15]:
recipes_df2 = recipes_df1.copy()
get_description('https://www.recipetineats.com/vietnamese-chicken-pho-soup-pho-ga/', "Chicken Pho soup", 4, recipes_df2, 9, "Description")

Pho

In [16]:
get_description('https://www.recipetineats.com/vietnamese-pho-recipe/', "What is Pho?", 5, recipes_df2,11,"Description")

Vietnamese Lemon Grass Chicken

In [17]:
lemon_grass_chicken_page = html_parser('https://www.recipetineats.com/vietnamese-chicken-noodle-bowl/')
h2s = lemon_grass_chicken_page.find_all('h2',limit=2)
for h2 in h2s:
    if h2.get_text() == "Vietnamese Noodles with Lemongrass Chicken":
        heading = h2
description_blocks = heading.find_all_next('p', limit=9)
description = ' '.join(block.get_text() for block in description_blocks)
recipes_df2.loc[13, "Description"] = description

Banh Mi

In [18]:
banh_mi_page = html_parser('https://www.recipetineats.com/banh-mi-vietnamese-sandwich/')
h2s = banh_mi_page.find_all('h2',limit=2)
for h2 in h2s:
    if h2.get_text() == "What is Banh Mi?":
        heading = h2
description_blocks = heading.find_all_next('p', limit=2)
description = ' '.join(block.get_text() for block in description_blocks)
recipes_df2.loc[15, "Description"] = description

Caramelized Pork Bowls

In [19]:
pork_page = html_parser('https://www.recipetineats.com/vietnamese-caramelised-pork-bowls/')
paragraph_description = pork_page.find_all('p', limit=5)
paragraph_description = paragraph_description[2].get_text()
print(paragraph_description)

recipes_df2.loc[17, "Description"] = paragraph_description

Here’s a super fast pork stir fry made with ground pork infused with flavours from the streets of Vietnam. With just a handful of ingredients you probably already have, it’s sweet, salty, beautifully caramelised and absolutely irresistible. It’s the quick and easy version of Vietnamese Caramel Pork, a famous Vietnamese food speciality!


Vietnamese Coconut Caramel Chicken

In [20]:
chicken_page = html_parser('https://www.recipetineats.com/vietnamese-coconut-caramel-chicken/')
paragraph_description = chicken_page.find_all('p', limit=7)
description = (paragraph_description[2].get_text() + paragraph_description[3].get_text() + paragraph_description[5].get_text() + paragraph_description[6].get_text())

recipes_df2.loc[19, "Description"] = description

In [21]:
recipes_df2

Unnamed: 0,Title,Ingredients,Instructions,Tags,Description
0,Vietnamese Lettuce Wraps with Peanut Sauce,300g / 10 oz peeled whole cooked prawns/shrimp...,"Pickle first – Put the boiling water, salt and...",,These lettuce wraps are not strictly Vietnames...
1,Vietnamese Caramel Ginger Chicken,"1 kg / 2 lb skinless chicken thigh fillets , c...","Toss chicken with fish sauce and chilli, then ...",,"When you see today’s recipe, you’re going to d..."
2,Vietnamese lemongrass pork noodle bowls (bun t...,1 batch lemongrass marinated pork (it’s marina...,"Pickle – In a large bowl, dissolve the salt an...",,I find it funny that pho is the dish that’s be...
3,Vietnamese lemongrass pork steaks,"500g/1 lb pork shoulder , skinless and boneles...","Cut pork – Cut into 8 equal, thinnish slices o...",,I’ve been wanting to recreate the chargrilled ...
4,Chicken Banh Mi (Vietnamese sandwich),"2 medium carrots , peeled cut into 2-3mm / 1/1...","Pickle – In a large bowl, dissolve the salt an...",,Banh Mi is a meat filled French baguette sandw...
5,Red Vietnamese Fried Rice,"30g / 2 tbsp unsalted butter 3 garlic cloves ,...",Melt most of the butter in a large non-stick s...,,Fried rice always makes for a great quick meal...
8,Vietnamese Chicken Salad,"350g/12oz cooked chicken , cut into thin baton...",Dressing: Shake Dressing ingredients in a jar....,,While this exact salad is not strictly authent...
9,Vietnamese Chicken Pho soup (Pho Ga),"1 tbsp oil , vegetable or canola (or other pla...",Char onion & ginger - Heat oil in a 6 litre / ...,,Chicken Pho – called Pho Ga in Vietnamese – is...
10,Vietnamese Rice Paper Rolls,7 – 14 sheets of 22cm/8.5″ round rice paper (N...,Peanut Sauce: Combine the Peanut Dipping Sauce...,,If I took a platter of these to a gathering wi...
11,Vietnamese Pho recipe,"2 large onions , halved 150g / 5oz ginger , s...",Heat a heavy based skillet over high heat (no ...,,If you’re wondering “What is Pho?” then you’re...


Now that we have added the descriptions of each food, let's now generate tags for each description.

For us to generate the tags for the description, we could implement Topic Modeling unsupervised model to detect groups of similar words in a word context

## Topic Model

Load the libraries

In [22]:
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np 
import os 
import re 

# Deep Learning packages
from absl import logging 
import tensorflow as tf 
import tensorflow_hub as hub 

# Hugging face
from transformers import pipeline

# Scikit-learn
from sklearn.cluster import KMeans

# NLP
from nltk import word_tokenize
from nltk.corpus import stopwords 
import nltk 



In [23]:
# Download nltk required data
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /Users/kenlam/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/kenlam/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

Let's load a pretrained deep learning model that will be used as a USE encoder to embed our description 

In [24]:
# Universal sentence encoder (from Google)
USE_encoder = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

# Encoder function
def embed(input):
    return np.array(USE_encoder(input))

2024-03-19 11:45:03.143321: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In addition to our encoder, let's load in a pre-trained sentiment model from Hugging Face

In [28]:
sentiment_model = pipeline("sentiment-analysis")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Although the model returns whether the sentiment is POS or NEG, the actual value is always positive so let's create a function that converts a NEG labeled sentiment sentence as negative.

In [34]:
def sentiment_sign_conversion(sentiment):
    result = sentiment_model(sentiment)
    sign = 1 if (result[0]['label'] == "POSITIVE") else -1
    value = result[0]['score']
    return sign * value 


Using our recipes_df, we're only going to look at the description column to generate our tags

In [70]:
description_df = recipes_df2['Description']

Now we're going to create a class to prepare our df before using it to our model.

In [83]:
class TopicModel:

    STOPWORDS = stopwords.words('english')

    EMBEDDING_DIM = 512

    # Constructor
    def __init__(self, df):
        self.X = self.embed_df(df)
    
    # Create an embedded df
    def embed_df(self, df):
        X = embed(df) # embed our df as an np matrix
        X = pd.DataFrame(X) # create a df to store our matrix with the cols being from 1 to 512
        X.index = df # the index of the df is the description itself
        return X
    
    # Method to determine the number of topics to generate using kmeans cluster number via an elbow plot
    def elbow_plot(self):
        max_cluster = min(len(self.X), 80) # ensure max_clusers doesn't exceed number of samples
        cluster_sizes = list(range(1, max_cluster))
        cluster_scores = []
        for n in cluster_sizes:
            kmeans = KMeans(n_clusters=n)
            kmeans.fit(self.X)
            cluster_scores.append(kmeans.inertia_)
        plt.figure()
        plt.plot(cluster_sizes, cluster_scores)
        plt.show()


    # Cosine similarity
    def consine_similarity(self, x, y):
        # put x,y into appropriate dimension
        x = x.reshape(self.EMBEDDING_DIM)
        y = y.reshape(self.EMBEDDING_DIM)
        dotproduct = x.dot(y)
        x_mag = x.dot(x)**0.5
        y_mag = y.dot(y)**0.5
        # return closer to 1 if x and y are similar and 0 if they are different
        return dotproduct / (x_mag * y_mag)
        
    # Get closest word to the centroid
    def get_closest_words(self, topic_list, centroid):
        word_distances = {}
        for r in topic_list:
            # tokenize the review if the word isn't in the STOPWORDS
            review_words = [w for w in word_tokenize(r) if (w not in self.STOPWORDS)]
            for w in review_words:
                word_embedding = embed([w]) # return a 512 dimensional vector for the word 'w'
                # how similar is the word embedding to the topic's centroid
                word_distances[w] = self.consine_similarity(word_embedding, centroid)
                # if the cosine similarity gives 1, that indicates a match and any value closer to 0 means different from the topic
        top_5_keywords = sorted([(word_distances[w], w) for w in word_distances])[-5:] # get the last five in the list
        return ",".join([x[1] for x in top_5_keywords]) # return a list of keywords
        

    # Method to extract topics from text data
    def create_topics(self, num_topics):
        # Cluster the reviews
        kmeans = KMeans(n_clusters=num_topics)
        kmeans.fit(self.X)

        # Create the final topics df
        topics_df = self.X.copy()
        topics_df['topic'] = kmeans.labels_.copy() # once you fit the kmeans, you get a numeric label
        topics_df['topic'] = topics_df['topic'].astype(int)

        # Create summary keywords per topic
        topic_keywords = {}
        for topic in topics_df['topic'].unique():
            topic_list = topics_df.query("topic == {}".format(topic)).index.tolist()
            topic_centroid = kmeans.cluster_centers_[topic] # topic is the cluser id from the kmeans and want to get the centroid to see how close the topic word is close to the topic centroid
            # Get the closest word in the topic_list that's the closest to the topic centroid
            topic_keywords[topic] = self.get_closest_words(topic_list, topic_centroid)

        topics_df['topic_keywords'] = topics_df['topic'].map(topic_keywords) # use map to map the string of keywords using the topic key

        # Score the sentiment of each review
        topics_df['sentiment'] = [sentiment_sign_conversion(r) for r in topics_df.index.values.tolist()]

 
        # Final result
        self.topics_keywords = topic_keywords
        self.topics_df = topics_df.copy()[['topic', 'topic_keywords', 'sentiment']] # we only want the topic column and not the embedding column

Initialize model

In [84]:
topic_model = TopicModel(description_df)

In [None]:
topic_model.X

Let's look at our elbow plot

In [None]:
topic_model.elbow_plot()

Because we have a relatively small samples (17 recipes), we couldn't really create a normal elbow plot but based on this plot, we can see that after 8 topics, our line decreases from exponentially to linearly.

Create final moel and evaluate

In [85]:
topic_model.create_topics(num_topics=8)

  super()._check_params_vs_input(X, default_n_init=10)


In [86]:
topic_model.topics_df

Unnamed: 0_level_0,topic,topic_keywords,sentiment
Description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"These lettuce wraps are not strictly Vietnamese authentic (as far as I know), but they certainly embrace all that we love about Vietnamese food – fresh and light yet never boring, using an abundance of crunchy vegetables, herbs and a killer peanut dipping sauce. That’s definitely a signature of Vietnamese food – wickedly good sauces that make everything delicious! Honestly, you could just smother a bowl of otherwise bland boiled vegetables in that sauce and it will make it 100% scoff-able!",4,"Vietnam,sauces,salad,salads,Vietnamese",0.999498
"When you see today’s recipe, you’re going to doubt me. How can a recipe with so few ingredients that’s so fast to make be as good as I promise?? Answer: Because the Vietnamese have been making this for centuries! The caramelisation cooking method used in today’s recipe is a traditional Vietnamese technique. Proteins such as chicken, pork, egg and sometimes vegetables are braised in a simple mixture of sugar and water (or coconut juice), seasoned with fish sauce and flavoured with aromatics such as ginger, garlic and chilli. At first the mixture looks thoroughly unimpressive – watery and foamy. Like this: But just give it a mere 12 minutes, and this is what it looks like: I know, right?? Incredible. We should know better than to doubt the Vietnamese! And here’s a nice close up for you, including the inside – proof of juiciness:",2,"marinade,recipes,Vietnamese,recipe,braised",0.99394
"I find it funny that pho is the dish that’s become the superstar of Vietnamese food when bun thit nuong is tastier to me! I adore the contrast of fresh vegetables and herbs with delicious grilled meats, that it’s light and healthy yet anything but dull. It’s a big bowl of delicious, and I shared the chicken version many years ago (bun ga nuong). And as soon as I cracked the pork version, I shared in immediately (just last Wednesday!). And I’m back today with the noodle bowls recipe that is made using the lemongrass pork – just like you get on the streets of Vietnam! PS The photo below is in my own home. Not the streets of Vietnam!",3,"pork,Saigon,Vietnamese,Banh,pho",0.927161
"I’ve been wanting to recreate the chargrilled lemongrass marinated pork you find “everywhere” on the streets of Vietnam for years – but it always came out dry. Finally figured out the secret to keeping it juicy, so I had to share the recipe immediately! 😂 Across Vietnam, variations of this lemongrass pork are served in various forms. On rice, soups, in rice paper rolls, even in Banh Mi. While all are delicious, my favourite are the popular Vietnamese noodle bowl salads, bun thit nuong. Thin rice noodles, topped with this pork, pickled vegetables, lettuce, a smattering of peanuts, lime and served with nuoc cham, that sauce served with “everything” in Vietnam. I’ve shared the pork noodle bowl as a separate recipe as I wanted to publish the marinated pork separately because I think it’s worthy of your attention! Here’s a photo of the pork rice noodle bowls. I actually took this photo in Vietnam – it’s a dish I bought from a street vendor in Saigon and took back to my hotel to take a photo: Oh look! And said vendor in Saigon. 🙂 They cooked the pork on skewers over charcoal. I’m doing a home version – steaks on a stove. So you can make this any night of the week – rain, hail or shine!",3,"pork,Saigon,Vietnamese,Banh,pho",-0.996114
"Banh Mi is a meat filled French baguette sandwich stuffed with Asian flavours. Born from the time of France’s occupation of Vietnam, I imagine the French just couldn’t cope without crusty bread so they introduced baguettes to the Vietnamese and the Vietnamese filled it with their meats and herbs and voila! The now-world-famous Banh Mi was born. While Vietnamese locals love the classic mystery-meat-pork-cold-cuts version, there are all sorts of other varieties across Vietnam and around the world these days. Grilled meats, beef, chicken, meatballs, egg, sausage, BBQ pork, fish, egg, to name a few. Today, I’m sharing a shredded chicken version which is very popular among Sydney locals who remain suspicious of the mystery pink cold-cut slices!! One tip: Stuff generously. The photo is above is how it looks when squished. Look how full it is before I picked it up. ⬇️ The motto here – don’t skimp on fillings!!!",1,"Pork,pork,Vietnam,Banh,Vietnamese",-0.958432
"Fried rice always makes for a great quick meal if you can incorporate enough “stuff” into it to fill it out from a nutrition perspective so you can justify calling it “dinner” rather than just a side dish. Today we’re turning to frozen peas, pre-chopped ham and egg for literal no-effort, no-prep add-ins to make this tasty dish I’ve called Red Vietnamese Fried Rice. In case I wasn’t clear about it in the opening, this is not an authentic Vietnamese recipe! At least, not to my knowledge. It is based on a traditional Vietnamese dish, Red Rice, which is a tomato-flavoured rice served with meats and other mains, somewhat like Mexican Red Rice. I have always loved the flavour but I wanted to turn Vietnamese Red Rice into a more substantial, can-be-a-standalone meal. So I thought I’d spin it into a fried rice dish, and here it is!",0,"meal,dish,Vietnamese,Rice,rice",-0.963759
"While this exact salad is not strictly authentic, the flavours, texture and spirit of the dish I drew heavily from traditional Vietnamese food. All across Vietnam you see these shredded “slaw-style” salads like green papaya salad and banana blossom salad, loaded with fresh herbs, compared to green leafy salads common in Western cuisine. And similarly it is with the Nuoc Cham salad dressing that I’m using here. This is the fish sauce-based lime dressing spiked with finely chopped garlic and chilli that the Vietnamese use for literally everything. And that is no exaggeration! Dipping sauce, drizzling, dressing, sauce for meats, noodle bowls… (See it in action here and here and here). And the most wonderful thing about this salad, like most Vietnamese food? It’s light and fresh and yet SO GOOD it doesn’t even register that it’s actually incredibly healthy!",4,"Vietnam,sauces,salad,salads,Vietnamese",0.998437
"Chicken Pho – called Pho Ga in Vietnamese – is the chicken version of Beef Pho, Vietnam’s most famous food export. The magic of Pho is that while the broth looks completely unassuming, it’s actually full of complex-yet-delicate spice infused flavours. That special something-something that makes it unforgettable, and you just can’t stop eating it. If you’re a Pho fan, you will love this chicken version because it’s easier to make than Beef Pho – no need to hunt down specific bones, just use chicken pieces! This magical Pho soup broth is made the traditional Vietnamese way, using chicken pieces and infused with spices. Using store bought stock just isn’t the same!",5,"noodle,Soups,soup,broth,Pho",0.999039
"If I took a platter of these to a gathering with my friends, I guarantee they’d be one of the first things to go. Everybody I know loves these. Even the hardest of hard-core carnivores munch these down as enthusiastically as they would a rack of ribs. They truly are that good. Vietnamese food is my idea of the ultimate “accidently healthy” food. Sure, there are a handful of deep fried recipes. But generally, most Vietnamese dishes are super fresh, full of bright flavours, loaded with herbs and salads, with just a bit of protein. Dressings and sauces are refreshingly light and devoid of oil, unlike basically every Western dressing! I think that Vietnamese Rice Paper Rolls are one of those things that people love but always assume are just too fiddly or too hard to make. To dispel of that myth, let me tell you – I am not into fiddly. That’s why you’ll never see fancy decorated cakes on my blog. I simply don’t have the patience or co-ordination for fiddly dishes – sweet or savoury. I actually posted this recipe way back when I started my blog. A couple of years on, and my photos have somewhat improved but more importantly – VIDEO! I REALLY wanted to remake this with a video, it is so great to be able to demonstrate how to make these rolls.",2,"marinade,recipes,Vietnamese,recipe,braised",0.995722
"If you’re wondering “What is Pho?” then you’re probably also wondering “Why is she so bonkers over it???” I don’t blame you. It looks like a relatively harmless bowl of beef noodle soup. That is, until you take your first slurp. The Pho soup broth is everything. It’s light yet full of flavour, deceptively beefy, savoury, complex, has the tiniest hint of richness and is filled with beautiful spices like cinnamon. It is, without question, one of The Best Soups in the whole world!",5,"noodle,Soups,soup,broth,Pho",0.998697


Now that we have our topics generated, let's merge it with our df

In [87]:
recipes_df3 = recipes_df2.copy()
recipes_df3 = pd.merge(recipes_df3, topic_model.topics_df, on= 'Description', how='left')

In [88]:
recipes_df3.head()

Unnamed: 0,Title,Ingredients,Instructions,Tags,Description,topic,topic_keywords,sentiment
0,Vietnamese Lettuce Wraps with Peanut Sauce,300g / 10 oz peeled whole cooked prawns/shrimp...,"Pickle first – Put the boiling water, salt and...",,These lettuce wraps are not strictly Vietnames...,4,"Vietnam,sauces,salad,salads,Vietnamese",0.999498
1,Vietnamese Caramel Ginger Chicken,"1 kg / 2 lb skinless chicken thigh fillets , c...","Toss chicken with fish sauce and chilli, then ...",,"When you see today’s recipe, you’re going to d...",2,"marinade,recipes,Vietnamese,recipe,braised",0.99394
2,Vietnamese lemongrass pork noodle bowls (bun t...,1 batch lemongrass marinated pork (it’s marina...,"Pickle – In a large bowl, dissolve the salt an...",,I find it funny that pho is the dish that’s be...,3,"pork,Saigon,Vietnamese,Banh,pho",0.927161
3,Vietnamese lemongrass pork steaks,"500g/1 lb pork shoulder , skinless and boneles...","Cut pork – Cut into 8 equal, thinnish slices o...",,I’ve been wanting to recreate the chargrilled ...,3,"pork,Saigon,Vietnamese,Banh,pho",-0.996114
4,Chicken Banh Mi (Vietnamese sandwich),"2 medium carrots , peeled cut into 2-3mm / 1/1...","Pickle – In a large bowl, dissolve the salt an...",,Banh Mi is a meat filled French baguette sandw...,1,"Pork,pork,Vietnam,Banh,Vietnamese",-0.958432


In [90]:
# Drop tag column
recipes_df4 = recipes_df3.copy()
recipes_df4.drop(['Tags'], axis=1)

Unnamed: 0,Title,Ingredients,Instructions,Description,topic,topic_keywords,sentiment
0,Vietnamese Lettuce Wraps with Peanut Sauce,300g / 10 oz peeled whole cooked prawns/shrimp...,"Pickle first – Put the boiling water, salt and...",These lettuce wraps are not strictly Vietnames...,4,"Vietnam,sauces,salad,salads,Vietnamese",0.999498
1,Vietnamese Caramel Ginger Chicken,"1 kg / 2 lb skinless chicken thigh fillets , c...","Toss chicken with fish sauce and chilli, then ...","When you see today’s recipe, you’re going to d...",2,"marinade,recipes,Vietnamese,recipe,braised",0.99394
2,Vietnamese lemongrass pork noodle bowls (bun t...,1 batch lemongrass marinated pork (it’s marina...,"Pickle – In a large bowl, dissolve the salt an...",I find it funny that pho is the dish that’s be...,3,"pork,Saigon,Vietnamese,Banh,pho",0.927161
3,Vietnamese lemongrass pork steaks,"500g/1 lb pork shoulder , skinless and boneles...","Cut pork – Cut into 8 equal, thinnish slices o...",I’ve been wanting to recreate the chargrilled ...,3,"pork,Saigon,Vietnamese,Banh,pho",-0.996114
4,Chicken Banh Mi (Vietnamese sandwich),"2 medium carrots , peeled cut into 2-3mm / 1/1...","Pickle – In a large bowl, dissolve the salt an...",Banh Mi is a meat filled French baguette sandw...,1,"Pork,pork,Vietnam,Banh,Vietnamese",-0.958432
5,Red Vietnamese Fried Rice,"30g / 2 tbsp unsalted butter 3 garlic cloves ,...",Melt most of the butter in a large non-stick s...,Fried rice always makes for a great quick meal...,0,"meal,dish,Vietnamese,Rice,rice",-0.963759
6,Vietnamese Chicken Salad,"350g/12oz cooked chicken , cut into thin baton...",Dressing: Shake Dressing ingredients in a jar....,While this exact salad is not strictly authent...,4,"Vietnam,sauces,salad,salads,Vietnamese",0.998437
7,Vietnamese Chicken Pho soup (Pho Ga),"1 tbsp oil , vegetable or canola (or other pla...",Char onion & ginger - Heat oil in a 6 litre / ...,Chicken Pho – called Pho Ga in Vietnamese – is...,5,"noodle,Soups,soup,broth,Pho",0.999039
8,Vietnamese Rice Paper Rolls,7 – 14 sheets of 22cm/8.5″ round rice paper (N...,Peanut Sauce: Combine the Peanut Dipping Sauce...,If I took a platter of these to a gathering wi...,2,"marinade,recipes,Vietnamese,recipe,braised",0.995722
9,Vietnamese Pho recipe,"2 large onions , halved 150g / 5oz ginger , s...",Heat a heavy based skillet over high heat (no ...,If you’re wondering “What is Pho?” then you’re...,5,"noodle,Soups,soup,broth,Pho",0.998697


In [91]:
# Rename the topic_keywords into Tags
recipes_df4.rename(columns={'topic_keywords': 'Tags'}, inplace=True)

In [92]:
recipes_df4

Unnamed: 0,Title,Ingredients,Instructions,Tags,Description,topic,Tags.1,sentiment
0,Vietnamese Lettuce Wraps with Peanut Sauce,300g / 10 oz peeled whole cooked prawns/shrimp...,"Pickle first – Put the boiling water, salt and...",,These lettuce wraps are not strictly Vietnames...,4,"Vietnam,sauces,salad,salads,Vietnamese",0.999498
1,Vietnamese Caramel Ginger Chicken,"1 kg / 2 lb skinless chicken thigh fillets , c...","Toss chicken with fish sauce and chilli, then ...",,"When you see today’s recipe, you’re going to d...",2,"marinade,recipes,Vietnamese,recipe,braised",0.99394
2,Vietnamese lemongrass pork noodle bowls (bun t...,1 batch lemongrass marinated pork (it’s marina...,"Pickle – In a large bowl, dissolve the salt an...",,I find it funny that pho is the dish that’s be...,3,"pork,Saigon,Vietnamese,Banh,pho",0.927161
3,Vietnamese lemongrass pork steaks,"500g/1 lb pork shoulder , skinless and boneles...","Cut pork – Cut into 8 equal, thinnish slices o...",,I’ve been wanting to recreate the chargrilled ...,3,"pork,Saigon,Vietnamese,Banh,pho",-0.996114
4,Chicken Banh Mi (Vietnamese sandwich),"2 medium carrots , peeled cut into 2-3mm / 1/1...","Pickle – In a large bowl, dissolve the salt an...",,Banh Mi is a meat filled French baguette sandw...,1,"Pork,pork,Vietnam,Banh,Vietnamese",-0.958432
5,Red Vietnamese Fried Rice,"30g / 2 tbsp unsalted butter 3 garlic cloves ,...",Melt most of the butter in a large non-stick s...,,Fried rice always makes for a great quick meal...,0,"meal,dish,Vietnamese,Rice,rice",-0.963759
6,Vietnamese Chicken Salad,"350g/12oz cooked chicken , cut into thin baton...",Dressing: Shake Dressing ingredients in a jar....,,While this exact salad is not strictly authent...,4,"Vietnam,sauces,salad,salads,Vietnamese",0.998437
7,Vietnamese Chicken Pho soup (Pho Ga),"1 tbsp oil , vegetable or canola (or other pla...",Char onion & ginger - Heat oil in a 6 litre / ...,,Chicken Pho – called Pho Ga in Vietnamese – is...,5,"noodle,Soups,soup,broth,Pho",0.999039
8,Vietnamese Rice Paper Rolls,7 – 14 sheets of 22cm/8.5″ round rice paper (N...,Peanut Sauce: Combine the Peanut Dipping Sauce...,,If I took a platter of these to a gathering wi...,2,"marinade,recipes,Vietnamese,recipe,braised",0.995722
9,Vietnamese Pho recipe,"2 large onions , halved 150g / 5oz ginger , s...",Heat a heavy based skillet over high heat (no ...,,If you’re wondering “What is Pho?” then you’re...,5,"noodle,Soups,soup,broth,Pho",0.998697


Yay, we have finally completed our dataframe containing all of the recipes and their information!

Let's move into the last stage: build a NLP that could take a user's request and generate texts to recommend them what food recipes to make that fits their needs!