In [81]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Contrastive-search demo with GPT-2 large.
# NOTE(review): the recorded output of this cell ends in KeyboardInterrupt, so
# the generation below was not observed to finish in the saved run.
checkpoint = "openai-community/gpt2-large"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

prompt = "Hugging face company is"
inputs = tokenizer(prompt, return_tensors="pt")

# `penalty_alpha` together with `top_k` requests contrastive search.
# The pad token is passed explicitly (EOS) instead of relying on the automatic
# fallback that prints "Setting `pad_token_id` to `eos_token_id`" on every call.
outputs = model.generate(
    **inputs,
    penalty_alpha=0.6,
    top_k=4,
    max_new_tokens=100,
    pad_token_id=tokenizer.eos_token_id,
)
tokenizer.batch_decode(outputs, skip_special_tokens=True)

  from .autonotebook import tqdm as notebook_tqdm
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


KeyboardInterrupt: 

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the tokenizer and the LM-head model through the GPT-2 specific classes.
# Both come from the same checkpoint, so the name is spelled out once.
gpt2_checkpoint = "openai-community/gpt2-large"
tokenizer = GPT2Tokenizer.from_pretrained(gpt2_checkpoint)
model = GPT2LMHeadModel.from_pretrained(gpt2_checkpoint)

In [None]:
input_text = "Generate a vegan Italian recipe with the following ingredients: tomatoes, basil, garlic."

# Calling the tokenizer (rather than `encode`) also returns the attention mask,
# which is forwarded to `generate` together with an explicit pad token so the
# model does not have to guess either of them.
encoded = tokenizer(input_text, return_tensors="pt")
input_ids = encoded["input_ids"]

outputs = model.generate(
    input_ids,
    attention_mask=encoded["attention_mask"],
    pad_token_id=tokenizer.eos_token_id,
    max_length=500,
    num_return_sequences=1,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

# The Best Vietnamese Recipe to fit your vibe!

## Data collection

Let's scrape the data from a website!

In [None]:
from bs4 import BeautifulSoup
import requests

# Download the category page that lists the Vietnamese recipes and parse it.
url = 'https://www.recipetineats.com/category/vietnamese-recipes/'
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops us from parsing an HTTP error page as if it were data.
page = requests.get(url, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

Now that we have parsed the website's HTML, let's get all of the recipe links using `find_all`.

In [None]:
# Every recipe card is an <a> tag with the 'entry-image-link' class; keep the
# value of each tag's href attribute as the recipe URL.
recipe_links = soup.find_all('a', class_='entry-image-link')
links = [anchor.get('href') for anchor in recipe_links]
print(links)

['https://www.recipetineats.com/vietnamese-lettuce-wraps-with-peanut-sauce/', 'https://www.recipetineats.com/vietnamese-caramel-ginger-chicken/', 'https://www.recipetineats.com/vietnamese-pork-noodle-bowls/', 'https://www.recipetineats.com/vietnamese-lemongrass-pork-steaks/', 'https://www.recipetineats.com/chicken-banh-mi-vietnamese-sandwich/', 'https://www.recipetineats.com/red-vietnamese-fried-rice/', 'https://www.recipetineats.com/vietnamese-shaking-beef/', 'https://www.recipetineats.com/vietnamese-baked-chicken/', 'https://www.recipetineats.com/vietnamese-chicken-salad/', 'https://www.recipetineats.com/vietnamese-chicken-pho-soup-pho-ga/', 'https://www.recipetineats.com/vietnamese-rice-paper-rolls-spring-rolls/', 'https://www.recipetineats.com/vietnamese-pho-recipe/', 'https://www.recipetineats.com/vietnamese-top-10-best-street-food-ho-chi-minh-city/', 'https://www.recipetineats.com/vietnamese-chicken-noodle-bowl/', 'https://www.recipetineats.com/vietnamese-caramelised-pork-bowls/'

After getting the links for all of the recipes, let's iterate through them and print each recipe's title to check that the links work.

In [None]:
from urllib.parse import urljoin

# Smoke test: fetch every recipe page and print its <h1> title.
for link in links:
    # Bound the wait per page and fail loudly on an HTTP error instead of
    # parsing the error page.
    recipe_page = requests.get(link, timeout=30)
    recipe_page.raise_for_status()

    recipe_soup = BeautifulSoup(recipe_page.content, 'html.parser')

    # Extract the recipe's title
    recipe_title = recipe_soup.find('h1').get_text()
    print(f'Recipe Title: {recipe_title}')

Recipe Title: Vietnamese Lettuce Wraps with Peanut Sauce
Recipe Title: Vietnamese Caramel Ginger Chicken
Recipe Title: Vietnamese lemongrass pork noodle bowls (bun thit nuong)
Recipe Title: Vietnamese lemongrass pork steaks
Recipe Title: Chicken Banh Mi (Vietnamese sandwich)
Recipe Title: Red Vietnamese Fried Rice
Recipe Title: Vietnamese Shaking Beef
Recipe Title: Vietnamese Baked Chicken
Recipe Title: Vietnamese Chicken Salad
Recipe Title: Vietnamese Chicken Pho soup (Pho Ga)
Recipe Title: Vietnamese Rice Paper Rolls
Recipe Title: Vietnamese Pho recipe
Recipe Title: {Pilot Travel Video!!} Top 10 BEST Street Food in Vietnam – Ho Chi Minh City
Recipe Title: Vietnamese Noodles with Lemongrass Chicken
Recipe Title: Vietnamese Caramelised Pork Bowls
Recipe Title: Banh Mi ! (Vietnamese sandwich)
Recipe Title: Pork Meatballs for Banh Mi
Recipe Title: Caramelised Vietnamese Shredded Beef
Recipe Title: Bun Cha (Vietnamese Meatballs!)
Recipe Title: Vietnamese Coconut Caramel Chicken


Now that we're able to iterate through each of the links, we can start creating a pipeline that extracts the necessary information (title, ingredients, and instructions).

In [None]:
def extract_info(div_class, ul_class, extra_char_to_strip, soup):
    """Collect the text of the list items that follow each matching <div>.

    For every <div> found with class ``div_class``, the next <ul> with class
    ``ul_class`` is looked up via ``find_next`` and the text of each of its
    <li> children is appended to the result.

    Args:
        div_class: class filter passed to ``soup.find_all('div', ...)``.
        ul_class: class filter passed to ``div.find_next('ul', ...)``.
        extra_char_to_strip: characters removed from both ends of each item's
            text (passed straight to ``str.strip``, so it is a character set,
            not a prefix).
        soup: parsed document exposing ``find_all``.

    Returns:
        A list of stripped item texts, in the order they were found. Empty
        when no <div> matches.
    """
    list_of_info = []
    for div in soup.find_all('div', class_=div_class):
        ul = div.find_next('ul', class_=ul_class)
        if ul is None:
            # No matching <ul> after this <div>: skip it instead of failing
            # with AttributeError on `None.find_all`.
            continue
        list_of_info.extend(
            li.get_text().strip(extra_char_to_strip) for li in ul.find_all('li')
        )
    return list_of_info

In [None]:
from urllib.parse import urljoin
import pandas as pd
# Create a dataframe storing all of the vietnamese recipes.
# Each page becomes one record and the frame is built once at the end, rather
# than re-copying the whole frame with pd.concat on every iteration.
recipe_columns = ['Title', 'Ingredients', 'Instructions', 'Tags']
recipe_records = []
for link in links:
    # Bound the wait per page and fail loudly on an HTTP error.
    recipe_page = requests.get(link, timeout=30)
    recipe_page.raise_for_status()

    recipe_soup = BeautifulSoup(recipe_page.content, 'html.parser')

    # Extract the recipe's title
    recipe_title = recipe_soup.find('h1').get_text()

    recipe_records.append({
        'Title': recipe_title,
        'Ingredients': extract_info('wprm-recipe-ingredient-group', 'wprm-recipe-ingredients', '▢ ', recipe_soup),
        'Instructions': extract_info('wprm-recipe-instruction-group', 'wprm-recipe-instructions', '▢ ', recipe_soup),
    })

# 'Tags' has no values yet; keep it as an object column so tags can be
# assigned into it later without a dtype change.
recipes_df = pd.DataFrame(recipe_records, columns=recipe_columns).astype({'Tags': object})

In [None]:
# Preview the first rows of the scraped recipes.
recipes_df.head()

Unnamed: 0,Title,Ingredients,Instructions,Tags
0,Vietnamese Lettuce Wraps with Peanut Sauce,[300g / 10 oz peeled whole cooked prawns/shrim...,"[Pickle first – Put the boiling water, salt an...",
1,Vietnamese Caramel Ginger Chicken,"[1 kg / 2 lb skinless chicken thigh fillets , ...","[Toss chicken with fish sauce and chilli, then...",
2,Vietnamese lemongrass pork noodle bowls (bun t...,[1 batch lemongrass marinated pork (it’s marin...,"[Pickle – In a large bowl, dissolve the salt a...",
3,Vietnamese lemongrass pork steaks,"[500g/1 lb pork shoulder , skinless and bonele...","[Cut pork – Cut into 8 equal, thinnish slices ...",
4,Chicken Banh Mi (Vietnamese sandwich),"[2 medium carrots , peeled cut into 2-3mm / 1/...","[Pickle – In a large bowl, dissolve the salt a...",


Our DataFrame is almost done! Now we just need to add tags derived from each food's description. To do so, we'll use an NLP library called spaCy and load one of its pre-trained models. We'll use this model to find tags and filter out unnecessary words.

In [None]:
import spacy

# Load the English NLP model
nlp = spacy.load("en_core_web_sm")

# The food description
text = ("I find it funny that pho is the dish that’s become the superstar of Vietnamese food when bun thit nuong is tastier to me! "
        "I adore the contrast of fresh vegetables and herbs with delicious grilled meats, that it’s light and healthy yet anything but dull. "
        "It’s a big bowl of delicious, and I shared the chicken version many years ago (bun ga nuong). "
        "And as soon as I cracked the pork version, I shared in immediately (just last Wednesday!). "
        "And I’m back today with the noodle bowls recipe that is made using the lemongrass pork – just like you get on the streets of Vietnam!")


# Process the text
doc = nlp(text)

# Coarse part-of-speech labels whose tokens are never kept as tags.
# "PREP" was dropped from this list: it is not one of spaCy's coarse POS
# labels (prepositions are tagged "ADP"), so it could never match a token.
excluded_pos = {"PRON", "VERB", "AUX", "ADP", "SYM"}

# Extract tags with filtering: drop excluded parts of speech, stop words and
# punctuation. Duplicates are kept, in document order.
filtered_tags = [
    token.text
    for token in doc
    if token.pos_ not in excluded_pos
    and not token.is_stop
    and not token.is_punct
]

# Print extracted tags
print(filtered_tags)

['funny', 'pho', 'dish', 'superstar', 'Vietnamese', 'food', 'bun', 'nuong', 'tastier', 'contrast', 'fresh', 'vegetables', 'herbs', 'delicious', 'meats', 'light', 'healthy', 'dull', 'big', 'bowl', 'delicious', 'chicken', 'version', 'years', 'ago', 'bun', 'ga', 'nuong', 'soon', 'pork', 'version', 'immediately', 'Wednesday', 'today', 'noodle', 'bowls', 'recipe', 'lemongrass', 'pork', 'like', 'streets', 'Vietnam']


Based on this filtering, the tags extracted from this description look reasonable. Let's add a `Description` column to the DataFrame so that we can later apply this pipeline to each description and fill in the `Tags` column.

In [None]:
# Add an empty column called Description (every row is set to None).
# NOTE(review): the following cell rebuilds recipes_df from scratch with a
# 'Description' column of its own, so this assignment looks superseded —
# confirm whether this cell is still needed.
recipes_df["Description"] = None

In [None]:
from bs4 import BeautifulSoup
import requests 
# Re-scrape every recipe, this time also capturing the introductory paragraphs
# as 'Description'. Ingredients and instructions are stored as single
# space-joined strings. Records are collected in a list and the frame is built
# once, rather than re-copying it with pd.concat on every iteration.
recipe_columns = ['Title', 'Ingredients', 'Instructions', 'Tags', 'Description']
recipe_records = []
for link in links:
    # Bound the wait per page and fail loudly on an HTTP error.
    recipe_page = requests.get(link, timeout=30)
    recipe_page.raise_for_status()

    recipe_soup = BeautifulSoup(recipe_page.content, 'html.parser')

    recipe_title = recipe_soup.find('h1').get_text()
    print(recipe_title)

    # NOTE(review): the third entry, 'text-align: center;', is an inline style
    # value rather than a CSS class, so it presumably never matches here; pages
    # whose heading is only centred via `style=` end up with an empty
    # description and are patched by hand in later cells.
    h2_heading = recipe_soup.find('h2', class_=['has-text-align-center wp-block-heading', 'wp-block-heading has-text-align-center', 'text-align: center;'])

    # Collect the <p> siblings after the heading, up to the next <h2>/<h3>.
    paragraphs_block = []
    if h2_heading:
        for sibling in h2_heading.find_next_siblings():
            if sibling.name in ("h2", "h3"):
                break
            if sibling.name == "p":
                paragraphs_block.append(sibling.get_text().strip())

    recipe_records.append({
        'Title': recipe_title,
        'Ingredients': ' '.join(extract_info('wprm-recipe-ingredient-group', 'wprm-recipe-ingredients', '▢ ', recipe_soup)),
        'Instructions': ' '.join(extract_info('wprm-recipe-instruction-group', 'wprm-recipe-instructions', '▢ ', recipe_soup)),
        'Description': ' '.join(paragraphs_block),
    })

# 'Tags' has no values yet; keep it as an object column so tags can be
# assigned into it later without a dtype change.
recipes_df = pd.DataFrame(recipe_records, columns=recipe_columns).astype({'Tags': object})

Vietnamese Lettuce Wraps with Peanut Sauce
Vietnamese Caramel Ginger Chicken
Vietnamese lemongrass pork noodle bowls (bun thit nuong)
Vietnamese lemongrass pork steaks
Chicken Banh Mi (Vietnamese sandwich)
Red Vietnamese Fried Rice
Vietnamese Shaking Beef
Vietnamese Baked Chicken
Vietnamese Chicken Salad
Vietnamese Chicken Pho soup (Pho Ga)
Vietnamese Rice Paper Rolls
Vietnamese Pho recipe
{Pilot Travel Video!!} Top 10 BEST Street Food in Vietnam – Ho Chi Minh City
Vietnamese Noodles with Lemongrass Chicken
Vietnamese Caramelised Pork Bowls
Banh Mi ! (Vietnamese sandwich)
Pork Meatballs for Banh Mi
Caramelised Vietnamese Shredded Beef
Bun Cha (Vietnamese Meatballs!)
Vietnamese Coconut Caramel Chicken


In [None]:
# Work on a new frame without index labels 6, 7 and 12; recipes_df itself is
# left untouched because drop() returns a new DataFrame.
# Label 12 is the "Top 10 BEST Street Food" travel post, which is not a recipe.
# NOTE(review): labels 6 and 7 are removed as well, and the notebook does not
# say why — confirm this is intended.
recipes_df1 = recipes_df.drop(index=[6, 7, 12])


In [83]:
# Display the filtered frame to check which descriptions are still empty.
recipes_df1

Unnamed: 0,Title,Ingredients,Instructions,Tags,Description
0,Vietnamese Lettuce Wraps with Peanut Sauce,300g / 10 oz peeled whole cooked prawns/shrimp...,"Pickle first – Put the boiling water, salt and...",,These lettuce wraps are not strictly Vietnames...
1,Vietnamese Caramel Ginger Chicken,"1 kg / 2 lb skinless chicken thigh fillets , c...","Toss chicken with fish sauce and chilli, then ...",,"When you see today’s recipe, you’re going to d..."
2,Vietnamese lemongrass pork noodle bowls (bun t...,1 batch lemongrass marinated pork (it’s marina...,"Pickle – In a large bowl, dissolve the salt an...",,I find it funny that pho is the dish that’s be...
3,Vietnamese lemongrass pork steaks,"500g/1 lb pork shoulder , skinless and boneles...","Cut pork – Cut into 8 equal, thinnish slices o...",,I’ve been wanting to recreate the chargrilled ...
4,Chicken Banh Mi (Vietnamese sandwich),"2 medium carrots , peeled cut into 2-3mm / 1/1...","Pickle – In a large bowl, dissolve the salt an...",,Banh Mi is a meat filled French baguette sandw...
5,Red Vietnamese Fried Rice,"30g / 2 tbsp unsalted butter 3 garlic cloves ,...",Melt most of the butter in a large non-stick s...,,Fried rice always makes for a great quick meal...
8,Vietnamese Chicken Salad,"350g/12oz cooked chicken , cut into thin baton...",Dressing: Shake Dressing ingredients in a jar....,,While this exact salad is not strictly authent...
9,Vietnamese Chicken Pho soup (Pho Ga),"1 tbsp oil , vegetable or canola (or other pla...",Char onion & ginger - Heat oil in a 6 litre / ...,,
10,Vietnamese Rice Paper Rolls,7 – 14 sheets of 22cm/8.5″ round rice paper (N...,Peanut Sauce: Combine the Peanut Dipping Sauce...,,If I took a platter of these to a gathering wi...
11,Vietnamese Pho recipe,"2 large onions , halved 150g / 5oz ginger , s...",Heat a heavy based skillet over high heat (no ...,,


We noticed that some recipes, such as Vietnamese Chicken Pho, are missing their description, so let's complete our DataFrame before we generate the tags.

Pho Ga

In [84]:
# Inspect the row at position 9 of the unfiltered frame (the Pho Ga recipe in
# the recorded run) to confirm that its Description is empty.
recipes_df.iloc[9]

Title                        Vietnamese Chicken Pho soup (Pho Ga)
Ingredients     1 tbsp oil , vegetable or canola (or other pla...
Instructions    Char onion & ginger - Heat oil in a 6 litre / ...
Tags                                                          NaN
Description                                                      
Name: 9, dtype: object

In [85]:
def html_parser(url, timeout=30):
    """Download ``url`` and return its content parsed with BeautifulSoup.

    Args:
        url: address of the page to fetch.
        timeout: seconds to wait for the server before giving up; without it
            a stalled connection would block the notebook indefinitely.

    Returns:
        A ``BeautifulSoup`` object built with the 'html.parser' backend.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so an
            error page is never parsed as if it were the recipe.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')

In [None]:
def get_description(url, title, num_p, df, row_index, column_name):
    """Fill one cell of ``df`` with the paragraphs that follow a given <h2>.

    The page at ``url`` is fetched with ``html_parser``; the <h2> whose text
    equals ``title`` exactly is located, and the text of the next ``num_p``
    <p> elements after it is joined with spaces and written to
    ``df.loc[row_index, column_name]``. ``df`` is modified in place.

    Args:
        url: page to fetch.
        title: exact text of the <h2> heading to anchor on.
        num_p: maximum number of following <p> elements to include.
        df: DataFrame to update.
        row_index: index label of the row to update.
        column_name: column to write the description to.

    Raises:
        ValueError: if no <h2> on the page has text equal to ``title``
            (previously this surfaced as an unbound ``heading`` variable).
    """
    page = html_parser(url)

    heading = None
    for h2 in page.find_all('h2'):
        if h2.get_text() == title:
            # No break: when several headings match, the last one wins,
            # as before.
            heading = h2
    if heading is None:
        raise ValueError(f"No <h2> with text {title!r} found at {url}")

    # Get the paragraphs after the h2
    description_blocks = heading.find_all_next('p', limit=num_p)
    description = ' '.join(block.get_text() for block in description_blocks)
    df.loc[row_index, column_name] = description

In [86]:
# Patch descriptions on a separate copy so recipes_df1 stays as scraped.
recipes_df2 = recipes_df1.copy()
get_description(
    url='https://www.recipetineats.com/vietnamese-chicken-pho-soup-pho-ga/',
    title="Chicken Pho soup",
    num_p=4,
    df=recipes_df2,
    row_index=9,
    column_name="Description",
)

Pho

In [88]:
# Fill the Pho recipe's description from the five paragraphs after the
# "What is Pho?" heading.
get_description(
    url='https://www.recipetineats.com/vietnamese-pho-recipe/',
    title="What is Pho?",
    num_p=5,
    df=recipes_df2,
    row_index=11,
    column_name="Description",
)

Vietnamese Lemon Grass Chicken

In [101]:
lemon_grass_chicken_page = html_parser('https://www.recipetineats.com/vietnamese-chicken-noodle-bowl/')
# Only the first two <h2> elements of the page are considered.
h2s = lemon_grass_chicken_page.find_all('h2', limit=2)

# Reset `heading` first: otherwise a failed match would either raise NameError
# or silently reuse a heading left over from another cell.
heading = None
for h2 in h2s:
    if h2.get_text() == "Vietnamese Noodles with Lemongrass Chicken":
        heading = h2
if heading is None:
    raise ValueError("Heading 'Vietnamese Noodles with Lemongrass Chicken' not found on the page")

# Join the nine paragraphs that follow the heading into one description.
description_blocks = heading.find_all_next('p', limit=9)
description = ' '.join(block.get_text() for block in description_blocks)
recipes_df2.loc[13, "Description"] = description

<h2 style="text-align: center;">Vietnamese Noodles with Lemongrass Chicken</h2>


Banh Mi

In [105]:
banh_mi_page = html_parser('https://www.recipetineats.com/banh-mi-vietnamese-sandwich/')
# Only the first two <h2> elements of the page are considered.
h2s = banh_mi_page.find_all('h2', limit=2)

# Reset `heading` first: otherwise a failed match would silently reuse the
# heading found on a different page by an earlier cell.
heading = None
for h2 in h2s:
    if h2.get_text() == "What is Banh Mi?":
        heading = h2
if heading is None:
    raise ValueError("Heading 'What is Banh Mi?' not found on the page")

# Join the two paragraphs that follow the heading into one description.
description_blocks = heading.find_all_next('p', limit=2)
description = ' '.join(block.get_text() for block in description_blocks)
recipes_df2.loc[15, "Description"] = description

Caramelized Pork Bowls

In [113]:
pork_page = html_parser('https://www.recipetineats.com/vietnamese-caramelised-pork-bowls/')
# The description is the third <p> on the page.
pork_paragraphs = pork_page.find_all('p', limit=5)
paragraph_description = pork_paragraphs[2].get_text()
print(paragraph_description)

# "Vietnamese Caramelised Pork Bowls" is index label 14 in the scraped title
# order. The previous target, 17, is "Caramelised Vietnamese Shredded Beef",
# so this description was overwriting a different recipe's row.
recipes_df2.loc[14, "Description"] = paragraph_description

Here’s a super fast pork stir fry made with ground pork infused with flavours from the streets of Vietnam. With just a handful of ingredients you probably already have, it’s sweet, salty, beautifully caramelised and absolutely irresistible. It’s the quick and easy version of Vietnamese Caramel Pork, a famous Vietnamese food speciality!


Vietnamese Coconut Caramel Chicken

In [130]:
chicken_page = html_parser('https://www.recipetineats.com/vietnamese-coconut-caramel-chicken/')
paragraph_description = chicken_page.find_all('p', limit=7)
# Use paragraphs 2, 3, 5 and 6 (paragraph 4 is deliberately left out).
# They are joined with a space, like every other description in this notebook;
# plain `+` glued the last word of one paragraph to the first word of the next.
description = ' '.join(paragraph_description[i].get_text() for i in (2, 3, 5, 6))

recipes_df2.loc[19, "Description"] = description

In [131]:
# Display the frame after the manual description patches above.
recipes_df2

Unnamed: 0,Title,Ingredients,Instructions,Tags,Description
0,Vietnamese Lettuce Wraps with Peanut Sauce,300g / 10 oz peeled whole cooked prawns/shrimp...,"Pickle first – Put the boiling water, salt and...",,These lettuce wraps are not strictly Vietnames...
1,Vietnamese Caramel Ginger Chicken,"1 kg / 2 lb skinless chicken thigh fillets , c...","Toss chicken with fish sauce and chilli, then ...",,"When you see today’s recipe, you’re going to d..."
2,Vietnamese lemongrass pork noodle bowls (bun t...,1 batch lemongrass marinated pork (it’s marina...,"Pickle – In a large bowl, dissolve the salt an...",,I find it funny that pho is the dish that’s be...
3,Vietnamese lemongrass pork steaks,"500g/1 lb pork shoulder , skinless and boneles...","Cut pork – Cut into 8 equal, thinnish slices o...",,I’ve been wanting to recreate the chargrilled ...
4,Chicken Banh Mi (Vietnamese sandwich),"2 medium carrots , peeled cut into 2-3mm / 1/1...","Pickle – In a large bowl, dissolve the salt an...",,Banh Mi is a meat filled French baguette sandw...
5,Red Vietnamese Fried Rice,"30g / 2 tbsp unsalted butter 3 garlic cloves ,...",Melt most of the butter in a large non-stick s...,,Fried rice always makes for a great quick meal...
8,Vietnamese Chicken Salad,"350g/12oz cooked chicken , cut into thin baton...",Dressing: Shake Dressing ingredients in a jar....,,While this exact salad is not strictly authent...
9,Vietnamese Chicken Pho soup (Pho Ga),"1 tbsp oil , vegetable or canola (or other pla...",Char onion & ginger - Heat oil in a 6 litre / ...,,Chicken Pho – called Pho Ga in Vietnamese – is...
10,Vietnamese Rice Paper Rolls,7 – 14 sheets of 22cm/8.5″ round rice paper (N...,Peanut Sauce: Combine the Peanut Dipping Sauce...,,If I took a platter of these to a gathering wi...
11,Vietnamese Pho recipe,"2 large onions , halved 150g / 5oz ginger , s...",Heat a heavy based skillet over high heat (no ...,,If you’re wondering “What is Pho?” then you’re...


Now that we have added a description for each recipe, let's generate tags from each description.