# # Dependencies and Loading Mistral

In [1]:
!pip install -q -U transformers

In [2]:
!pip install -q -U accelerate
!pip install -q -U bitsandbytes

In [3]:
!pip install -Uqqq pip --progress-bar off


In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
import torch
from transformers import (
    GenerationConfig,
    TextStreamer,
    pipeline,
)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)



In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
import torch

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

In [6]:
model_name = "/kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
        model_name,
        load_in_4bit=True,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
from transformers import (
    GenerationConfig,
    TextStreamer,
    pipeline
)

generation_config = GenerationConfig.from_pretrained(model_name)
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.0001
generation_config.do_sample = True

In [8]:
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

In [9]:
llm = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    generation_config=generation_config,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    streamer=streamer,
    device_map="auto", 
    torch_dtype=torch.bfloat16, 
)
     

In [10]:
prompt = "[INST] Give one named recipe with steps using ingredients: egg whites, new potatoes, kosher salt, finely ground black pepper, rosemary, thyme, parsley. Give ingredients, instructions, source url. [/INST]"
result = llm(prompt)

Recipe: Roasted New Potatoes with Rosemary, Thyme, and Parsley

Ingredients:

* 2 cups new potatoes, washed and cut into bite-sized pieces
* 4 egg whites
* 2 tablespoons kosher salt
* 1 teaspoon finely ground black pepper
* 2 tablespoons chopped fresh rosemary
* 2 tablespoons chopped fresh thyme
* 2 tablespoons chopped fresh parsley
* 2 tablespoons olive oil

Instructions:

1. Preheat the oven to 400°F (200°C).
2. In a large bowl, combine the new potatoes, egg whites, kosher salt, black pepper, rosemary, thyme, parsley, and olive oil. Toss to coat the potatoes evenly.
3. Spread the potatoes out in a single layer on a baking sheet lined with parchment paper.
4. Roast in the preheated oven for 20-25 minutes, or until the potatoes are tender and golden brown.
5. Serve hot and enjoy!

Source URL: <https://www.foodnetwork.com/recipes/alton-brown/roasted-new-potatoes-with-rosemary-thyme-and-parsley-recipe-1941155>


In [11]:
print((result[0]['generated_text']))

[INST] Give one named recipe with steps using ingredients: egg whites, new potatoes, kosher salt, finely ground black pepper, rosemary, thyme, parsley. Give ingredients, instructions, source url. [/INST] Recipe: Roasted New Potatoes with Rosemary, Thyme, and Parsley

Ingredients:

* 2 cups new potatoes, washed and cut into bite-sized pieces
* 4 egg whites
* 2 tablespoons kosher salt
* 1 teaspoon finely ground black pepper
* 2 tablespoons chopped fresh rosemary
* 2 tablespoons chopped fresh thyme
* 2 tablespoons chopped fresh parsley
* 2 tablespoons olive oil

Instructions:

1. Preheat the oven to 400°F (200°C).
2. In a large bowl, combine the new potatoes, egg whites, kosher salt, black pepper, rosemary, thyme, parsley, and olive oil. Toss to coat the potatoes evenly.
3. Spread the potatoes out in a single layer on a baking sheet lined with parchment paper.
4. Roast in the preheated oven for 20-25 minutes, or until the potatoes are tender and golden brown.
5. Serve hot and enjoy!

Sour

In [12]:
import re

# Input string
input_string = (result[0]['generated_text'])
# Define regular expressions
recipe_name_pattern = re.compile(r"Recipe: (.+)")
ingredients_pattern = re.compile(r"Ingredients:(.+?)Instructions:", re.DOTALL)
instructions_pattern = re.compile(r"Instructions:(.+?)Source", re.DOTALL)
source_url_pattern = re.compile(r"Source URL: <(.+)>")

# Extract information
recipe_name_match = recipe_name_pattern.search(input_string)
ingredients_match = ingredients_pattern.search(input_string)
instructions_match = instructions_pattern.search(input_string)
source_url_match = source_url_pattern.search(input_string)

In [13]:
if recipe_name_match:
    print("Recipe Name:", recipe_name_match.group(1).strip())

if ingredients_match:
    print("Ingredients:", ingredients_match.group(1).strip())

if instructions_match:
    print("Instructions:", instructions_match.group(1).strip())

if source_url_match:
    print("Source URL:", source_url_match.group(1).strip())

Recipe Name: Roasted New Potatoes with Rosemary, Thyme, and Parsley
Ingredients: * 2 cups new potatoes, washed and cut into bite-sized pieces
* 4 egg whites
* 2 tablespoons kosher salt
* 1 teaspoon finely ground black pepper
* 2 tablespoons chopped fresh rosemary
* 2 tablespoons chopped fresh thyme
* 2 tablespoons chopped fresh parsley
* 2 tablespoons olive oil
Instructions: 1. Preheat the oven to 400°F (200°C).
2. In a large bowl, combine the new potatoes, egg whites, kosher salt, black pepper, rosemary, thyme, parsley, and olive oil. Toss to coat the potatoes evenly.
3. Spread the potatoes out in a single layer on a baking sheet lined with parchment paper.
4. Roast in the preheated oven for 20-25 minutes, or until the potatoes are tender and golden brown.
5. Serve hot and enjoy!
Source URL: https://www.foodnetwork.com/recipes/alton-brown/roasted-new-potatoes-with-rosemary-thyme-and-parsley-recipe-1941155


In [14]:
! pip install bert-score

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: bert-score
Successfully installed bert-score-0.3.13


In [15]:
! pip3 install ingredient_parser_nlp

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting ingredient_parser_nlp
  Downloading ingredient_parser_nlp-0.1.0b7-py3-none-any.whl.metadata (4.3 kB)
Collecting python-crfsuite (from ingredient_parser_nlp)
  Downloading python_crfsuite-0.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (993 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m993.5/993.5 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Downloading ingredient_parser_nlp-0.1.0b7-py3-none-any.whl (440 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m440.2/440.2 kB[0m [31m27.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-crfsuite, ingredient_parser_nlp
Successfully installed ingredient_parser_nlp-0.1.0b7 python-crfsuite-0.9.9


### BERT SCORE 

In [16]:
import torch
from bert_score import score

# Reference and candidate sentences
reference = recipe_name_match.group(1).strip()
candidate = "Crispy Salt and Pepper Potatoes"

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Calculate BERT score
P, R, F1 = score([reference], [candidate], lang="en", device=device)

# Print the results
print(f"Precision: {P.item():.4f}")
print(f"Recall: {R.item():.4f}")
print(f"F1 score: {F1.item():.4f}")


Downloading config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Precision: 0.8631
Recall: 0.8799
F1 score: 0.8714


### BLUE SCORE 

In [17]:
 
import re
import pandas as pd

def extract_ingredient_names(ing_list):
    ingredient_names = []

    for parsed_ingredient in ing_list:
        # Extract the text attribute from the IngredientText object
        pattern = re.compile(r"name=IngredientText\(text='([^']+)'")
        matches = pattern.findall(str(parsed_ingredient))
        ingredient_names.extend([match.lower() for match in matches])

    return ingredient_names

# Example usage with a DataFrame column 'parsed_ingredients'
# Assuming sample_df is your DataFrame
    


In [18]:

# for ingredients
from ingredient_parser import parse_multiple_ingredients
import nltk
from nltk.translate.bleu_score import sentence_bleu

# Reference and candidate sentences
reference = instructions_match.group(1).strip().split()
candidate = "egg whites, new potatoes, kosher salt, finely ground black pepper, rosemary, thyme, parsley"

# Tokenize the sentences
candidate_tokenized = candidate.split()
reference_tokenized = [word.lower() for phrase in reference for word in phrase.split()]



# Calculate BLEU score
bleu_score = sentence_bleu(reference, candidate)

# Print the result
print(f"BLEU Score: {bleu_score:.4f}")


BLEU Score: 0.4511


In [19]:
# for recipe

reference = ingredients_match.group(1).strip()
candidate = "Preheat oven to 400°F and line a rimmed baking sheet with parchment. In a large bowl, whisk the egg whites until foamy (there shouldn’t be any liquid whites in the bowl). Add the potatoes and toss until they’re well coated with the egg whites, then transfer to a strainer or colander and let the excess whites drain. Season the potatoes with the salt, pepper, and herbs. Scatter the potatoes on the baking sheet (make sure they’re not touching) and roast until the potatoes are very crispy and tender when poked with a knife, 15 to 20 minutes (depending on the size of the potatoes).\nTransfer to a bowl and serve."
# Tokenize the sentences
reference_tokenized = reference.split()
candidate_tokenized = candidate.split()

# Calculate BLEU score
bleu_score = sentence_bleu(reference_tokenized, candidate_tokenized)

# Print the result
print(f"BLEU Score: {bleu_score:.4f}")

BLEU Score: 0.3088


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


### Verification of Source URL

In [20]:
!pip install validators

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting validators
  Downloading validators-0.22.0-py3-none-any.whl.metadata (4.7 kB)
Downloading validators-0.22.0-py3-none-any.whl (26 kB)
Installing collected packages: validators
Successfully installed validators-0.22.0


# **Prompt Engineering**

## Simple Prompt - 1

In [21]:
lst=[['whole chicken', 'kosher salt', 'acorn squash', 'sage', 'rosemary', 'unsalted butter', 'ground allspice', 'crushed red pepper flakes', 'freshly ground black pepper', 'white bread', 'apples', 'extra-virgin olive oil', 'red onion', 'apple cider vinegar', 'white miso', 'all-purpose flour', 'unsalted butter', 'dry white wine', 'unsalted chicken broth', 'white miso', 'kosher salt', 'freshly ground pepper'],
['egg whites', 'new potatoes', 'kosher salt', 'finely ground black pepper', 'rosemary', 'thyme', 'parsley'],
['evaporated milk', 'whole milk', 'garlic powder', 'onion powder', 'smoked paprika', 'freshly ground black pepper', 'kosher salt', 'extra-sharp cheddar', 'full-fat cream cheese', 'elbow macaroni'],
['round italian', 'olive oil', 'sweet italian sausage', 'unsalted butter', 'onions', 'celery', 'garlic', 'eggs', 'heavy cream', 'turkey giblet stock or reduced-sodium chicken broth', 'parmigiano-reggiano', 'flat-leaf parsley', 'qt shallow ceramic or glass baking dish'],
['dark brown sugar', 'hot water', 'bourbon', 'fresh lemon juice', 'apple butter', 'garnish : orange twist and, or ground cinnamon'],
['chamomile tea bags', 'reposado tequila', 'fresh lemon juice', 'agave nectar'],
['grand marnier', 'amaro averna', 'pat salted butter', 'hot apple cider', 'fresh lemon juice', 'garnish : freshly ground pink peppercorns'],
['granulated sugar', 'ground turmeric', 'amontillado sherry', 'bourbon, scotch', 'turmeric syrup', 'fresh lemon juice', 'garnish : dehydrated lemon wheel'],
['assorted dals', 'white jasmine rice or other long-grain rice', 'pearl barley', 'bone-in lamb stew meat', 'kosher salt', 'fresh ginger', 'shallots', 'ghee or vegetable oil', 'garlic', 'garam masala', 'cayenne pepper', 'ground turmeric', 'green thai chiles', 'cilantro', 'white onion', 'limes'],
['basic lentil soup', 'onion', 'turmeric', 'cumin', 'aleppo pepper or, red pepper flakes', 'tomato paste', 'eggs', 'ghee, unsalted butter', 'whole cumin seeds', 'olive oil', 'kosher salt and freshly ground black pepper', 'parsley'],
['chipotle in adobo sauce', 'garlic', 'kosher salt', 'cornstarch', 'evaporated milk', 'sharp or extra-sharp cheddar cheese', 'cream cheese', 'mayonnaise', 'pimento peppers', 'chipotle pepper purée'],
['bone-in', 'parsnips', 'carrots', 'maple syrup', 'unsweetened apple juice', 'dark brown sugar', 'smooth dijon mustard', 'grainy dijon mustard', 'apple cider vinegar', 'kosher salt'],
['bacon', 'celery', 'carrot', 'onion', 'cinnamon', 'boiling potatoes', 'granny smith apples', 'butternut squash', 'reduced-sodium chicken stock or broth', 'water', 'sour cream'],
['anchovies', 'garlic', 'mayonnaise', 'dijon mustard', 'extra-virgin olive oil', 'freshly ground black pepper', 'whole chicken', 'kosher salt', 'shallots', 'lemons', 'parmesan', 'country-style bread', 'romaine hearts'],
['skinless, boneless chicken thighs', 'kosher salt, freshly ground pepper', 'unsalted butter', 'leeks', 'zest and, lemon', 'long-grain white rice', 'low-sodium chicken broth', 'oil-packed anchovy fillet', 'garlic', 'capers', 'red pepper flakes', 'tender herb leaves', 'extra-virgin olive oil'],
['instant corn masa flour', 'lard, vegetable oil', 'diamond crystal or, morton kosher salt', 'guajillo or dried new mexico chiles', 'morita chiles', 'ground coriander or cumin', 'medium shrimp', 'garlic', 'thyme', 'apple cider vinegar', 'agave syrup or honey', 'unsalted butter', 'onion', 'avocado', 'cilantro leaves', 'and lime wedges'],
['vegetable oil', 'corn tortillas', 'fresh chorizo', 'garlic', 'white onion', 'kosher salt', 'black beans', 'low-sodium chicken broth', 'queso fresco or cotija cheese', 'cilantro leaves, avocado'],
['virgin coconut oil', 'ripe, plátanos manzanos or, ripe (black) plantains', 'kosher salt', 'plain unsweetened coconut milk yogurt or greek yogurt', 'ripe mangoes', 'toasted nuts', 'seeds', 'and / or unsweetened, coconut'],
['skin-on', 'kosher salt', 'potatoes', 'shallots', 'unsalted butter', 'heavy cream', 'garlic', 'freshly ground black pepper', 'or ground nutmeg', 'thyme', 'sage leaves', 'country-style bread', 'lemon'],
['of zest no pith, orange', 'fresh ginger', 'fresh lime juice', 'champagne vinegar', 'sugar', 'kosher salt', 'extra-virgin olive oil', 'thai chile', 'beets', 'extra-virgin olive oil', 'white wine vinegar', 'water', 'kosher salt and freshly ground black pepper', 'extra-virgin olive oil', 'cashews', 'sunchokes', 'vegetable oil', 'kosher salt', 'orange', 'roasted beets', 'extra-virgin olive oil', 'kosher salt', 'pickled orange-ginger puree', 'cashews', 'tarragon', 'dill fronds', 'sunchoke chips'],
['toasted pepitas', 'kale', 'garlic', 'chickpeas', 'ground turmeric', 'coriander (cilantro) leaves', 'sea salt and cracked black pepper', 'pumpkin', 'extra virgin olive oil', 'plain greek-style yogurt and hulled tahini', 'carrots', 'apple cider vinegar', 'honey', 'red chiles', 'coriander (cilantro) leaves'],
['butternut squash', 'extra virgin olive oil', 'pure maple syrup', 'chile flakes', 'sea salt and cracked black pepper', 'almonds', 'quinoa', 'mint leaves', 'wild rocket (arugula) leaves', 'hulled tahini', 'lemon juice', 'water', 'garlic', 'sea salt flakes'],
['ghee', 'paneer', 'confectioners’ sugar', 'ground cardamom', 'semolina', 'ghee', 'whole milk', 'cashews', 'golden raisins', 'granulated sugar', 'freshly whipped cream, almonds'],
['whole milk', 'lemons', 'kosher salt'],
['carrot', 'tomatoes', 'extra virgin olive oil', 'garlic', 'fresh spinach', 'kosher salt', 'ground turkey', 'ground cumin', 'smoked paprika', 'ground black pepper', 'r-rated onions', 'red wine vinegar', 'eggs parmigiano reggiano'],
['yellow or white onions', 'olive or vegetable oil', 'kosher salt'],
['extra-virgin olive oil', 'white miso', 'pure maple syrup or brown sugar', 'regular soy sauce or tamari', 'kabocha squash', 'extra-virgin olive oil', 'ginger', 'garlic', 'low-sodium vegetable broth', 'white miso', 'kosher salt, freshly ground pepper', 'broccolini', 'wavy ramen noodles', 'cilantro leaves with tender stems'],
['coconut oil', 'calabaza pumpkin', 'onion', 'garlic', 'ají dulce pepper', 'dark brown sugar', 'ground cumin', 'coconut milk', 'chicken broth or vegetable broth', 'salt and freshly ground black pepper', 'fresh cilantro'],
['italian eggplants', 'canola oil', 'onion', 'garlic', 'tomato', 'coarse salt', 'freshly ground black pepper', 'cilantro', 'roti'],
['chicken', 'lime juice', 'green seasoning', 'coarse salt', 'scotch bonnet pepper or other hot red chili pepper', 'garlic', 'trinidad curry powder', 'canola oil', 'onion', 'chicken stock or water', 'roti or white rice'],
['cardamom pods', 'coriander seeds', 'cumin seeds', 'mustard seeds', 'fenugreek seeds', 'curry leaves', 'whole black peppercorns', 'whole cloves', 'ground turmeric'],
['garlic', 'onions', 'celery', 'red bell pepper', 'green bell pepper', 'olive oil', 'chicken broth', 'whole tomatoes', 'bay leaf', 'ground cumin', 'salt', 'freshly ground black pepper', 'large shrimp', 'accompaniment, uncooked rice', 'prepared according'],
['fresh chives', 'fresh shado beni or cilantro leaves', 'fresh thyme', 'fresh oregano', 'fresh parsley', 'garlic'],
['dry or 1, pigeon peas, pinto beans', 'long-grain rice', 'canola oil', 'sugar', 'chicken', 'onion', 'garlic', 'coconut milk', 'bay leaf', 'green seasoning', 'parsley', 'thyme', 'carrots', 'scallions', 'kosher salt', 'fresh calabaza or butternut squash', 'whole scotch bonnet pepper', 'ketchup', 'butter'],
['yukon gold potatoes', 'garlic', 'kosher salt', 'whole milk', 'thyme', 'unsalted butter', 'freshly ground black pepper', 'sour cream', 'a potato ricer or food mill'],
['london dry gin', 'cynar', 'fino sherry', 'orange twist'],
['graham cracker crumbs', 'granulated sugar', 'light brown sugar', 'kosher salt', 'unsalted butter', 'cream cheese', 'granulated sugar', 'creole cream cheese', 'vanilla bean paste', 'eggs', 'ingredient info, sour cream mixed with, buttermilk, creole cream cheese.', 'unsalted butter', 'granulated sugar', 'light brown sugar', 'honey', 'granny smith apples', 'lemon zest', 'fresh lemon juice', 'kosher salt', 'ground cinnamon', 'ground cardamom', 'ground ginger', 'vanilla extract'],
['skim milk', 'buttermilk', 'rennet tablet'],
['nonfat dry milk powder', 'baking powder', 'granulated sugar or light brown sugar', 'all-purpose flour', 'vegetable oil', 'vegetable shortening', 'ghee', 'unsalted butter', 'butter'],
['eggs', 'mayonnaise', 'dijon mustard', 'cayenne', 'garnishes : paprika and, fresh chives', 'pastry, star tip'],
['green, unripe mangoes', 'shallots', 'indian or thai green chiles', 'ginger', 'garlic', 'coconut vinegar or white vinegar', 'salt', 'coconut milk', 'coconut oil', 'black mustard seeds', 'dried red chiles', 'curry leaves', 'fried onions', 'basmati rice'],
['piloncillo or, dark brown sugar', 'cardamom pods', 'allspice berries', 'black peppercorns', 'kosher salt', 'orange zest', 'cold-brew coffee', 'unsweetened coconut milk or half-and-half'],
['eggs', 'pumpkin purée', 'milk', 'unsalted butter', 'all-purpose flour', 'cinnamon', 'allspice', 'ground ginger', 'nutmeg', 'kosher salt', 'apple cider or juice', 'maple syrup', 'powdered sugar'],
['chicken legs', 'kosher salt', 'extra-virgin olive oil', 'fennel', 'red flame or green table grapes', 'unsalted chicken broth', 'honey', 'calabrian chile paste or, calabrian chiles', 'red wine vinegar'],
['onion', 'unsalted butter', 'all-purpose flour', 'milk', 'boiling potatoes', 'sharp cheddar', 'dry bread crumbs'],
['skinless salmon fillet', 'greek yogurt', 'garlic granules', 'rose harissa', 'ground turmeric', 'paprika', 'zest of, unwaxed lime and', 'olive oil', 'maldon sea salt flakes and freshly ground black pepper', 'tortilla wraps', 'sliced tomatoes', 'onion', 'coriander leaves', 'greek yogurt'],
['water', 'instant yeast', 'bread flour', 'canola oil', 'morton kosher salt'],
['beets', 'white vinegar', 'boneless pork butt', 'beef, chicken', 'bay leaf', 'whole allspice berries', 'whole black peppercorns', 'carrots', 'celery', 'green cabbage', 'idaho potatoes', 'lima beans', 'salt'],
['skin-on red snapper fillets', 'okra salt or kosher salt', 'alligator pepper or freshly ground black pepper', 'cubeb pepper or ashanti pepper', 'green or medium-ripe papaya', 'cilantro', 'extra-virgin olive oil', 'red onion', 'garlic', 'ginger', 'ground dried bird chiles or other hot chile powder', 'lemon', 'puna, white yams, sweet potatoes', 'coconut sugar or dark brown sugar', 'kosher salt', 'extra-virgin olive oil', 'freshly ground black pepper', 'chile powder', 'drunk apricot shito', 'a spice mill or mortar and pestle'],
['dried apricots', 'dark rum or arrack', 'unsalted butter', 'ground cinnamon', 'coconut sugar or dark brown sugar', 'grains of selim pods', 'cubeb pepper or ashanti pepper', 'peanut oil or vegetable oil', 'white onion', 'red onion', 'garlic', 'ginger', 'red pepper flakes', 'rosemary', 'thyme', 'dried prawn or shrimp powder', 'okra salt or kosher salt', 'tomato purée', 'dried chile powder', 'low-sodium chicken broth', 'scotch bonnet or habanero chile', 'a spice mill or mortar and pestle'],
['dried beans', 'dried kombu', 'tomatoes', 'celery', 'carrot', 'leek', 'shallots', 'garlic', 'oil-packed anchovy fillets', 'extra-virgin olive oil', 'maple syrup', 'kosher salt', 'red pepper flakes', 'whole-grain mustard', 'bacon', 'romano beans or green beans', 'shallots', 'red pepper flakes', 'zest and, lemon', 'garlic', 'pickled green beans', 'cherry tomatoes', 'extra-virgin olive oil', 'sherry vinegar', 'tomato brown butter', 'feta', 'parsley leaves', 'and marjoram leaves'],
['tomato', 'unsalted butter', 'kosher salt', 'freshly ground black pepper'],
['acorn squash', 'unsalted butter', 'pure maple syrup', 'good olive oil', 'kosher salt and freshly ground black pepper', 'flaked sea salt'],
['extra-virgin olive oil', 'serrano chile or jalapeño', 'fresh ginger', 'garlic', 'scallions', 'ground turmeric', 'corn', 'grains', 'unsweetened coconut milk', 'kosher salt', 'store-bought crispy onions or shallots', 'lime wedges'],
['mixed heirloom tomatoes', 'extra-virgin olive oil', 'kosher salt', 'freshly ground black pepper', 'garlic', 'unsalted butter', 'stoned wheat thins or other whole grain crackers', 'eggs', 'taleggio cheese', 'parmesan', 'mayonnaise', 'thyme', 'shallot', 'special equipment, springform pan'],
['scallions', 'ginger', 'garlic', 'coriander seeds', 'cumin seeds', 'black mustard seeds', 'kashmiri chile powder or, cayenne powder', 'ground turmeric', 'extra-virgin olive oil', 'paneer, planks', 'kosher salt', 'cherry tomatoes', 'sugar snap peas', 'sugar', 'mint leaves', 'rice'],
['skinless, boneless chicken breasts', 'kosher salt', 'scallions', 'persian cucumbers', 'sesame seeds', 'fresh lime juice', 'tahini', 'soy sauce', 'honey', 'red pepper flakes', 'romaine or green or red leaf lettuce, little gem lettuce', 'cilantro leaves with tender stems', 'lime'],
['extra-virgin olive oil', 'garlic', 'double-concentrated tomato paste', 'pure maple syrup', 'soy sauce', 'worcestershire sauce', 'unseasoned rice vinegar', 'sriracha or other hot sauce', 'vegetable oil', 'whole chicken', 'kosher salt'],
['egg yolks', 'kefir (cultured milk) or plain whole-milk yogurt', 'dill fronds with tender stems', 'garlic', 'unsalted butter', 'pine nuts, almonds', 'dried currants or raisins', 'kosher salt', 'ground cumin', 'freshly ground black pepper', 'ground lamb', 'orecchiette or other short pasta', 'lemon'],
['short-grain sushi rice', 'scallions', 'ginger', 'vegetable oil', 'skinless, boneless cod', 'kosher salt', 'freshly ground black pepper', 'unseasoned rice vinegar', 'sugar', 'unsalted butter'],
['boneless, skinless center-cut salmon', 'scallions', 'ginger', 'garlic', 'mayonnaise', 'kosher salt', 'sesame oil', 'unseasoned rice vinegar', 'persian cucumbers', 'serrano chile', 'sugar', 'vegetable oil', 'rice flour', 'tender herbs', 'watercress', 'sesame seeds', 'brioche buns'],
['guajillo chiles', 'morita chiles', 'skinless striped bass or halibut fillets', 'fresh orange juice', 'achiote paste', 'garlic, whole', 'fresh lime juice', 'kosher salt', 'red onion', 'habanero chile', 'pineapple', 'cilantro', 'vegetable oil', 'corn tortillas', 'lime wedges'],
['raw nuts', 'raw pumpkin seeds', 'extra-virgin olive oil', 'old-fashioned oats', 'dried fruit', 'unsalted or low-salt natural peanut butter', 'honey', 'kosher salt', 'vanilla extract', 'egg white'],
['whole-milk yogurt', 'fresh lemon juice', 'sugar', 'kosher salt', 'red lentils', 'zucchini', 'onion', 'kosher salt', 'kashmiri chile powder or, cayenne pepper', 'ground turmeric', 'parsley leaves', 'lemon zest', 'ghee or neutral vegetable oil'],
['vegetable oil', 'cooking onions'],
['baby yukon gold potatoes', 'kosher salt', 'sour cream', 'mayonnaise', 'onion powder', 'dijon mustard', 'freshly ground black pepper', 'garlic', 'chives', 'red onion', 'sour cream and onion potato chips'],
['ground coriander', 'ground cumin', 'ground turmeric', 'red pepper flakes', 'extra-virgin olive oil', 'garlic', 'skinless, boneless chicken thighs', 'kosher salt', 'fresh lime juice', 'ripe avocados', 'ripe mangoes', 'red onion', 'torn little gem or green lettuce leaves', 'cherry tomatoes', 'flaky sea salt', 'mild red pepper flakes', 'and basil and / or mint leaves'],
['golden raisins', 'thin-skinned cucumbers', 'kosher salt', 'extra-virgin olive oil', 'pine nuts', 'castelvetrano olives', 'egg', 'garlic', 'panko', 'parsley', 'red pepper flakes', 'freshly ground black pepper', 'ground pork', 'red onion', 'tender herb leaves', 'red wine vinegar', 'plain whole-milk greek yogurt'],
['butter', 'whole graham crackers', 'digestive biscuits', 'sweetened condensed milk', 'hass avocados', 'cream cheese', 'fine sea salt', 'lemons', 'limes'],
['all purpose flour', 'unsweetened cocoa powder', 'baking soda', 'salt', 'sugar', 'unsalted butter', 'vegetable oil', 'eggs', 'vanilla extract', 'buttermilk', 'unpeeled zucchini', 'semisweet chocolate chips', 'walnuts'],
['hazelnuts', 'bow tie pasta', 'unsalted butter', 'garlic', 'salt and freshly ground black pepper', 'crushed red pepper flakes', 'swiss chard', 'parmesan cheese', 'balsamic vinegar'],
['olive oil', 'garlic', 'cherry tomatoes', 'dry white wine', 'fresh basil leaves', 'fine sea salt', 'freshly ground black pepper', 'cooking fat', 'scallions', 'garlic', 'potato', 'fresh, frozen corn kernels', 'eggs', 'red pepper flakes', 'feta cheese'],
['all-purpose flour', 'salt', 'dried parsley', 'unsalted butter', 'sour cream or plain full-fat yogurt', 'white wine vinegar', 'ice water', 'cooking fat', 'onion', 'mixed heirloom tomatoes', 'hard salty cheese', 'mozzarella cheese', 'mayonnaise', 'basil leaves', 'parsley', 'thyme', 'salt and freshly ground black pepper', 'egg', 'milk', 'cream', 'or water'],
['" bone-in pork chops', 'kosher salt', 'coriander seeds', 'small padrón chiles or shishito peppers', 'red onion', 'garlic', 'carrots', 'red wine vinegar', 'sherry vinegar or red wine vinegar', 'honey', 'paprika', 'extra-virgin olive oil', 'oregano'],
['dashi', 'doenjang', 'gochujang', 'gochugaru', 'garlic', 'soy sauce', 'onions', 'green zucchini', 'red korean chili pepper or other long hot chili', 'green korean chili pepper or other long hot chili', 'brisket', 'soft or silken tofu', 'scallions'],
['chicken wingettes, chicken', 'kosher salt', 'ground black pepper', 'potato starch', 'vegetable oil', 'peanuts', 'rice syrup or honey', 'soy sauce', 'brown or white sugar', 'white vinegar', 'yellow mustard', 'vegetable oil', 'garlic', 'peeled ginger', 'dried red chili peppers', 'sesame seeds', 'crushed red pepper flakes'],
['onion', 'gochujang', 'soy sauce', 'garlic', 'sugar', 'sake', 'mirin', 'sesame oil', 'skinless pork belly', 'unsalted butter', 'slider buns', 'mayonnaise', 'english cucumbers'],
['whole chicken', 'white or black peppercorns', 'garlic', 'cilantro roots', 'light brown sugar', 'fish sauce', 'ground turmeric', 'kosher salt', 'ground coriander', 'ground cumin', 'ground cardamom', 'fresh ripe pineapple', 'distilled white vinegar or apple cider vinegar', 'garlic', 'red jalapeños or serrano chiles', 'granulated sugar', 'kosher salt', 'freshly ground white pepper', 'saffron threads, ground turmeric', 'granulated sugar', 'kosher salt', 'virgin coconut oil', 'vegetable oil', 'cilantro', 'jasmine rice, coconut rice'],
['egg yolks', 'all-purpose flour', 'almond flour, all-purpose flour', 'sugar', 'kosher salt', 'unsalted butter', 'whole milk', 'vanilla paste or, vanilla extract', 'egg yolks', 'sugar', 'cornstarch', 'kosher salt', 'unsalted butter', 'fresh blueberries', 'fresh blackberries', 'special equipment, springform pan or tart pan'],
['raw thai jasmine rice', 'coconut milk', 'water', 'granulated sugar', 'salt', 'coconut cream'],
['raw sesame seeds', 'lemon', 'peaches ', 'sugar', 'kosher salt', 'all-purpose flour', 'unsalted butter', 'ground cinnamon', 'tahini', 'sesame oil', 'cornstarch', 'vanilla ice cream'],
['hazelnuts', 'sugar', 'all-purpose flour', 'unsalted butter', 'eggs', 'vanilla extract', 'almond extract', 'tart shell', 'firm-ripe bosc or anjou pears', 'apricot preserves'],
['kosher salt', 'broccoli', 'whole-wheat penne pasta', 'extra-virgin olive oil', 'panko or freezer bread crumbs', 'oil-packed anchovy fillets', 'capers', 'garlic', 'red pepper flakes'],
['water', 'dashima', 'dried shiitake mushrooms', 'dried anchovies'],
['water', 'sour tamarind pulp or paste', 'fresh ginger', 'sugar', 'club soda, water'],
['saffron strands', 'sugar', 'green cardamom pods', 'ground sumac', 'club soda or water'],
['smooth peanut butter', 'light brown sugar', 'egg', 'all-purpose flour', 'bicarbonate of soda', 'fine sea salt', 'salted peanuts', 'unsalted butter', 'cocoa powder', 'caster, sugar', 'light brown sugar', 'fine sea salt', 'vanilla extract', 'eggs', 'all-purpose flour'],
['neutral cooking oil', 'onion', 'garlic', 'poblano chiles', 'corn tortillas', 'limes', 'bacon', 'medium shrimp', 'kosher salt', 'queso asadero cheese', 'limes', 'tangy red salsa'],
['cream cheese', 'kimchi', 'scallions', 'cilantro leaves', 'lime', 'kosher salt', 'country-style bread', 'chili oil and toasted white sesame seeds'],
['scallions', 'unsalted butter', 'ginger', 'tomatoes', 'white or regular soy sauce', 'kosher salt', 'toasted country-style bread, rice or pasta'],
['extra-virgin olive oil', 'garlic', 'cherry tomatoes', 'basil', 'red pepper flakes', 'kosher salt', 'sugar', 'casarecce or other medium-size pasta', 'parmesan'],
['sleeve, ritz crackers', 'raw peanuts or other nut or seed', 'light or dark brown sugar', 'diamond crystal or, morton kosher salt', 'unsalted butter', 'eggs', 'light or dark brown sugar', 'diamond crystal or, morton kosher salt', 'vanilla extract', 'natural peanut butter or other natural nut or seed butter', 'heavy cream', 'grape jelly', 'jam'],
['semi-pearled farro', 'kosher salt', 'eggs', 'watermelon radish or 6-8 red radishes', 'unseasoned rice vinegar', 'sugar', 'block extra-firm tofu', 'grapeseed or vegetable oil', 'fresh lime juice', 'gochujang', 'mayonnaise', 'soy sauce', 'sesame oil', 'zucchini or summer squash', 'raw vegetables', 'scallions', 'sugar snap peas', 'baby bok choy', 'cucumbers', 'pea shoots'],
['heirloom tomatoes', 'mixed cherry tomatoes', 'kosher salt', 'lemon', 'garlic', 'extra-virgin olive oil', 'zaatar', 'pita chips', 'honey', 'feta', 'basil leaves', 'mint leaves'],
['skin-on, chicken thighs', 'kosher salt', 'cherry tomatoes', 'harissa paste', 'red wine vinegar', 'oregano', 'feta, planks', 'crusty bread'],
['jalapeños', 'ginger', 'garlic', 'cilantro leaves', 'fresh lime juice', 'tahini', 'kosher salt', 'extra-virgin olive oil', 'garlic', 'chickpeas rinsed', 'corn', 'spice blend', 'kosher salt', 'tender greens', 'avocados', 'tomatoes', 'feta', 'cilantro', 'and lime wedges'],
['unsalted butter', 'raw pistachios', 'egg', 'light brown sugar', 'all-purpose flour', 'kosher salt', 'baking powder', 'eggs', 'light brown sugar', 'sour cream', 'vanilla bean paste or vanilla extract', 'ground cardamom', 'ground ginger', 'orange zest', 'kosher salt', 'all-purpose flour', 'stone fruit', 'coarse sugar', 'powdered sugar, cream', '" -diameter springform pan'],
['heirloom bananas or small plantains', 'crema or cre me frai che', 'crumbled queso fresco'],
['fresh chorizo', 'ground beef', 'tomato paste', 'cumin', 'onion', 'jalapeños', 'kosher salt', 'beefsteak tomatoes', 'corn kernels', 'cheddar cheese', 'eggs', 'unsalted butter', 'sour cream', 'cornmeal', 'baking powder'],
['garlic', 'pickled peppers', 'stone-ground or whole grain mustard', 'kosher salt', 'plum tomatoes', 'thick-cut bacon', 'crusty multigrain bread', 'slightly firm peaches']]


In [22]:
# final prompt engineering
final_prompts=[]
for ingredient in lst:
    #print(ingredient)
    length_ing = len(ingredient)
    prompt="[INST] Give me recipe name using "
    for j in range(length_ing):
        prompt +=ingredient[j]
        if j==length_ing-1:
            prompt+='.'
        else:
            prompt+=', '
    final_prompts.append(prompt)
final_prompt_eng = []
#Prompt engineering 
for instruction in final_prompts: 
  instruction+= " Answer in format Recipe:  [/INST]"
  final_prompt_eng += [instruction]
print(final_prompt_eng[:15])

['[INST] Give me recipe name using whole chicken, kosher salt, acorn squash, sage, rosemary, unsalted butter, ground allspice, crushed red pepper flakes, freshly ground black pepper, white bread, apples, extra-virgin olive oil, red onion, apple cider vinegar, white miso, all-purpose flour, unsalted butter, dry white wine, unsalted chicken broth, white miso, kosher salt, freshly ground pepper. Answer in format Recipe:  [/INST]', '[INST] Give me recipe name using egg whites, new potatoes, kosher salt, finely ground black pepper, rosemary, thyme, parsley. Answer in format Recipe:  [/INST]', '[INST] Give me recipe name using evaporated milk, whole milk, garlic powder, onion powder, smoked paprika, freshly ground black pepper, kosher salt, extra-sharp cheddar, full-fat cream cheese, elbow macaroni. Answer in format Recipe:  [/INST]', '[INST] Give me recipe name using round italian, olive oil, sweet italian sausage, unsalted butter, onions, celery, garlic, eggs, heavy cream, turkey giblet st

In [23]:
len(final_prompt_eng)

100

In [24]:
import pandas as pd
file_path = "/kaggle/input/final-recipes/final_recipes.xlsx"
# Read the CSV file and handle the error by skipping the problematic row
# Read the Excel file into a pandas DataFrame
try:
    df = pd.read_excel(file_path)
    # Now 'df' contains the data from the Excel file
except pd.errors.ParserError as e:
    print(f"ParserError: {e}")

In [25]:
recipe_ingredient_answers = (df.head(100)['final_ing']).tolist()
recipe_answers = (df.head(100)['Instructions']).tolist()

In [26]:
predictions = []
for question in final_prompt_eng[:20]:  # here we only use the first 100 for testing
    print(f"Question: ", question)
    result = llm(question)  
    predictions.append(result)

Question:  [INST] Give me recipe name using whole chicken, kosher salt, acorn squash, sage, rosemary, unsalted butter, ground allspice, crushed red pepper flakes, freshly ground black pepper, white bread, apples, extra-virgin olive oil, red onion, apple cider vinegar, white miso, all-purpose flour, unsalted butter, dry white wine, unsalted chicken broth, white miso, kosher salt, freshly ground pepper. Answer in format Recipe:  [/INST]
Recipe: Whole Chicken with Acorn Squash, Sage, Rosemary, and Apple Cider Vinegar Dressing

Ingredients:

* 1 whole chicken (about 3-4 lbs)
* 2 acorn squashes, halved and seeded
* 2 tbsp kosher salt
* 1 tsp ground allspice
* 1 tsp crushed red pepper flakes
* 1 tsp freshly ground black pepper
* 1 loaf white bread, torn into small pieces
* 2 apples, cored and sliced
* 2 tbsp extra-virgin olive oil
* 1 red onion, thinly sliced
* 2 tbsp apple cider vinegar
* 1 tbsp white miso
* 1 tbsp all-purpose flour
* 2 tbsp unsalted butter
* 1 cup dry white wine
* 1 cup un



Recipe: Spicy Lentil Soup with Tomato Paste and Eggs
Question:  [INST] Give me recipe name using chipotle in adobo sauce, garlic, kosher salt, cornstarch, evaporated milk, sharp or extra-sharp cheddar cheese, cream cheese, mayonnaise, pimento peppers, chipotle pepper purée. Answer in format Recipe:  [/INST]
Recipe: Chipotle Adobo Mac and Cheese
Question:  [INST] Give me recipe name using bone-in, parsnips, carrots, maple syrup, unsweetened apple juice, dark brown sugar, smooth dijon mustard, grainy dijon mustard, apple cider vinegar, kosher salt. Answer in format Recipe:  [/INST]
Recipe: Maple Mustard Glazed Chicken with Parsnips and Carrots
Question:  [INST] Give me recipe name using bacon, celery, carrot, onion, cinnamon, boiling potatoes, granny smith apples, butternut squash, reduced-sodium chicken stock or broth, water, sour cream. Answer in format Recipe:  [/INST]
Recipe: Bacon and Vegetable Soup with Cinnamon and Granny Smith Apples
Question:  [INST] Give me recipe name using an

In [27]:
print(predictions)

[[{'generated_text': '[INST] Give me recipe name using whole chicken, kosher salt, acorn squash, sage, rosemary, unsalted butter, ground allspice, crushed red pepper flakes, freshly ground black pepper, white bread, apples, extra-virgin olive oil, red onion, apple cider vinegar, white miso, all-purpose flour, unsalted butter, dry white wine, unsalted chicken broth, white miso, kosher salt, freshly ground pepper. Answer in format Recipe:  [/INST] Recipe: Whole Chicken with Acorn Squash, Sage, Rosemary, and Apple Cider Vinegar Dressing\n\nIngredients:\n\n* 1 whole chicken (about 3-4 lbs)\n* 2 acorn squashes, halved and seeded\n* 2 tbsp kosher salt\n* 1 tsp ground allspice\n* 1 tsp crushed red pepper flakes\n* 1 tsp freshly ground black pepper\n* 1 loaf white bread, torn into small pieces\n* 2 apples, cored and sliced\n* 2 tbsp extra-virgin olive oil\n* 1 red onion, thinly sliced\n* 2 tbsp apple cider vinegar\n* 1 tbsp white miso\n* 1 tbsp all-purpose flour\n* 2 tbsp unsalted butter\n* 1 

In [28]:
import re

def extract_recipe_info(input_strings):
    # Define regular expressions
    recipe_name_pattern = re.compile(r"Recipe: (.+)")
    ingredients_pattern = re.compile(r"Ingredients:(.+?)Instructions:", re.DOTALL)
    instructions_pattern = re.compile(r'Instructions:(.+)', re.DOTALL)

    # Initialize lists to store extracted information
    recipe_names = []
    ingredients_list = []
    instructions_list = []
    source_urls = []

    # Process each input string
    for input_string in input_strings:
        # Extract information
        recipe_name_match = recipe_name_pattern.search(input_string)
        ingredients_match = ingredients_pattern.search(input_string)
        instructions_match = instructions_pattern.search(input_string)
        source_url_match = source_url_pattern.search(input_string)

        # Append extracted information to the respective lists
        if recipe_name_match:
            recipe_names.append(recipe_name_match.group(1).strip())
        else:
            recipe_names.append(None)

        if ingredients_match:
            ingredients_list.append(ingredients_match.group(1).strip())
        else:
            ingredients_list.append(None)

        if instructions_match:
            instructions_list.append(instructions_match.group(1).strip())
        else:
            instructions_list.append(None)

        if source_url_match:
            source_urls.append(source_url_match.group(1).strip())
        else:
            source_urls.append(None)

    # Return the lists
    return recipe_names, ingredients_list, instructions_list, source_urls

# Example usage
result = predictions
input_strings = []
for i in range(len(result)):
    input_strings += [result[i][0]['generated_text']]
recipe_names, ingredients_list, instructions_list, source_urls = extract_recipe_info(input_strings)




In [29]:
print(ingredients_list)

['* 1 whole chicken (about 3-4 lbs)\n* 2 acorn squashes, halved and seeded\n* 2 tbsp kosher salt\n* 1 tsp ground allspice\n* 1 tsp crushed red pepper flakes\n* 1 tsp freshly ground black pepper\n* 1 loaf white bread, torn into small pieces\n* 2 apples, cored and sliced\n* 2 tbsp extra-virgin olive oil\n* 1 red onion, thinly sliced\n* 2 tbsp apple cider vinegar\n* 1 tbsp white miso\n* 1 tbsp all-purpose flour\n* 2 tbsp unsalted butter\n* 1 cup dry white wine\n* 1 cup unsalted chicken broth\n* 1 tbsp white miso\n* 1 tsp kosher salt\n* 1 tsp freshly ground black pepper', '* 2 cups new potatoes, washed and cut into bite-sized pieces\n* 4 egg whites\n* 2 tablespoons kosher salt\n* 1 teaspoon finely ground black pepper\n* 2 teaspoons chopped fresh rosemary\n* 2 teaspoons chopped fresh thyme\n* 2 tablespoons chopped fresh parsley\n* 2 tablespoons olive oil', None, '* 1/4 cup round Italian olive oil\n* 1/2 lb sweet Italian sausage, sliced\n* 1/4 cup unsalted butter\n* 1 large onion, chopped\n*

### Bert score 


For recipe name

In [30]:
cleaned_recipes = [recipe.replace('[/INST] Recipe: ', '') for recipe in recipe_names]

# Print the cleaned recipes
for cleaned_recipe in cleaned_recipes:
    print(cleaned_recipe)

Whole Chicken with Acorn Squash, Sage, Rosemary, and Apple Cider Vinegar Dressing
Roasted New Potatoes with Rosemary, Thyme, and Parsley
Creamy Mac and Cheese with Garlic and Smoked Paprika
Creamy Turkey and Sausage Skillet with Parmesan and Parsley
Bourbon Apple Cider Mule
Chamomile Tequila Lemonade
Spicy Apple Cider Cocktail with Grand Marnier, Amaro Averna, and Pink Peppercorns
Spiced Sherry and Scotch Sour with Turmeric Syrup
Spicy Lamb and Rice Stew with Garam Masala and Cilantro
Spicy Lentil Soup with Tomato Paste and Eggs
Chipotle Adobo Mac and Cheese
Maple Mustard Glazed Chicken with Parsnips and Carrots
Bacon and Vegetable Soup with Cinnamon and Granny Smith Apples
Chicken and Anchovy Salad with Garlic, Mayonnaise, and Dijon Mustard
Lemon and Herb Chicken with Rice and Anchovy Sauce
Shrimp and Guacamole Stuffed Chiles Rellenos with Corn Masa Flour Crust
Chorizo and Black Bean Tacos with Garlic and Onion
Coconut Mango Plantain Yogurt Parfait
Roasted Garlic and Potato Soup with 

In [31]:
df.head(20)['Title'].tolist()
len(df.head(20)['Title'].tolist())

20

In [32]:
print(df.head(20)['Title'].tolist())

['Miso-Butter Roast Chicken With Acorn Squash Panzanella', 'Crispy Salt and Pepper Potatoes', 'Thanksgiving Mac and Cheese', 'Italian Sausage and Bread Stuffing', "Newton's Law", 'Warm Comfort', 'Apples and Oranges', 'Turmeric Hot Toddy', 'Instant Pot Lamb Haleem', 'Spiced Lentil and Caramelized Onion Baked Eggs', 'Hot Pimento Cheese Dip', 'Spiral Ham in the Slow Cooker', 'Butternut Squash and Apple Soup', 'Caesar Salad Roast Chicken', 'Chicken and Rice With Leeks and Salsa Verde', 'Gorditas con Camarones', 'Enfrijoladas', 'Caramelized Plantain Parfait', 'Chicken and Potato Gratin With Brown Butter Cream', 'Roasted Beets With Crispy Sunchokes and Pickled Orange-Ginger Purée']


In [33]:
import torch
from bert_score import score

def calculate_bert_score(references, candidates):
    # Use GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Calculate BERT score
    P, R, F1 = score(references, candidates, lang="en", device=device)

    # Calculate average scores
    avg_precision = P.mean().item()
    avg_recall = R.mean().item()
    avg_f1 = F1.mean().item()

    return avg_precision, avg_recall, avg_f1

# Example usage
references = cleaned_recipes
candidates = df.head(20)['Title'].tolist()

avg_precision, avg_recall, avg_f1 = calculate_bert_score(references, candidates)

# Print the results
print(f"Average Precision: {avg_precision:.4f}")
print(f"Average Recall: {avg_recall:.4f}")
print(f"Average F1 score: {avg_f1:.4f}")


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Average Precision: 0.8360
Average Recall: 0.8580
Average F1 score: 0.8464


### Blue Score

In [129]:
import nltk
from nltk.translate.bleu_score import sentence_bleu

def calculate_bleu_score(references, candidates):
    bleu_scores = []

    for reference, candidate in zip(references, candidates):
        # Tokenize the sentences
        candidate_tokenized = candidate.split()
        reference_tokenized = [word.lower() for phrase in reference.split() for word in phrase.split()]

        # Calculate BLEU score
        bleu_score = sentence_bleu([reference_tokenized], candidate_tokenized)
        bleu_scores.append(bleu_score)

    return bleu_scores

def calculate_average_bleu_score(references, candidates):
    bleu_scores = calculate_bleu_score(references, candidates)
    average_bleu_score = sum(bleu_scores) / len(bleu_scores) if len(bleu_scores) > 0 else 0
    return average_bleu_score

# Example usage


for Ingredients 

In [118]:
import ast 
from ast import literal_eval
ing_list += [ingredients_list[i].strip().split("*") for i in range(len(ingredients_list)) if ingredients_list[i]!= None]
reference_ing = []
candidate_ing = []

for i, ingredient_string in enumerate(ingredients_list):
    # Split, parse, and extract ingredient names
    if ingredient_string is None: 
        continue
    ing = ingredient_string.strip().split("*") 
    ings = parse_multiple_ingredients(ing)
    fin = extract_ingredient_names(ings)
    list_of_strings = df.iloc[i]['final_ing']

# Concatenate the list of strings into a single string
    combined_cand= ', '.join(literal_eval(list_of_strings))

    # Combine the extracted ingredient names into a single string
    combined_string = ', '.join(fin)

    # Add the combined string to the resultant string
    reference_ing.append(combined_string)  
    candidate_ing.append(combined_cand)


['whole chicken, kosher salt, acorn squash, sage, rosemary, unsalted butter, ground allspice, crushed red pepper flakes, freshly ground black pepper, white bread, apples, extra-virgin olive oil, red onion, apple cider vinegar, white miso, all-purpose flour, unsalted butter, dry white wine, unsalted chicken broth, white miso, kosher salt, freshly ground pepper', 'egg whites, new potatoes, kosher salt, finely ground black pepper, rosemary, thyme, parsley', 'round italian, olive oil, sweet italian sausage, unsalted butter, onions, celery, garlic, eggs, heavy cream, turkey giblet stock or reduced-sodium chicken broth, parmigiano-reggiano, flat-leaf parsley, qt shallow ceramic or glass baking dish', 'dark brown sugar, hot water, bourbon, fresh lemon juice, apple butter, garnish : orange twist and, or ground cinnamon', 'chamomile tea bags, reposado tequila, fresh lemon juice, agave nectar', 'grand marnier, amaro averna, pat salted butter, hot apple cider, fresh lemon juice, garnish : freshly

In [130]:
average_bleu_score = calculate_average_bleu_score(reference_ing, candidate_ing)
blue_score = calculate_bleu_score(reference_ing, candidate_ing)

# Print the results
for i, blue_score in enumerate(blue_score):
    print(f"BLEU Score for Candidate {i + 1}: {bleu_score:.4f}")

# Print the average BLEU score
print(f"\nAverage BLEU Score: {average_bleu_score:.4f}")


BLEU Score for Candidate 1: 0.8409
BLEU Score for Candidate 2: 0.8409
BLEU Score for Candidate 3: 0.8409
BLEU Score for Candidate 4: 0.8409
BLEU Score for Candidate 5: 0.8409
BLEU Score for Candidate 6: 0.8409
BLEU Score for Candidate 7: 0.8409
BLEU Score for Candidate 8: 0.8409
BLEU Score for Candidate 9: 0.8409
BLEU Score for Candidate 10: 0.8409

Average BLEU Score: 0.6675


In [148]:
from nltk.translate.bleu_score import sentence_bleu

def calculate_bleu_score_for_index(index, reference_list, candidate_list):
    # Get the reference and candidate sentences for the given index
    reference = reference_list[index]
    candidate = candidate_list[index]

    # Tokenize the sentences
    candidate_tokenized = candidate.split()
    reference_tokenized = [word.lower() for phrase in reference.split() for word in phrase.split()]


    

    # Calculate BLEU score
    bleu_score = sentence_bleu([reference_tokenized], candidate_tokenized)

    return bleu_score

for i in range(len(reference_ing)):
    bleu_score_for_index = calculate_bleu_score_for_index(i, reference_ing, candidate_ing)
    print(f"BLEU Score for Index {i}: {bleu_score_for_index:.4f}")


BLEU Score for Index 0: 0.7633
BLEU Score for Index 1: 0.3351
BLEU Score for Index 2: 0.5469
BLEU Score for Index 3: 0.4168
BLEU Score for Index 4: 0.6520
BLEU Score for Index 5: 0.7913
BLEU Score for Index 6: 0.7778
BLEU Score for Index 7: 0.6267
BLEU Score for Index 8: 0.7649
BLEU Score for Index 9: 1.0000


For recipe name

In [149]:
references = cleaned_recipes
candidates = df.head(20)['Title'].tolist()
for i in range(len(references)):
    bleu_score_for_index = calculate_bleu_score_for_index(i, references, candidates)
    print(f"BLEU Score for Index {i}: {bleu_score_for_index:.4f}")


BLEU Score for Index 0: 0.0000
BLEU Score for Index 1: 0.3670
BLEU Score for Index 2: 0.2026
BLEU Score for Index 3: 0.3005
BLEU Score for Index 4: 0.0000
BLEU Score for Index 5: 0.0000
BLEU Score for Index 6: 0.0378
BLEU Score for Index 7: 0.0000
BLEU Score for Index 8: 0.0000
BLEU Score for Index 9: 0.5329
BLEU Score for Index 10: 0.0000
BLEU Score for Index 11: 0.0000
BLEU Score for Index 12: 0.2460
BLEU Score for Index 13: 0.0000
BLEU Score for Index 14: 0.6240
BLEU Score for Index 15: 0.0000
BLEU Score for Index 16: 0.0000
BLEU Score for Index 17: 0.0000
BLEU Score for Index 18: 0.4631
BLEU Score for Index 19: 0.4137


For instructions

In [162]:

reference_ist = []
candidate_ist = []

for i, ist_string in enumerate(instructions_list):
    # Split, parse, and extract ingredient names
    if ist_string is None: 
        continue
    ist = ist_string.strip().split() 
    lst = df.iloc[i]['Instructions']

# Concatenate the list of strings into a single string

    # Combine the extracted ingredient names into a single string
    combined_string = ' '.join(ist)

    # Add the combined string to the resultant string
    reference_ist.append(combined_string)  
    candidate_ist.append(lst)


In [166]:
for i in range(len(reference_ing)):
    bleu_score_for_index = calculate_bleu_score_for_index(i, reference_ist, candidate_ist)
    print(f"BLEU Score for Index {i}: {bleu_score_for_index:.4f}")


BLEU Score for Index 0: 0.0277
BLEU Score for Index 1: 0.1150
BLEU Score for Index 2: 0.0759
BLEU Score for Index 3: 0.0493
BLEU Score for Index 4: 0.0639
BLEU Score for Index 5: 0.3412
BLEU Score for Index 6: 0.2556
BLEU Score for Index 7: 0.1636
BLEU Score for Index 8: 0.0543
BLEU Score for Index 9: 0.0523


# BM25

# # RAG

In [39]:
import pandas as pd
file_path = "/kaggle/input/final-recipes/final_recipes.xlsx"
# Read the CSV file and handle the error by skipping the problematic row
# Read the Excel file into a pandas DataFrame
try:
    df = pd.read_excel(file_path)
    # Now 'df' contains the data from the Excel file
except pd.errors.ParserError as e:
    print(f"ParserError: {e}")

In [40]:
columns_to_remove= ["Unnamed: 0"]
df = df.drop(columns=columns_to_remove, axis=1)

In [41]:
pd.set_option('display.max_colwidth', None)  # Set to None for unlimited width
df.head(2)

Unnamed: 0,Unnamed: 0.1,Title,Ingredients,Instructions,Cleaned_Ingredients,final_ing
0,0,Miso-Butter Roast Chicken With Acorn Squash Panzanella,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher salt, divided, plus more', '2 small acorn squash (about 3 lb. total)', '2 Tbsp. finely chopped sage', '1 Tbsp. finely chopped rosemary', '6 Tbsp. unsalted butter, melted, plus 3 Tbsp. room temperature', '¼ tsp. ground allspice', 'Pinch of crushed red pepper flakes', 'Freshly ground black pepper', '⅓ loaf good-quality sturdy white bread, torn into 1"" pieces (about 2½ cups)', '2 medium apples (such as Gala or Pink Lady; about 14 oz. total), cored, cut into 1"" pieces', '2 Tbsp. extra-virgin olive oil', '½ small red onion, thinly sliced', '3 Tbsp. apple cider vinegar', '1 Tbsp. white miso', '¼ cup all-purpose flour', '2 Tbsp. unsalted butter, room temperature', '¼ cup dry white wine', '2 cups unsalted chicken broth', '2 tsp. white miso', 'Kosher salt, freshly ground pepper']","Pat chicken dry with paper towels, season all over with 2 tsp. salt, and tie legs together with kitchen twine. Let sit at room temperature 1 hour.\nMeanwhile, halve squash and scoop out seeds. Run a vegetable peeler along ridges of squash halves to remove skin. Cut each half into ½""-thick wedges; arrange on a rimmed baking sheet.\nCombine sage, rosemary, and 6 Tbsp. melted butter in a large bowl; pour half of mixture over squash on baking sheet. Sprinkle squash with allspice, red pepper flakes, and ½ tsp. salt and season with black pepper; toss to coat.\nAdd bread, apples, oil, and ¼ tsp. salt to remaining herb butter in bowl; season with black pepper and toss to combine. Set aside.\nPlace onion and vinegar in a small bowl; season with salt and toss to coat. Let sit, tossing occasionally, until ready to serve.\nPlace a rack in middle and lower third of oven; preheat to 425°F. Mix miso and 3 Tbsp. room-temperature butter in a small bowl until smooth. Pat chicken dry with paper towels, then rub or brush all over with miso butter. Place chicken in a large cast-iron skillet and roast on middle rack until an instant-read thermometer inserted into the thickest part of breast registers 155°F, 50–60 minutes. (Temperature will climb to 165°F while chicken rests.) Let chicken rest in skillet at least 5 minutes, then transfer to a plate; reserve skillet.\nMeanwhile, roast squash on lower rack until mostly tender, about 25 minutes. Remove from oven and scatter reserved bread mixture over, spreading into as even a layer as you can manage. Return to oven and roast until bread is golden brown and crisp and apples are tender, about 15 minutes. Remove from oven, drain pickled onions, and toss to combine. Transfer to a serving dish.\nUsing your fingers, mash flour and butter in a small bowl to combine.\nSet reserved skillet with chicken drippings over medium heat. You should have about ¼ cup, but a little over or under is all good. (If you have significantly more, drain off and set excess aside.) Add wine and cook, stirring often and scraping up any browned bits with a wooden spoon, until bits are loosened and wine is reduced by about half (you should be able to smell the wine), about 2 minutes. Add butter mixture; cook, stirring often, until a smooth paste forms, about 2 minutes. Add broth and any reserved drippings and cook, stirring constantly, until combined and thickened, 6–8 minutes. Remove from heat and stir in miso. Taste and season with salt and black pepper.\nServe chicken with gravy and squash panzanella alongside.","['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher salt, divided, plus more', '2 small acorn squash (about 3 lb. total)', '2 Tbsp. finely chopped sage', '1 Tbsp. finely chopped rosemary', '6 Tbsp. unsalted butter, melted, plus 3 Tbsp. room temperature', '¼ tsp. ground allspice', 'Pinch of crushed red pepper flakes', 'Freshly ground black pepper', '⅓ loaf good-quality sturdy white bread, torn into 1"" pieces (about 2½ cups)', '2 medium apples (such as Gala or Pink Lady; about 14 oz. total), cored, cut into 1"" pieces', '2 Tbsp. extra-virgin olive oil', '½ small red onion, thinly sliced', '3 Tbsp. apple cider vinegar', '1 Tbsp. white miso', '¼ cup all-purpose flour', '2 Tbsp. unsalted butter, room temperature', '¼ cup dry white wine', '2 cups unsalted chicken broth', '2 tsp. white miso', 'Kosher salt', 'freshly ground pepper']","['whole chicken', 'kosher salt', 'acorn squash', 'sage', 'rosemary', 'unsalted butter', 'ground allspice', 'crushed red pepper flakes', 'freshly ground black pepper', 'white bread', 'apples', 'extra-virgin olive oil', 'red onion', 'apple cider vinegar', 'white miso', 'all-purpose flour', 'unsalted butter', 'dry white wine', 'unsalted chicken broth', 'white miso', 'kosher salt', 'freshly ground pepper']"
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (about 1 inch in diameter)', '2 teaspoons kosher salt', '¾ teaspoon finely ground black pepper', '1 teaspoon finely chopped rosemary', '1 teaspoon finely chopped thyme', '1 teaspoon finely chopped parsley']","Preheat oven to 400°F and line a rimmed baking sheet with parchment. In a large bowl, whisk the egg whites until foamy (there shouldn’t be any liquid whites in the bowl). Add the potatoes and toss until they’re well coated with the egg whites, then transfer to a strainer or colander and let the excess whites drain. Season the potatoes with the salt, pepper, and herbs. Scatter the potatoes on the baking sheet (make sure they’re not touching) and roast until the potatoes are very crispy and tender when poked with a knife, 15 to 20 minutes (depending on the size of the potatoes).\nTransfer to a bowl and serve.","['2 large egg whites', '1 pound new potatoes (about 1 inch in diameter)', '2 teaspoons kosher salt', '¾ teaspoon finely ground black pepper', '1 teaspoon finely chopped rosemary', '1 teaspoon finely chopped thyme', '1 teaspoon finely chopped parsley']","['egg whites', 'new potatoes', 'kosher salt', 'finely ground black pepper', 'rosemary', 'thyme', 'parsley']"


In [42]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [43]:
import os
import urllib.request
import tarfile
import json
import pandas as pd
import numpy as np
from tqdm import tqdm


#libraries for text preprocessing
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
nltk.download('wordnet')
from nltk.stem.wordnet import WordNetLemmatizer

#libraries for keyword extraction with tf-idf
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from scipy.sparse import coo_matrix

#libraries for reading and writing files
import pickle

#libraries for BM25
!pip install rank_bm25
from rank_bm25 import BM25Okapi

[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /usr/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2


In [44]:
import nltk
nltk.download()

NLTK Downloader
---------------------------------------------------------------------------
    d) Download   l) List    u) Update   c) Config   h) Help   q) Quit
---------------------------------------------------------------------------



KeyboardInterrupt



In [None]:
import nltk
from nltk.stem import WordNetLemmatizer 
nltk.download('wordnet')

In [None]:
def preprocess(text):
    #define stopwords
    stop_words = set(stopwords.words("english"))
    #Remove punctuations
    text = re.sub('[^a-zA-Z]', ' ', text)
    #Convert to lowercase
    text = text.lower()
    #remove tags
    text=re.sub("&lt;/?.*?&gt;"," &lt;&gt; ",text)
    # remove special characters and digits
    text=re.sub("(\\d|\\W)+"," ",text)
    ##Convert to list from string
    text = text.split()
    ##Stemming
    #Lemmatisation
    lem = WordNetLemmatizer()
    text = [lem.lemmatize(word) for word in text if not word in  stop_words]
    text = " ".join(text)

    return text

In [None]:
import nltk
import subprocess

# Download and unzip wordnet
try:
    nltk.data.find('wordnet.zip')
except:
    nltk.download('wordnet', download_dir='/kaggle/working/')
    command = "unzip /kaggle/working/corpora/wordnet.zip -d /kaggle/working/corpora"
    subprocess.run(command.split())
    nltk.data.path.append('/kaggle/working/')

# Now you can import the NLTK resources as usual
from nltk.corpus import wordnet

In [None]:
df['try_processs'] = df['final_ing'].apply(lambda x:(preprocess(x)))

In [None]:
pt = df['try_processs'].to_list()

In [None]:
from rank_bm25 import BM25Okapi
tokenized_corpus = [doc.split(" ") for doc in pt]
bm25 = BM25Okapi(tokenized_corpus)

In [None]:
query = preprocess("chicken, potato")
tokenized_query = query.split(" ")
doc_scores = bm25.get_scores(tokenized_query)
print(tokenized_query)
# Define the target value as a list
target_values = bm25.get_top_n(tokenized_query, pt, n=5)

# Use boolean indexing to filter rows
result_df = pd.DataFrame(columns=df.columns)

for tv in target_values:
    # Use boolean indexing to filter rows with exactly the target value
    filtered_rows = df[df['try_processs'] == tv]
    result_df = result_df._append(filtered_rows,ignore_index=True)



In [None]:

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
row_10214 = df.loc[10214]
print(row_10214)

In [None]:
%%capture
! pip install farm-haystack

In [None]:
# Your desired template format
template = "Recipe name is {}. Ingredients are {}. Instructions are {}"

# List to store the formatted strings
docs = []

# Iterate through each row in the DataFrame
for index, row in df.iterrows():
    # Extract values from each column
    recipe_id = row['Unnamed: 0.1']
    recipe_name = row['Title']
    ingredients = row['try_processs']
    instructions = row['Instructions']

    # Create a formatted string using the template
    formatted_string = template.format(recipe_name, ingredients, instructions)
    formatted_dict = {
        'content': formatted_string,
        'id': recipe_id
    }

    # Convert the dictionary to a JSON string
    doc = Document.from_json(formatted_dict)
    docs.append(doc)
     
    

In [None]:
import glob,json
from haystack import Document
from haystack.nodes import PreProcessor

processor = PreProcessor(
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
    split_by="word",
    split_length=200,
    split_respect_sentence_boundary=True,
    split_overlap=0,
    language="en",
)
preprocessed_docs = processor.process(docs)
     

In [None]:
preprocessed_docs[0]

In [None]:
from haystack.document_stores import InMemoryDocumentStore

document_store = InMemoryDocumentStore(use_bm25=True)

In [None]:
document_store.write_documents(preprocessed_docs)


In [None]:
from haystack import Pipeline
from haystack.nodes import BM25Retriever, PromptNode, PromptTemplate
     

In [None]:
retriever = BM25Retriever(document_store, top_k=4)

In [None]:
print(type(retriever))

In [None]:
qa_template = PromptTemplate(prompt=
  """[INST] Using the information contained in the context, answer the question.
  If the answer cannot be deduced from the context, answer \"I don't know.\"
  Context: {join(documents)};
  Question: {query}
  [/INST]""")

In [None]:
prompt_node = PromptNode(model_name_or_path="mistralai/Mistral-7B-Instruct-v0.1",
                         api_key='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',
                         default_prompt_template=qa_template,
                         max_length=5500,
                         model_kwargs={"model":model, "tokenizer": tokenizer,"model_max_length":8000})

In [None]:
rag_pipeline = Pipeline()
rag_pipeline.add_node(component=retriever, name="retriever", inputs=["Query"])
rag_pipeline.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])

In [None]:
from pprint import pprint
print_answer = lambda out: pprint(out["results"][0].strip())

In [None]:
 Preheat oven to 425°F with rack in middle.\nPeel potatoes and thinly slice (about 1/16 inch thick), then toss with butter, 3/4 teaspoon salt, and 1/2 teaspoon pepper. Spread evenly in a 2-quart shallow baking dish and add broth. Cover tightly with foil and bake 30 minutes. Uncover and bake until top is well-browned and most of stock is absorbed, 30 to 35 minutes more.

In [None]:
print_answer(rag_pipeline.run(query="What are ingredients of Italian Sausage and Bread Stuffing"))