In [20]:
import pandas as pd
from datasets import Dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments
import torch
import re


In [21]:
# Load the CSV data (use your dataset file here)
meal_data = pd.read_csv('new_data.csv')

# Prepare diverse question-answer pairs
def prepare_data(df):
    training_data = []
    for _, row in df.iterrows():
        # Extract relevant fields
        recipe_name = row['TranslatedRecipeName']
        ingredients = row['TranslatedIngredients']
        instructions = row['TranslatedInstructions']
        url = row['URL']
        image_url = row['image-url']
        cuisine = row['Cuisine']
        time = row['TotalTimeInMins']

        # Generate a variety of questions related to each meal
        questions = [
            f"What can I cook with {ingredients}?",
            f"Give me a recipe that includes {ingredients}.",
            f"I want a {cuisine} recipe.",
            f"Show me a recipe for {cuisine} cuisine.",
            f"Show me a recipe that takes {time} minutes to cook.",
            f"What can I make in under {time} minutes?",
            f"How do I make {recipe_name}?",
            f"Give me a {recipe_name} recipe with instructions.",
            f"Where can I find more info about {recipe_name}?",
            f"Show me an image of {recipe_name}.",
            f"Give me a quick {cuisine} meal recipe.",
        ]

        # Generate different answers based on question type
        for question in questions:
            if re.search(r'image of', question):
                # Answer contains only the image URL
                answer = f"![Recipe Image]({image_url})"
            elif re.search(r'cuisine recipe|meal recipe', question):
                # Answer contains the full recipe details
                answer = (
                    f"**Recipe Name**: {recipe_name}\n\n"
                    
                    f"**Ingredients**: {ingredients}\n\n"
                    f"**Instructions**: {instructions}\n\n"
                    f"**URL**: {url}\n\n"
                    f"![Recipe Image]({image_url})"
                )
            elif re.search(r'cook with|without', question):
                # Answer focuses on ingredients
                answer = (
                    f"You can make **{recipe_name}** with the following ingredients:\n\n"
                    f"**Ingredients**: {ingredients}\n\n"
                    f"**Instructions**: {instructions}\n\n"
                    f"![Recipe Image]({image_url})"
                )
            else:
                # Default to providing recipe instructions
                answer = (
                    f"**Recipe Name**: {recipe_name}\n\n"
                    f"**Ingredients**: {ingredients}\n\n"
                    f"**Instructions**: {instructions}\n\n"
                    f"**URL**: {url}\n\n"
                    f"![Recipe Image]({image_url})"
                )
            training_data.append({'question': question, 'answer': answer})
    return training_data
    

In [22]:
# Convert the data into a DataFrame 
train_df = pd.DataFrame(prepare_data(meal_data))
train_df.head()

Unnamed: 0,question,answer
0,What can I cook with 1 tablespoon Red Chilli p...,You can make **Masala Karela Recipe** with the...
1,Give me a recipe that includes 1 tablespoon Re...,**Recipe Name**: Masala Karela Recipe\n\n**Ing...
2,I want a Indian recipe.,**Recipe Name**: Masala Karela Recipe\n\n**Ing...
3,Show me a recipe for Indian cuisine.,**Recipe Name**: Masala Karela Recipe\n\n**Ing...
4,Show me a recipe that takes 45 minutes to cook.,**Recipe Name**: Masala Karela Recipe\n\n**Ing...


In [35]:

#convert data to Hugging Face dataset
dataset = Dataset.from_pandas(train_df)

# Load GPT-2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Add a pad token to tokenizer
tokenizer.pad_token = tokenizer.eos_token

# Tokenize the dataset
def tokenize_function(examples):
    max_len = 2048  # Increased length for longer inputs
    inputs = tokenizer(examples['question'], padding="max_length", truncation=True, max_length=max_len)
    outputs = tokenizer(examples['answer'], padding="max_length", truncation=True, max_length=max_len)
    inputs['labels'] = outputs['input_ids']
    inputs['attention_mask'] = inputs['attention_mask']  # Include attention mask
    return inputs

tokenized_dataset = dataset.map(tokenize_function, batched=True)

small_dataset = tokenized_dataset.select(range(100))  # Use the first 100 examples

# Split the dataset into training and evaluation datasets
train_test_split = small_dataset.train_test_split(test_size=0.1)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

# Set training arguments
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",  
    save_strategy="epoch",       
    load_best_model_at_end=True,  
    metric_for_best_model="eval_loss",    
    greater_is_better=False,      
    logging_dir="./logs",         
    logging_steps=100,            
    save_steps=1000,              
    save_total_limit=3,           
    per_device_train_batch_size=2, 
    gradient_accumulation_steps=8, 
    learning_rate=2e-5,
    num_train_epochs=5,  
    weight_decay=0.01,
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

# Train the model
trainer.train()



Map:   0%|          | 0/65318 [00:00<?, ? examples/s]

  0%|          | 0/25 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [34]:
import torch

# Save the fine-tuned model and tokenizer
model.save_pretrained("./fine_tuned_meal_plan_model")
tokenizer.save_pretrained("./fine_tuned_meal_plan_model")

# Detect the device (CUDA, MPS for Apple, or CPU)
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
model = model.to(device)

def test_model(model, tokenizer):
    
    tokenizer.pad_token_id = tokenizer.eos_token_id

    test_queries = [
        "What can I cook with potatoes?",
        "Give me a Chinese recipe.",
        "show me an image of a Masala Karela Recipe.",
        "Show me a quick Italian meal recipe.",
        "I want a meal with rice and beans.",
        "Show me a vegan recipe.",
        "Give me a recipe without peanuts.",
        "Show me a recipe that takes 30 minutes to cook."
    ]
    
    for query in test_queries:
        # Move input data to the model's device (CUDA, MPS, or CPU)
        inputs = tokenizer(query, return_tensors='pt', padding=True, truncation=True)
        inputs = {key: value.to(device) for key, value in inputs.items()}
        
        outputs = model.generate(
            inputs['input_ids'], 
            attention_mask=inputs['attention_mask'], 
            max_length=1024, 
            do_sample=True, 
            top_k=50,  
            top_p=0.95,  
            temperature=0.7,  
            no_repeat_ngram_size=2,  # To avoid repetition
            #pad_token_id=tokenizer.eos_token_id  # Set pad token explicitly
        )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"Query: {query}")
        print(f"Response: {response}\n")
        
# Run the test_model function with the fine-tuned model
test_model(model, tokenizer)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Query: What can I cook with potatoes?
Response: What can I cook with potatoes?


1 onion, chopped.

4 cloves garlic, finely chopped
... 1 large onion finely sliced. finely cut garlic. cut onions. thin. onion. mix with onion ( make onion mixture. the onion and garlic mixture to form a gravy. it is a good gravy of onions like the kadhi, the onions are also made in gravy, onion is also make a curry. stir fry onion paste, add onion oil and onions and fry. onions should heat, when onions will fry add onions, cook onions till onions cooked. add water to make salt. salt, to fry onions gravy to onions onions can also use gravy like kal dhi. when they are cooked, gravy gravy as curry is gravy and gravy is made, it that gravy can use curry paste. taste. serve, salt the gravy make gravy onions is it gravy curry onion in curry recipe. maddhi in recipe for gravy recipe, serve in kaddi. kkh kodhi to curry, curry to kh. curry krish. ( it make curry gravy or gravy for khen) kakk, kya, jin. fry the pot

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Query: Give me a Chinese recipe.
Response: Give me a Chinese recipe.

In a large bowl, mix together the eggs and rice vinegar. Stir in the water. Add the egg mixture to the cooked rice and stir. The water will thicken the rice. I added it to it and it was so good. And then I chopped it. Now I put it in with the flour. stir it it then to stir the ground flour then add the powder. mix well it is to mix this with it a little. and then it will add it rice with powder and that is the. it with. rice of rice it can that be added or that the will mix it all together. this rice, rice rice recipe will also. make the ingredients. cooking.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Query: show me an image of a Masala Karela Recipe.
Response: show me an image of a Masala Karela Recipe. You can use the image image to make your recipe. Print Recipe




Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Query: Show me a quick Italian meal recipe.
Response: Show me a quick Italian meal recipe.

Ingredients:
- 1 cup cooked cilantro
 - 2 cups water
 – 1/4 cup cumin seeds
*
1 tsp cayenne powder
 1 tsp garlic powder 1 1.1
 pumin powder - 1 pinch cary seeds of c c
 seeds -1 1 c. salt
2 tbsp c oregano 1 tbsp finely chopped c parsley 1 onion - chopped
juice 3 onions - finely sliced 3 onion(chopped) - cut 1 - ½ - 5 garlic cloves - minced 2 - 4 onion, chopped chopped salt 2 garlic - sauté 2 onions in cooking water for 1 hour. 3 1 = 2 hours 2 = 1 hours salt - for 4 to 8 onions, finely cut onions to 1 teaspoon in salt 1 salt-1 teaspoon 1 pepper 1 to 2 salt or salt to the onions (optional)
(optional, if you are to make use with c, c and cy and add a dash c to add to taste. 1 : 1 is a lot for the taste, 3 the same. and so the salt and salt will give it a nice taste after cooking. to mix salt in onions or add salt salt with 2 teaspoon c o c salt. or to a pinch salt c of salt for added in some to it 

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Query: I want a meal with rice and beans.
Response: I want a meal with rice and beans. I want food with onions and potatoes.

I cook rice with onion and potato and saute onions. You can also cook curry rice, I use a lot of onions with a saut. rice. And as for spices, the curry will be spicy. the rice is not cooked with spices.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Query: Show me a vegan recipe.
Response: Show me a vegan recipe.

1/2 cup vegetable broth
, (you can use any vegetable for your broth, or add the broth in small pieces if it is not hot. it will make it easier to make)
 1/4 cup rice water
 ( or broth will add rice broth a very fast and thick rice and rice. rice is much better. you can omit the rice, rice rice soup is to slow cooker, make rice with rice flour. just add more rice in the soup rice will become very thin and smooth rice like rice can be served and add in soup. 1 cup water if you added rice to broth but rice it wont water. broth of rice
 to soup with the water, 1.1 cup soup to rice 4.4



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Query: Give me a recipe without peanuts.
Response: Give me a recipe without peanuts. I think it is not as good as the banana curry.
Ingredients:
1 cup coconut milk,

2 cups coconut oil, 1 cup chopped nuts, or to taste


Query: Show me a recipe that takes 30 minutes to cook.
Response: Show me a recipe that takes 30 minutes to cook.

