<a href="https://colab.research.google.com/github/oliviasteeed/ChefGPT/blob/main/ChefGPT_FINAL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Chef GPT: IAT 360 Final Project
Olivia Steed & Welle Dias Ouambo

This project returns a recipe suggestion based on input available ingredients. It uses RAG retrieval to match the input to the closest recipe, and then a fine-tuned version of GPT2 to return the result in a recipe format.

### Import Dependencies

In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments
from datasets import load_dataset, Dataset
import pandas as pd
import csv
import glob
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import numpy as np
import ast

### Load data

In [None]:
# NOTE: This was intended to eliminate cell below, but embeddings from file do not work properly so you have to run cell below to run code


# open combined recipes dataframe csv - this is data for RAG to draw from (run if starting from a new runtime)

# df = pd.read_csv('combined_recipes_dataframe.csv')

# embedding_model = SentenceTransformer('all-MiniLM-L6-v2')  # use lightweight model for embedding
# embeddings = embedding_model.encode(df['Embedding'])  # load embeddings based on key ingredients

In [None]:
# open recipes dataset CSV and create embeddings for RAG retrieval

# download from https://github.com/oliviasteeed/ChefGPT/blob/main/combined_recipes.csv
recipes_csv = ('/Users/oliviasteed/Desktop/combined_recipes.csv') # change to your runtime path

recipe_corpus = []

with open(recipes_csv, 'r') as f:
    reader = csv.reader(f)
    next(reader)
    for row in reader:
        recipe_corpus.append(row)

# getting info from each column
title = [recipe_corpus[i][1] for i in range(len(recipe_corpus))]
all_ing = [recipe_corpus[i][2] for i in range(len(recipe_corpus))]
instr = [recipe_corpus[i][3] for i in range(len(recipe_corpus))]
key_ing = [recipe_corpus[i][-1] for i in range(len(recipe_corpus))]

cleaned_ingr = []
cleaned_instr = []
cleaned_key_ingr = []

# data cleaning to remove "" [] and make into one list
for i in range (len(recipe_corpus)):

  curr_recipe_list = ast.literal_eval(recipe_corpus[i][2])
  result = ', '.join(curr_recipe_list)
  cleaned_ingr.append(result)

  curr_instr_list = ast.literal_eval(recipe_corpus[i][3])
  result = ' '.join(curr_instr_list)
  cleaned_instr.append(result)

  curr_key_ingr_list = ast.literal_eval(recipe_corpus[i][-1])
  result = ', '.join(curr_key_ingr_list)
  cleaned_key_ingr.append(result)

embedding_model = SentenceTransformer('all-MiniLM-L6-v2')  # lightweight model for embedding
embeddings = embedding_model.encode(cleaned_key_ingr)  # generate embeddings based on key ingredients

# combine recipe dataset and ingredient keyword embeddings in a DataFrame
df = pd.DataFrame({
    "Title": title,
    "Ingredients": cleaned_ingr,
    "Instructions": cleaned_instr,
    "Embedding": list(embeddings)  # Store embeddings as lists for easier handling
})

Indexed Documents:
                      Title  \
0     Jewell Ball'S Chicken   
1               Creamy Corn   
2             Chicken Funny   
3      Reeses Cups(Candy)     
4  Cheeseburger Potato Soup   

                                         Ingredients  \
0  1 small jar chipped beef, cut up, 4 boned chic...   
1  2 (16 oz.) pkg. frozen corn, 1 (8 oz.) pkg. cr...   
2  1 large whole chicken, 2 (10 1/2 oz.) cans chi...   
3  1 c. peanut butter, 3/4 c. graham cracker crum...   
4  6 baking potatoes, 1 lb. of extra lean ground ...   

                                        Instructions  \
0  Place chipped beef on bottom of baking dish. P...   
1  In a slow cooker, combine all ingredients. Cov...   
2  Boil and debone chicken. Put bite size pieces ...   
3  Combine first four ingredients and press in 13...   
4  Wash potatoes; prick several times with a fork...   

                                           Embedding  
0  [-0.0052063875, 0.005931496, 0.057787675, 0.05...  
1  [-0.050

In [None]:
# preview dataframe

df

Unnamed: 0,Title,Ingredients,Instructions,Embedding
0,Jewell Ball'S Chicken,"1 small jar chipped beef, cut up, 4 boned chic...",Place chipped beef on bottom of baking dish. P...,"[-0.0052063875, 0.005931496, 0.057787675, 0.05..."
1,Creamy Corn,"2 (16 oz.) pkg. frozen corn, 1 (8 oz.) pkg. cr...","In a slow cooker, combine all ingredients. Cov...","[-0.05017694, -0.024463343, 0.038555004, 0.039..."
2,Chicken Funny,"1 large whole chicken, 2 (10 1/2 oz.) cans chi...",Boil and debone chicken. Put bite size pieces ...,"[-0.04555725, 0.011677907, 0.025108758, 0.0171..."
3,Reeses Cups(Candy),"1 c. peanut butter, 3/4 c. graham cracker crum...",Combine first four ingredients and press in 13...,"[-0.07629992, -0.06006803, 0.07658986, 0.04522..."
4,Cheeseburger Potato Soup,"6 baking potatoes, 1 lb. of extra lean ground ...",Wash potatoes; prick several times with a fork...,"[-0.015689377, -0.033844896, 0.02279221, -0.00..."
...,...,...,...,...
1083,"""Sesame-Scallion Chicken Salad""","2 skinless, boneless chicken breasts (1–11/2 l...","Bring chicken, 3 Tbsp. kosher salt, and 5 cups...","[-0.024737706, 0.0026643737, 0.05768343, 0.034..."
1084,"""Maple Barbecue Grilled Chicken""","2 tbsp. extra-virgin olive oil, 3 garlic clove...",Heat olive oil in a small saucepan over medium...,"[-0.07130347, -0.017758539, 0.031053178, 0.001..."
1085,"""Spiced Lamb and Dill Yogurt Pasta""","3 large egg yolks, 2 cups kefir (cultured milk...","Combine egg yolks, kefir, and 11/2 cups dill i...","[-0.059787862, -0.0047301687, 0.07131379, 0.04..."
1086,"""Salt-and-Pepper Fish""","1 cup short-grain sushi rice, 8 scallions, 1 (...",Rinse rice in several changes of cold water in...,"[-0.05912297, 0.01989312, 0.058753405, 0.04396..."


In [None]:
# load the fine-tuned GPT2 model

# download from https://1sfu-my.sharepoint.com/:u:/g/personal/osa12_sfu_ca/EXwsLHVUhHpFkHGNj5a9pjMBKL-2JEr7vFp0I9OUqGLc6w?e=PVWqzh

model = GPT2LMHeadModel.from_pretrained("/Users/oliviasteed/Desktop/test 2/gpt2_recipe_model")  # change to your runtime path
tokenizer = GPT2Tokenizer.from_pretrained("/Users/oliviasteed/Desktop/test 2/gpt2_recipe_tokenizer")  # change to your runtime path

### RAG retrieval and generate result

In [None]:
# RAG retrieval functions to get closest recipe and return it using llm

def retrieve_with_pandas(query, top_k=1):

    # generate embedding from query
    query_embedding = embedding_model.encode([query])[0]

   # compute similarity scores (cosine similarity)
    df['Similarity'] = df['Embedding'].apply(lambda x: np.dot(query_embedding, x) /
                                             (np.linalg.norm(query_embedding) * np.linalg.norm(x)))

    # we were using this to compute similarity scores from saved embeddings but they were not as accurate as code above
    # similarities = np.dot(embeddings, query_embedding) / (np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query_embedding))
    # df['Similarity'] = similarities

    # sort by similarity and return top-k results
    results = df.sort_values(by="Similarity", ascending=False).head(top_k)
    return results[["Title", "Ingredients", "Similarity"]]


In [None]:
# function to generate recipe based on input ingredients

def generate_recipe(query):

    # prompt = f"Here's what you can make with: {ingredients} as key ingredients \n \nIngredients: {ingredients}\nInstructions:"

    context = retrieve_with_pandas(query)[["Title", "Ingredients"]]

    prompt = f"You can make {context['Title'].iloc[0]} with {query} as key ingredients \n\nIngredients: {context['Ingredients'].iloc[0]}. \n\nInstructions:"


    inputs = tokenizer(prompt, return_tensors="pt")

    outputs = model.generate(inputs["input_ids"], max_length=800, num_return_sequences=1, no_repeat_ngram_size=5)

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return generated_text


### Generate response from user input

Input what ingredients you have and closest recipe will be returned with RAG, and delivered using fine-tuned GPT2.

In [126]:
# try it out (1-2 common ingredients works best but feel free to have fun)

ingredients = "apple, sugar"

recipe = generate_recipe(ingredients)

print("\n*******")
print(recipe)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



*******
You can make Apple Pie with apple, sugar as key ingredients 

Ingredients: 6 to 7 apples, sliced, 1 c. sugar, 2 Tbsp. flour, 1 tsp. cinnamon, dash of salt and nutmeg. 

Instructions:      Cook apples in a large skillet over medium heat until tender, about 10 minutes.    Remove apples from skillet and set aside.     In a large bowl, combine flour, cinnamon, salt and nutmeg; set aside.  (Do not stir.)     Add apples to skillet and cook until apples are soft and apples are slightly browned, about 10 minutes or until apples are slightly brown.    Serve apples with whipped cream or whipped cream topping.    Recipe Notes:    Apple Pie Recipe:    Recipe:  
