## Recipe Chatbot

### Secrets and imports

In [1]:
# Sanity check: list the current working directory to see which data/index files are present.
!ls # check location

context_texts.json  formatted_recipes.json  recipe_index.faiss
drive		    recipe_ids.json	    sample_data


In [None]:
# get into right folder, make sure connected to git
# (%cd changes the notebook's working directory; !git status then runs inside that folder)
%cd /content/drive/MyDrive/recipe-chatbot
!git status

In [26]:
# Fetch and merge the latest commits from the remote's main branch.
!git pull origin main

remote: Enumerating objects: 31, done.[K
remote: Counting objects:   3% (1/31)[Kremote: Counting objects:   6% (2/31)[Kremote: Counting objects:   9% (3/31)[Kremote: Counting objects:  12% (4/31)[Kremote: Counting objects:  16% (5/31)[Kremote: Counting objects:  19% (6/31)[Kremote: Counting objects:  22% (7/31)[Kremote: Counting objects:  25% (8/31)[Kremote: Counting objects:  29% (9/31)[Kremote: Counting objects:  32% (10/31)[Kremote: Counting objects:  35% (11/31)[Kremote: Counting objects:  38% (12/31)[Kremote: Counting objects:  41% (13/31)[Kremote: Counting objects:  45% (14/31)[Kremote: Counting objects:  48% (15/31)[Kremote: Counting objects:  51% (16/31)[Kremote: Counting objects:  54% (17/31)[Kremote: Counting objects:  58% (18/31)[Kremote: Counting objects:  61% (19/31)[Kremote: Counting objects:  64% (20/31)[Kremote: Counting objects:  67% (21/31)[Kremote: Counting objects:  70% (22/31)[Kremote: Counting objects:  74% (23/31)[Kr

In [2]:
from google.colab import userdata

# Read both credentials from the Colab secret store so no token is hardcoded in the notebook.
git_token, hf_token = (userdata.get(secret_name) for secret_name in ("git-token", "hf-token"))

In [None]:
# Uncomment on a fresh runtime to install/upgrade the required libraries:
# !pip install -U transformers sentence-transformers huggingface_hub peft

In [3]:
from huggingface_hub import login
# Authenticate with the Hugging Face Hub using the token held in `hf_token`.
login(token=hf_token)

import json
import numpy as np
import faiss
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


In [4]:
# Confirm a CUDA device is visible to torch before going further.
gpu_ready = torch.cuda.is_available()
print("good" if gpu_ready else "Runtime not set to GPU")

good


### Embeddings

In [8]:
# choosing embedding model (checkpoint name kept in a constant so it is easy to swap)
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

In [5]:
# fns

def get_recipe_emb(recipe, model):
    """Encode a recipe's searchable fields into an embedding for search.

    The title, category, tags, description and ingredients are joined into a
    single " - "-separated string, which is passed to ``model.encode``.

    Args:
        recipe: Mapping with required keys "title" and "ingredients" (an
            iterable of strings). "category", "description" and "tags" are
            optional.
        model: Any object exposing ``encode(text)``.

    Returns:
        Whatever ``model.encode`` returns for the search text.

    Raises:
        KeyError: If "title" or "ingredients" is missing from ``recipe``.
    """
    title = recipe["title"]
    category = recipe.get("category", "")
    # Tags may be missing, None or an empty list: treat all of those as
    # "no tags" instead of indexing blindly. A blank first tag also means
    # "no tags".
    tag_list = recipe.get("tags") or []
    tags = ", ".join(tag_list) if tag_list and tag_list[0] else ""
    desc = recipe.get("description", "")
    ingredients = ", ".join(recipe["ingredients"])
    search_text = f"{title} - {category} - {tags} - {desc} - {ingredients}"
    return model.encode(search_text)

def get_recipe_context(recipe):
    """Build the text block that is handed to the chatbot when this recipe is retrieved.

    Args:
        recipe: Mapping with required keys "title", "ingredients" and "steps"
            (the latter two iterables of strings). "description" and "tags"
            are optional.

    Returns:
        A five-line string: Title, Description, Tags, Ingredients, Steps.
        List-valued fields are joined with ", ".

    Raises:
        KeyError: If "title", "ingredients" or "steps" is missing.
    """
    title = recipe["title"]
    # Tags may be missing, None or an empty list: treat all of those as
    # "no tags" instead of indexing blindly. A blank first tag also means
    # "no tags".
    tag_list = recipe.get("tags") or []
    tags = ", ".join(tag_list) if tag_list and tag_list[0] else ""
    desc = recipe.get("description", "")
    ingredients = ", ".join(recipe["ingredients"])
    steps = ", ".join(recipe["steps"])
    return f"Title: {title}\nDescription: {desc}\nTags: {tags}\nIngredients: {ingredients}\nSteps: {steps}"

def save_embeddings(
    model,
    recipes_path="/content/formatted_recipes.json",
    index_path="/content/recipe_index.faiss",
    context_path="/content/context_texts.json",
    ids_path="/content/recipe_ids.json",
):
    """Embed every recipe and persist the search index plus lookup files.

    Reads the recipe list from ``recipes_path`` and writes three artifacts:
    a FAISS ``IndexFlatL2`` of the search embeddings, a JSON list of context
    texts, and a JSON list of recipe IDs. The IDs are the recipes' positions
    in the input list, so row ``i`` of the index corresponds to
    ``context_texts[i]``.

    Args:
        model: Embedding model; forwarded to ``get_recipe_emb``.
        recipes_path: JSON file holding a list of recipe dicts.
        index_path: Destination of the FAISS index file.
        context_path: Destination of the context-text JSON file.
        ids_path: Destination of the recipe-ID JSON file.

    Raises:
        ValueError: If the recipe file contains no recipes (there would be no
            embedding dimension to build the index from).
    """
    with open(recipes_path) as f:
        recipes = json.load(f)
    if not recipes:
        raise ValueError(f"No recipes found in {recipes_path}; nothing to index.")

    recipe_ids = list(range(len(recipes)))
    search_vecs = [get_recipe_emb(recipe, model) for recipe in recipes]
    context_texts = [get_recipe_context(recipe) for recipe in recipes]

    # FAISS expects a 2-D float32 matrix: one row per recipe.
    emb_matrix = np.asarray(search_vecs, dtype="float32")
    index = faiss.IndexFlatL2(emb_matrix.shape[1])
    index.add(emb_matrix)
    faiss.write_index(index, index_path)

    with open(context_path, "w") as f:
        json.dump(context_texts, f)
    with open(ids_path, "w") as f:
        json.dump(recipe_ids, f)

def get_top_recipes(
    model,
    user_query,
    n_recipes=3,
    index_path="/content/recipe_index.faiss",
    context_path="/content/context_texts.json",
):
    """Return the context texts of the recipes most similar to the user's query.

    Args:
        model: Embedding model exposing ``encode(list_of_str)``.
        user_query: Free-text request from the user.
        n_recipes: Maximum number of recipes to return.
        index_path: FAISS index file to search.
        context_path: JSON list of context texts, aligned with the index rows.

    Returns:
        Up to ``n_recipes`` context strings, in the order returned by the
        index search. Fewer are returned when the index holds fewer vectors
        than requested.
    """
    encoded_query = np.asarray(model.encode([user_query]), dtype="float32")
    index = faiss.read_index(index_path)
    _, neighbor_ids = index.search(encoded_query, k=n_recipes)
    with open(context_path) as f:
        context_texts = json.load(f)
    # FAISS pads missing neighbours with -1; without this filter a -1 would
    # silently pick the last context via negative indexing.
    return [context_texts[i] for i in neighbor_ids[0] if i >= 0]

def create_prompt(user_query, top_contexts, chat_history=None):
    """Create the instruction prompt from the user query and retrieved recipe context(s).

    Args:
        user_query: The user's current request; inserted verbatim in quotes.
        top_contexts: Sequence of recipe context strings. Any number is
            accepted; each is wrapped in RECIPE START/END markers.
        chat_history: Optional sequence of ``(user_message, assistant_reply)``
            pairs from earlier turns. When non-empty, a "Conversation so far"
            section is placed before the current query. When omitted or
            empty, the prompt carries no history section.

    Returns:
        The full prompt string, wrapped in ``<s>[INST] ... [/INST]``.
    """
    recipe_blocks = "\n".join(
        f"===RECIPE START===\n{context}\n===RECIPE END===" for context in top_contexts
    )

    history_block = ""
    if chat_history:
        turns = "\n".join(
            f"User: {past_user}\nAssistant: {past_reply}"
            for past_user, past_reply in chat_history
        )
        history_block = f"Conversation so far:\n{turns}\n\n"

    # NOTE(review): the literal <s> is kept from the original prompt; if the
    # tokenizer also prepends a BOS token the sequence would start with two.
    # TODO confirm.
    return f"""<s>[INST]
You are a friendly cooking assistant that helps people find great recipes based on their needs.

{history_block}User query:
"{user_query}"

Here are some recipes you can consider:
{recipe_blocks}

Now write a helpful recommendation for one of these recipes, explaining why it
meets the user's request.
At the end, include the recipe title, ingredients, and steps.
[/INST]"""

In [6]:
# load chatbot model
model_name = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# float16 weights with automatic device placement, gathered in one place.
model_load_options = {"device_map": "auto", "torch_dtype": torch.float16}
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_options)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



In [9]:
# test: retrieve recipes for a sample query, build the prompt, and sample a reply
user_query = "I'd like something with mushrooms - what do you recommend?"
top_contexts = get_top_recipes(embedding_model, user_query)
prompt = create_prompt(user_query, top_contexts)

# Sampling settings kept together so they are easy to tweak.
sampling_options = dict(
    max_new_tokens=500,
    do_sample=True,
    top_p=0.9,
    temperature=0.7,
    repetition_penalty=1.1,
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, **sampling_options)

decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
print(decoded_output)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



You are a friendly cooking assistant that helps people find great recipes based on their needs.

User query:
"I'd like something with mushrooms - what do you recommend?"

Here are some recipes you can consider:
===RECIPE START===
Title: Homemade Cream of Mushroom Soup
Description: Make and share this Homemade Cream of Mushroom Soup recipe from Food.com.        
Tags: Low Protein, Winter, Savory, < 30 Mins, Stove Top, Easy
Ingredients: fresh mushrooms, onions, garlic clove, butter, flour, chicken broth, evaporated milk, salt, pepper, nutmeg
Steps: Cut the mushrooms into slices., Melt butter in large frying pan.  Add in onions, garlic, and mushrooms.  Cook until onions are soft., Blend in 2 T. flour and stir., Add in the chicken broth and heat until slightly thickened while stirring frequently., Stir cream with additional 1 T. flour and seasonings.  Add in cream to soup.  Heat to thicken while stirring frequently., Serve and enjoy!
===RECIPE END===
===RECIPE START===
Title: Classic Bake

In [15]:
# getting rid of the input query in the chatbot's response
# generate() on a decoder-only model returns the prompt tokens followed by the
# newly generated ones, so slice by token count rather than matching decoded
# text (a decode round-trip is not guaranteed to reproduce the prompt exactly).
prompt_len = inputs["input_ids"].shape[-1]
new_tokens = output[0][prompt_len:]
chatbot_response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

print(chatbot_response)

Based on your preference for dishes with mushrooms, I would highly recommend the "Homemade Cream of Mushroom Soup" recipe. This soup features mushrooms as its main ingredient, making it an excellent choice for those who love the earthy flavor of mushrooms.

Recipe Title: Homemade Cream of Mushroom Soup

Ingredients:
- Fresh mushrooms
- Onions
- Garlic clove
- Butter
- Flour
- Chicken broth
- Evaporated milk
- Salt
- Pepper
- Nutmeg

Steps:
1. Cut the mushrooms into slices.
2. Melt butter in a large frying pan.
3. Add in onions, garlic, and mushrooms, and cook until the onions are soft.
4. Blend in 2 tablespoons of flour and stir.
5. Add in the chicken broth and heat until slightly thickened while stirring frequently.
6. Stir cream with an additional tablespoon of flour and seasonings.
7. Add in the cream to the soup and heat to thicken while stirring frequently.
8. Serve and enjoy!


### Looking into gradio

In [None]:
!pip install gradio


In [None]:
import gradio as gr

# Running transcript of (user_input, response) pairs; module-level, so it is
# shared by every call to chatbot_interface.
chat_history = []

def chatbot_interface(user_input):
    """Answer one user message with a recipe recommendation.

    Retrieves the most similar recipes for ``user_input``, builds a prompt,
    samples a reply from the chat model, and appends the turn to the
    module-level ``chat_history`` list.

    Args:
        user_input: The user's message for this turn.

    Returns:
        The model's newly generated text only (the prompt is not echoed back).
    """
    # get top recipe contexts based on THIS user input
    # could maybe add previous user inputs?
    top_contexts = get_top_recipes(embedding_model, user_input)

    # create prompt w context and chat history
    # NOTE(review): create_prompt must accept a third chat_history argument
    # for this call to work. Confirm against its definition.
    prompt = create_prompt(user_input, top_contexts, chat_history)

    # get mistral response
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        repetition_penalty=1.1
    )
    # generate() returns the prompt tokens followed by the new ones. Decode
    # only the new tokens so neither the user nor the stored history sees the
    # full prompt repeated.
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()

    # update chat history; return mistral response
    chat_history.append((user_input, response))
    return response

In [None]:
# Minimal chat UI: a chat window, a text box for the request, and a reset button.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Ask for a recipe suggestion!")
    clear = gr.Button("Clear Chat")

    def user_submit(user_message, history):
        """Handle a submitted message: returns ("", updated_history) to clear the box and refresh the chat."""
        history = history or []
        # Ignore blank submissions instead of running retrieval + generation on nothing.
        if not user_message or not user_message.strip():
            return "", history
        reply = chatbot_interface(user_message)
        return "", history + [[user_message, reply]]

    def reset_chat():
        """Empty the stored conversation and blank the chat window."""
        # Clear in place so chatbot_interface keeps using the same list object.
        chat_history.clear()
        return None

    msg.submit(user_submit, [msg, chatbot], [msg, chatbot])
    clear.click(reset_chat, None, chatbot, queue=False)

demo.launch()