In [None]:
# Notebook dependencies, installed into the running environment with pip.
# None of the packages below is pinned to a version.
!pip install langchain chromadb sentence-transformers
# -U upgrades langchain-community if a copy is already installed
!pip install -U langchain-community
# -q reduces pip's console output
!pip install -q gradio
!pip install pypdf
!pip install -q transformers
# NOTE(review): streamlit is not imported by any cell visible here — confirm it is still needed
!pip install streamlit



In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

In [None]:
# Every prompt shares the same opening and closing text; only the task
# instructions in the middle differ between query types.
_PROMPT_HEADER = "\nYou are a helpful cooking assistant.\n\n"
_PROMPT_FOOTER = "\n\nContext:\n{context}\n\nQuestion: {question}\nAnswer:\n"

_INGREDIENTS_TASK = """\
List the ingredients based on the recipe context below.

Use the format:
- Ingredient – Quantity (Unit)

Only include items explicitly mentioned in the recipe. No guessing."""

_STEPS_TASK = """\
List the steps for this recipe in clear order.

Use the format:
1. Do this
2. Do that

Use only what is stated in the recipe. Don’t guess."""

_TOOLS_TASK = """\
List the cooking tools or equipment mentioned in the recipe.

Only mention tools explicitly stated. Do not guess."""

_TIME_TASK = """\
Extract the cook time or prep time from the recipe, if available.

Do not guess. Just state the time if mentioned."""

_DEFAULT_TASK = """\
Answer the user's question using only the context from the recipe.

Be clear and concise. If the recipe doesn't answer the question, say you don't know."""

# Ordered (keywords, task) rules; the first rule with any keyword occurring as
# a substring of the lower-cased query wins.
# - "what do i need to cook" is checked first: the ingredients keyword
#   "what do i need" is a prefix of it, so when it was listed only in the later
#   tools rule it could never be reached.
# - Keywords that merely contain another keyword of the same rule
#   ("ingredients", "cook time", "prep time") are omitted because the shorter
#   keyword already matches every query they would match.
_PROMPT_RULES = (
    (("what do i need to cook",), _TOOLS_TASK),
    (("ingredient", "what do i need", "how much", "quantity"), _INGREDIENTS_TASK),
    (("step", "instruction", "how to", "method", "procedure", "direction"), _STEPS_TASK),
    (("tool", "equipment", "utensil"), _TOOLS_TASK),
    (("time", "how long", "duration"), _TIME_TASK),
)


def get_prompt_template(query):
    """Pick the prompt template that matches the kind of question asked.

    The query is lower-cased and tested against the keyword rules in
    ``_PROMPT_RULES`` (plain substring matching, first match wins). Queries
    that match no rule get the general question-answering prompt.

    Args:
        query: The user's question as a string.

    Returns:
        The result of ``PromptTemplate.from_template`` for the selected prompt
        text, which contains the ``{context}`` and ``{question}`` placeholders.
    """
    query_lower = query.lower()
    task = next(
        (
            rule_task
            for keywords, rule_task in _PROMPT_RULES
            if any(keyword in query_lower for keyword in keywords)
        ),
        _DEFAULT_TASK,
    )
    return PromptTemplate.from_template(_PROMPT_HEADER + task + _PROMPT_FOOTER)

In [None]:
# Load the recipe PDF
PDF_PATH = "Coconut_Based_Fish_Gravy_Recipe.pdf"
loader = PyPDFLoader(PDF_PATH)
pages = loader.load()

# Split the loaded pages into chunks
CHUNK_SIZE = 500
CHUNK_OVERLAP = 10
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
docs = text_splitter.split_documents(pages)

# Fixing broken fraction units in chunk text
def clean_fractions(text):
    """Restore fraction glyphs whose digits were run together in the text.

    Rewrites "12", "14" and "34" to "½", "¼" and "¾" when they are followed by
    a single space and one of the units tsp, tbsp or cup. Matching is plain
    substring replacement, so a leading whole number is kept as-is
    ("112 cup" -> "1½ cup", "212 tbsp" -> "2½ tbsp") and plural units such as
    "cups" are covered by the "cup" match.

    Caveat: a quantity that really is 12, 14 or 34 of a unit is rewritten as
    well; the function cannot tell the two cases apart.

    Args:
        text: The chunk text to repair.

    Returns:
        A copy of ``text`` with the garbled fractions replaced.
    """
    for garbled, fraction in (("12", "½"), ("14", "¼"), ("34", "¾")):
        for unit in ("tsp", "tbsp", "cup"):
            text = text.replace(f"{garbled} {unit}", f"{fraction} {unit}")
    return text

# Repair the fractions in every chunk, in place, before the chunks are indexed
for doc in docs:
    doc.page_content = clean_fractions(doc.page_content)

# Embedding model, configured to run on the CPU
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedding = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs={"device": "cpu"},
)

# Build the Chroma vector store from the chunks, persisted under CHROMA_DIR
CHROMA_DIR = "chroma_db"
db = Chroma.from_documents(docs, embedding, persist_directory=CHROMA_DIR)

  embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# Loading the FLAN-T5 model
model_id = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Setting up the LLM pipeline; do_sample=False turns sampling off
llm_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    do_sample=False
)
llm = HuggingFacePipeline(pipeline=llm_pipeline)

# Creating the retriever over the vector store built earlier.
# NOTE(review): the recorded run of this cell warned that the tokenized prompt
# was 688 tokens against a 512-token maximum — consider retrieving fewer or
# smaller chunks so the stuffed context fits.
retriever = db.as_retriever()

# Asking a question
query = "What are the ingredients for this recipe?"

# Dynamically choosing the prompt based on query type
prompt = get_prompt_template(query)

# Building the RetrievalQA pipeline with the custom prompt
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt}
)

# Chain.run() is deprecated in LangChain; invoke() is its replacement.
# RetrievalQA takes the question under "query" and returns the answer under "result".
response = qa.invoke({"query": query})["result"]
print(response)


Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=llm_pipeline)
  response = qa.run(query)
Token indices sequence length is longer than the specified maximum sequence length for this model (688 > 512). Running this sequence through the model will result in indexing errors


Ingredients: Fresh grated coconut, coconut milk, Shallots, Garlic, 5 cloves, Ginger, Green chilies, 2 Curry leaves, 1 sprig, Tomato, 1 medium, Tamarind, Gooseberry-sized ball in 14 cup warm water, Mustard seeds, 12 tsp, Fenugreek seeds, 14 tsp, Turmeric powder, 12 tsp, Coconut oil, 212 tbsp, Water, 1 cup


In [None]:
# Gradio interface

import gradio as gr

def answer_question(question):
    """Answer one recipe question for the Gradio UI.

    A prompt is chosen for the question with ``get_prompt_template`` and a
    fresh RetrievalQA chain is built around it, because the prompt depends on
    the question.

    Args:
        question: The text submitted by the user.

    Returns:
        The chain's answer string, or a short hint when the question is
        missing or contains only whitespace (nothing is retrieved or generated
        in that case).
    """
    # Guard first: an empty textbox submission should not run the model.
    if question is None or not question.strip():
        return "Please enter a question about the recipe."

    prompt = get_prompt_template(question)
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        chain_type_kwargs={"prompt": prompt}
    )
    # Chain.run() is deprecated in LangChain; invoke() is its replacement.
    # RetrievalQA takes the question under "query" and returns the answer under "result".
    return qa.invoke({"query": question})["result"]

# Launch the Gradio UI: one textbox for the question, one for the answer
question_box = gr.Textbox(label="Ask about the recipe")
answer_box = gr.Textbox(label="Answer")
demo = gr.Interface(
    fn=answer_question,
    inputs=question_box,
    outputs=answer_box,
    title="Recipe Chatbot",
)
demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3344dc89e79cf697d8.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


