# Dependencies

In [1]:
!pip install -U langchain-community



In [2]:
!pip install langchain openai chromadb tiktoken qdrant-client streamlit pyngrok


Collecting protobuf<6,>=3.20 (from streamlit)
  Using cached protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)
Using cached protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl (319 kB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.3
    Uninstalling protobuf-3.20.3:
      Successfully uninstalled protobuf-3.20.3
Successfully installed protobuf-5.29.3


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Data Load & Pre-Processing

In [4]:
import pandas as pd
import re

# Define paths
df_path = "/content/drive/MyDrive/Colab Notebooks/RecipeRecommender/recipes.csv"
df = pd.read_csv(df_path, keep_default_na=False)

vector_columns = [
    "Images",
    "Keywords",
    "RecipeIngredientQuantities",
    "RecipeIngredientParts",
    "RecipeInstructions"
]

for col in vector_columns:
    df[col] = df[col].apply(lambda cell:
        # If cell is a string and equals "NA", "character(0)" or is empty, return an empty list
        [] if (isinstance(cell, str) and cell.strip() in ["NA", "character(0)", ""])
        # Otherwise, if it matches the pattern c("..."), extract the inner text, split on '", "'
        else ([item.strip() for item in re.match(r'^c\("(.+)"\)$', cell.strip(), re.DOTALL).group(1).split('", "')]
              if (isinstance(cell, str) and re.match(r'^c\("(.+)"\)$', cell.strip(), re.DOTALL))
              # If it doesn't match the pattern, just return the cell as-is
              else cell)
    )

numeric_columns = [
    "AggregatedRating", "ReviewCount", "Calories", "FatContent",
    "SaturatedFatContent", "CholesterolContent", "SodiumContent",
    "CarbohydrateContent", "FiberContent", "SugarContent", "ProteinContent"
]

for col in numeric_columns:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# List of common non-vegetarian keywords (in lower case)
non_veg_keywords = ['chicken', 'beef', 'pork', 'lamb', 'fish', 'turkey', 'shrimp', 'bacon', 'meat']

df['is_vegetarian'] = df['RecipeIngredientParts'].apply(
    lambda parts: False if isinstance(parts, list) and any(keyword in " ".join(parts).lower() for keyword in non_veg_keywords) else True
)
print("Dataset shape:", df.shape)

Dataset shape: (522517, 29)


In [5]:
df.columns

Index(['RecipeId', 'Name', 'AuthorId', 'AuthorName', 'CookTime', 'PrepTime',
       'TotalTime', 'DatePublished', 'Description', 'Images', 'RecipeCategory',
       'Keywords', 'RecipeIngredientQuantities', 'RecipeIngredientParts',
       'AggregatedRating', 'ReviewCount', 'Calories', 'FatContent',
       'SaturatedFatContent', 'CholesterolContent', 'SodiumContent',
       'CarbohydrateContent', 'FiberContent', 'SugarContent', 'ProteinContent',
       'RecipeServings', 'RecipeYield', 'RecipeInstructions', 'is_vegetarian'],
      dtype='object')

In [6]:
df_veg = df[df['is_vegetarian']].copy()

print("Vegetarian Recipes DataFrame Shape:", df_veg.shape)

Vegetarian Recipes DataFrame Shape: (385467, 29)


In [7]:
if 'embedding_text' not in df_veg.columns:
    print("Creating 'embedding_text' column...")
    df_veg['embedding_text'] = (
        df_veg['Name'].fillna('') + " " +
        df_veg['Description'].fillna('') + " " +
        df_veg['Keywords'].apply(lambda x: " ".join(x) if isinstance(x, list) else '') + " " +
        df_veg['RecipeIngredientParts'].apply(lambda x: " ".join(x) if isinstance(x, list) else '') + " " +
        df_veg['RecipeInstructions'].apply(lambda x: " ".join(x) if isinstance(x, list) else '')
    )
    # Append nutritional information to the embedding text
    df_veg['embedding_text'] += " Nutrition: " + \
        "Calories: " + df_veg['Calories'].astype(str) + ", " + \
        "Fat: " + df_veg['FatContent'].astype(str) + ", " + \
        "Saturated Fat: " + df_veg['SaturatedFatContent'].astype(str) + ", " + \
        "Cholesterol: " + df_veg['CholesterolContent'].astype(str) + ", " + \
        "Sodium: " + df_veg['SodiumContent'].astype(str) + ", " + \
        "Carbs: " + df_veg['CarbohydrateContent'].astype(str) + ", " + \
        "Fiber: " + df_veg['FiberContent'].astype(str) + ", " + \
        "Sugar: " + df_veg['SugarContent'].astype(str) + ", " + \
        "Protein: " + df_veg['ProteinContent'].astype(str)
else:
    print("'embedding_text' column already exists.")

# Optional: Verify the column was created
print("Columns in df_veg:", df_veg.columns.tolist())

Creating 'embedding_text' column...
Columns in df_veg: ['RecipeId', 'Name', 'AuthorId', 'AuthorName', 'CookTime', 'PrepTime', 'TotalTime', 'DatePublished', 'Description', 'Images', 'RecipeCategory', 'Keywords', 'RecipeIngredientQuantities', 'RecipeIngredientParts', 'AggregatedRating', 'ReviewCount', 'Calories', 'FatContent', 'SaturatedFatContent', 'CholesterolContent', 'SodiumContent', 'CarbohydrateContent', 'FiberContent', 'SugarContent', 'ProteinContent', 'RecipeServings', 'RecipeYield', 'RecipeInstructions', 'is_vegetarian', 'embedding_text']


# RAG

In [8]:
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Assuming df_veg is your DataFrame and already defined:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
texts = df_veg['embedding_text'].tolist()
metadata = df_veg[['Name', 'Calories', 'FatContent', 'SaturatedFatContent',
                   'CholesterolContent', 'SodiumContent', 'CarbohydrateContent',
                   'FiberContent', 'SugarContent', 'ProteinContent']].to_dict(orient="records")

ids = df_veg['RecipeId'].astype(str).tolist()

db = Chroma.from_texts(
    texts=texts,
    embedding=embeddings,
    metadatas=metadata,
    ids=ids,
    collection_name="veg_recipes",
    persist_directory="./chroma_db"
)

  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [9]:
# Define a sample query—this can be replaced with user input.
user_query = "I have spinach, tomatoes, and garlic. What vegetarian recipe can I make?"

# Perform a similarity search against the vector store.
# k determines how many top matching recipes to return.
results = db.similarity_search(user_query, k=3)

# Display the results.
print("Top matching recipes based on your query:")
for i, result in enumerate(results, start=1):
    # Print the recipe name (from metadata) and a snippet of the embedding text.
    recipe_name = result.metadata.get('Name', 'No Name')
    snippet = result.page_content[:300]  # Show first 300 characters for brevity
    print(f"\nRecipe {i}: {recipe_name}")
    print(snippet)

Top matching recipes based on your query:

Recipe 1: Sun-Dried Tomato Spinach Rice Pilaf
Sun-Dried Tomato Spinach Rice Pilaf This is fabulous with fish and/or chicken.  It can easily be made vegetarian by using vegetable broth instead of chicken.  It's quick, easy and was a hit with my family.  Hope you enjoy. Rice < 60 Mins Beginner Cook Easy olive oil onion garlic clove sun-dried toma

Recipe 2: Sicilian Spinach Sauce
Sicilian Spinach Sauce One of our favorite vegan pasta sauces (also good over baked potatoes, leftover lentils, cooked grains, etc); from the Lean, Lucious, and Meatless cookbook. I usually add flax (sometimes ground, sometimes whole) for extra nutrition that goes unseen. Especially good when fresh 

Recipe 3: Braised Spinach & Garlic
Braised Spinach & Garlic Make and share this Braised Spinach & Garlic recipe from Food.com. Vegan Spicy < 30 Mins Stove Top Easy spinach kale vegan soy sauce garlic Slice greens into small ribbons. Bring l 1/2 cups water and soy sauce to a

# LLaMA 2

In [None]:
import os
import time
import streamlit as st
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
from langchain.llms import HuggingFacePipeline

# ---------------------------
# Streamlit Page Configuration
# ---------------------------
st.set_page_config(page_title="Vegetarian Recipe Chatbot", page_icon="🥗", layout="wide")

# Custom CSS for better UI
st.markdown("""
<style>
.stTextInput > div > div > input {
    caret-color: #4CAF50;
}
.stButton > button {
    background-color: #4CAF50;
    color: white;
    border-radius: 20px;
}
.chat-message {
    padding: 1.5rem;
    border-radius: 0.5rem;
    margin-bottom: 1rem;
    display: flex;
}
.chat-message.user {
    background-color: #2b313e;
}
.chat-message.bot {
    background-color: #475063;
}
.chat-message .avatar {
  width: 20%;
}
.chat-message .avatar img {
  max-width: 78px;
  max-height: 78px;
  border-radius: 50%;
  object-fit: cover;
}
.chat-message .message {
  width: 80%;
  padding: 0 1.5rem;
  color: #fff;
}
</style>
""", unsafe_allow_html=True)

# ---------------------------
# Session State Initialization
# ---------------------------
if 'messages' not in st.session_state:
    st.session_state['messages'] = []
if 'user_input' not in st.session_state:
    st.session_state['user_input'] = ""

# ---------------------------
# Load Embeddings & Vector Store
# ---------------------------
@st.cache_resource
def load_vectorstore():
    embedding_model_name = "all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
    persist_directory = "./chroma_db"
    return Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
        collection_name="veg_recipes"
    )

db = load_vectorstore()

# ---------------------------
# Set Up Chat LLM & Retrieval
# ---------------------------
# Set Hugging Face API token
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "HUGGINGFACEHUB_API_TOKEN"

# Load Hugging Face Model
model_name = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token="HUGGINGFACEHUB_API_TOKEN")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    use_auth_token="HUGGINGFACEHUB_API_TOKEN"
)

# Create text generation pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=300)

# Use HuggingFacePipeline as the LLM
llm = HuggingFacePipeline(pipeline=pipe)

# ✅ Fixed: Use `PromptTemplate`
prompt_template = PromptTemplate(
    input_variables=["context", "question"],  # Explicitly define input variables
    template="""
    You are a helpful assistant specialized in vegetarian recipes.
    Use the provided reference information to answer the user's query.

    Recipe Reference:
    {context}

    User's Ingredients:
    {question}

    Answer:
    """
)

# Build the retriever (fetch top 3 relevant documents)
retriever = db.as_retriever(search_kwargs={"k": 3})

# ✅ Apply fixed prompt template
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt_template}
)

def get_response(user_input: str) -> str:
    """
    Retrieves a response from the LLM based on user input.
    Handles cases where no relevant documents are retrieved.
    """
    docs = retriever.get_relevant_documents(user_input)

    if not docs:  # If no relevant recipes are found
        return "I'm sorry, I couldn't find a matching recipe. Try using different ingredients!"

    return qa_chain.run({"question": user_input, "context": "\n\n".join([d.page_content for d in docs])}).strip()

# ---------------------------
# Chat Interface (Streamlit UI)
# ---------------------------
st.title("🥗 Vegetarian Recipe Chatbot 🍲")

# Display previous chat messages
for message in st.session_state.messages:
    with st.container():
        st.markdown(f"""
        <div class="chat-message {message['role']}">
            <div class="avatar">
                <img src="https://i.imgur.com/{'Rnk6Qju' if message['role'] == 'user' else 'Kkj5ypf'}.png">
            </div>
            <div class="message">{message['content']}</div>
        </div>
        """, unsafe_allow_html=True)

# User input field
user_input = st.text_input("What ingredients do you have?", key="user_input")

if user_input:
    st.session_state.messages.append({"role": "user", "content": user_input})
    with st.spinner("Thinking..."):
        bot_response = get_response(user_input)
    st.session_state.messages.append({"role": "bot", "content": bot_response})
    st.experimental_set_query_params(dummy=str(time.time()))  # ✅ Forces UI refresh

# ✅ Fix: Use a function instead of `st.rerun()`
def clear_chat():
    st.session_state.messages = []
    st.session_state.user_input = ""
    st.experimental_set_query_params(dummy=str(time.time()))  # ✅ Forces UI refresh

st.button("Clear Chat History", on_click=clear_chat)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


False

In [None]:
!pip install streamlit pyngrok -q

import os
import time
from pyngrok import ngrok

# Kill any existing ngrok tunnels
os.system("pkill ngrok")

# Set ngrok authentication token
NGROK_AUTH_TOKEN = "HUGGINGFACEHUB_API_TOKEN"  # Replace with your actual token
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Start Streamlit
os.system("nohup streamlit run app.py --server.headless true --server.enableCORS false --server.enableXsrfProtection false > streamlit.log 2>&1 &")

time.sleep(10)  # Give Streamlit time to start

# Connect ngrok to Streamlit
public_url = ngrok.connect(8502)
print(f"🔥 Streamlit App is Live! Click here: {public_url}")

# Keep the tunnel open
while True:
    time.sleep(100)

🔥 Streamlit App is Live! Click here: NgrokTunnel: "https://c5dd-35-236-135-186.ngrok-free.app" -> "http://localhost:8502"




KeyboardInterrupt: 