In [None]:
pip install python-dotenv huggingface_hub transformers accelerate sentence-transformers faiss-cpu pandas



In [None]:
pip install python-dotenv huggingface_hub transformers accelerate sentence-transformers faiss-cpu pandas
from dotenv import load_dotenv
import os

load_dotenv()

HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_ID = os.getenv("MODEL_ID", "google/gemma-2b-it")
TOP_K = int(os.getenv("TOP_K", 3))

# === Hugging Face Login ===
from huggingface_hub import login
login(token=HF_TOKEN)
# === Required Libraries ===
import pandas as pd
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


# === Load and preprocess data ===
df = pd.read_csv("Detailed_Data.csv")

# Columns that are rendered into each retrievable text entry
columns_to_use = [
    "Name", "Price", "Seller", "Seller Location",
    "Ships to (seller)", "Ships to (product)",
    "Category", "Quantity in stock", "Availability"
]

# Keep only rows where every selected column has a value
filtered_df = df[columns_to_use].dropna()

# Fail early with a clear message: on an empty frame, apply(axis=1) returns a
# DataFrame (not a Series), so the .tolist() below would raise an opaque
# AttributeError instead.
if len(filtered_df) == 0:
    raise ValueError(
        "No rows in Detailed_Data.csv have values for all required columns."
    )

# Render each row as "Column: value" lines, one entry string per row
entries = filtered_df.apply(
    lambda row: "\n".join(f"{col}: {row[col]}" for col in columns_to_use),
    axis=1
).tolist()

# === Create embeddings and FAISS index ===
# Every entry is encoded as a normalized vector, so the inner-product
# index below ranks neighbours by cosine similarity.
embedding_model = SentenceTransformer("thenlper/gte-large")
embeddings = embedding_model.encode(
    entries,
    normalize_embeddings=True,
    convert_to_numpy=True,
)

# Exact (flat) inner-product index sized to the embedding width
dimension = embeddings.shape[1]
index = faiss.IndexFlatIP(dimension)
index.add(embeddings)

# === Load Gemma model ===
# MODEL_ID selects the checkpoint; HF_TOKEN is forwarded to the Hub for both
# the tokenizer and the weights.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    torch_dtype="auto",
    device_map="auto",
)

# Sampling-based text generation, capped at 150 new tokens per call
gen_pipeline = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    do_sample=True,
    temperature=0.7,
    max_new_tokens=150,
)

# === RAG query function ===
def rag_query(user_query, top_k=TOP_K):
    """Answer ``user_query`` using the most similar dataset entries as context.

    Args:
        user_query: Question text; it is embedded for retrieval and also
            inserted into the generation prompt.
        top_k: Number of nearest entries to request from the FAISS index.

    Returns:
        The generated answer text only (the prompt is not echoed back).
    """
    query_embedding = embedding_model.encode(
        [user_query], convert_to_numpy=True, normalize_embeddings=True
    )
    # Only the positions of the matches are needed, not their scores.
    _, indices = index.search(query_embedding, top_k)

    # FAISS fills unfilled result slots with -1 (e.g. when the index holds
    # fewer than top_k vectors); skip those so that entries[-1] is not
    # silently used as context.
    retrieved_context = [entries[i] for i in indices[0] if i >= 0]
    context = "\n\n---\n\n".join(retrieved_context)

    prompt = f"""Answer the following question using the context provided below.

Context:
{context}

Question: {user_query}
Answer:"""

    # By default the text-generation pipeline returns prompt + continuation;
    # return_full_text=False keeps only the newly generated answer.
    generated = gen_pipeline(prompt, return_full_text=False)
    response = generated[0]["generated_text"].strip()

    return response

# # === Interactive CLI ===
# print("🔁 RAG Q&A Chat — type 'exit' to stop\n")

# while True:
#     user_input = input("🧠 Ask a question: ")

#     if user_input.strip().lower() in ["exit", "quit", "stop"]:
#         print("👋 Exiting chat.")
#         break

#     answer = rag_query(user_input, top_k=3)
#     print("\n💬 Answer:\n", answer, "\n" + "-"*60 + "\n")

In [None]:
import os
from dotenv import load_dotenv

# Populate the process environment from a .env file when one is present
load_dotenv()

# Runtime settings, each overridable through an environment variable
HF_TOKEN = os.environ.get("HF_TOKEN")
MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-2b-it")
TOP_K = int(os.environ.get("TOP_K", 3))

# === Hugging Face Login ===
# Import kept after load_dotenv() so values from .env are already in the
# environment by the time the Hub client library is loaded.
from huggingface_hub import login
login(token=HF_TOKEN)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [None]:
# === Required Libraries ===
import pandas as pd
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


# === Load and preprocess data ===
df = pd.read_csv("Detailed_Data.csv")

# Columns that are rendered into each retrievable text entry
columns_to_use = [
    "Name", "Price", "Seller", "Seller Location",
    "Ships to (seller)", "Ships to (product)",
    "Category", "Quantity in stock", "Availability"
]

# Keep only rows where every selected column has a value
filtered_df = df[columns_to_use].dropna()

# Fail early with a clear message: on an empty frame, apply(axis=1) returns a
# DataFrame (not a Series), so the .tolist() below would raise an opaque
# AttributeError instead.
if len(filtered_df) == 0:
    raise ValueError(
        "No rows in Detailed_Data.csv have values for all required columns."
    )

# Render each row as "Column: value" lines, one entry string per row
entries = filtered_df.apply(
    lambda row: "\n".join(f"{col}: {row[col]}" for col in columns_to_use),
    axis=1
).tolist()

# === Create embeddings and FAISS index ===
# Every entry is encoded as a normalized vector, so the inner-product
# index below ranks neighbours by cosine similarity.
embedding_model = SentenceTransformer("thenlper/gte-large")
embeddings = embedding_model.encode(
    entries,
    normalize_embeddings=True,
    convert_to_numpy=True,
)

# Exact (flat) inner-product index sized to the embedding width
dimension = embeddings.shape[1]
index = faiss.IndexFlatIP(dimension)
index.add(embeddings)

# === Load Gemma model ===
# MODEL_ID selects the checkpoint; HF_TOKEN is forwarded to the Hub for both
# the tokenizer and the weights.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    torch_dtype="auto",
    device_map="auto",
)

# Sampling-based text generation, capped at 150 new tokens per call
gen_pipeline = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    do_sample=True,
    temperature=0.7,
    max_new_tokens=150,
)

# === RAG query function ===
def rag_query(user_query, top_k=TOP_K):
    """Answer ``user_query`` using the most similar dataset entries as context.

    Args:
        user_query: Question text; it is embedded for retrieval and also
            inserted into the generation prompt.
        top_k: Number of nearest entries to request from the FAISS index.

    Returns:
        The generated answer text only (the prompt is not echoed back).
    """
    query_embedding = embedding_model.encode(
        [user_query], convert_to_numpy=True, normalize_embeddings=True
    )
    # Only the positions of the matches are needed, not their scores.
    _, indices = index.search(query_embedding, top_k)

    # FAISS fills unfilled result slots with -1 (e.g. when the index holds
    # fewer than top_k vectors); skip those so that entries[-1] is not
    # silently used as context.
    retrieved_context = [entries[i] for i in indices[0] if i >= 0]
    context = "\n\n---\n\n".join(retrieved_context)

    prompt = f"""Answer the following question using the context provided below.

Context:
{context}

Question: {user_query}
Answer:"""

    # By default the text-generation pipeline returns prompt + continuation;
    # return_full_text=False keeps only the newly generated answer.
    generated = gen_pipeline(prompt, return_full_text=False)
    response = generated[0]["generated_text"].strip()

    return response

# # === Interactive CLI ===
# print("🔁 RAG Q&A Chat — type 'exit' to stop\n")

# while True:
#     user_input = input("🧠 Ask a question: ")

#     if user_input.strip().lower() in ["exit", "quit", "stop"]:
#         print("👋 Exiting chat.")
#         break

#     answer = rag_query(user_input, top_k=3)
#     print("\n💬 Answer:\n", answer, "\n" + "-"*60 + "\n")

import gradio as gr

# Wrap the rag_query function
def gradio_rag_query(question):
    """Gradio callback: run ``rag_query`` on ``question`` with the configured TOP_K."""
    return rag_query(question, top_k=TOP_K)

# Build the web UI: a two-line question box in, plain text out
iface = gr.Interface(
    title="🔎 RAG Product Assistant",
    description="Ask any question about the products listed in your dataset!",
    fn=gradio_rag_query,
    inputs=gr.Textbox(placeholder="Ask your question here...", lines=2),
    outputs="text",
    theme="default",
)

# Start the app; share=True requests a public link in addition to the local one
iface.launch(share=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/619 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/34.2k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
import gradio as gr

# Wrap the rag_query function
def gradio_rag_query(question):
    """Gradio callback: run ``rag_query`` on ``question`` with the configured TOP_K."""
    return rag_query(question, top_k=TOP_K)

# Build the web UI: a two-line question box in, plain text out
iface = gr.Interface(
    title="🔎 RAG Product Assistant",
    description="Ask any question about the products listed in your dataset!",
    fn=gradio_rag_query,
    inputs=gr.Textbox(placeholder="Ask your question here...", lines=2),
    outputs="text",
    theme="default",
)

# Start the app; share=True requests a public link in addition to the local one
iface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1c04b3da146d5f5fcf.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


