<a href="https://colab.research.google.com/github/learnthusalearner/Minor_Project_7th/blob/main/Minor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
!pip install pandas sentence-transformers faiss-cpu transformers accelerate bitsandbytes gradio pyngrok




In [25]:
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import gradio as gr
from pyngrok import ngrok
from google.colab import userdata

In [26]:
csv_file = "/content/sample_data/gita.csv"   # <-- your file
df = pd.read_csv(csv_file)
df["text"] = df.fillna("").astype(str).agg(" ".join, axis=1)

In [27]:
def chunk_text(s, size=500):
    return [s[i:i+size] for i in range(0, len(s), size)]

chunks = []
for i, row in df.iterrows():
    for c in chunk_text(row["text"]):
        chunks.append(c)

print(f"✅ Total text chunks: {len(chunks)}")

✅ Total text chunks: 4303


In [28]:
embedder = SentenceTransformer("all-MiniLM-L6-v2")
embs = embedder.encode(chunks, convert_to_numpy=True)

dim = embs.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(embs)

print("✅ FAISS index ready!")

✅ FAISS index ready!


In [29]:
def retrieve(query, k=5):  # use more chunks
    q_emb = embedder.encode([query], convert_to_numpy=True)
    D, I = index.search(q_emb, k)
    return [chunks[i] for i in I[0]]

In [34]:
from transformers import BitsAndBytesConfig

model_id = "microsoft/Phi-3-mini-4k-instruct"

# Quantization config (replaces load_in_4bit)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
llm = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=bnb_config
)

# ✅ Don't set device here because accelerate already placed it
chatbot = pipeline(
    "text-generation",
    model=llm,
    tokenizer=tokenizer
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


In [46]:
def ask_csv(query, history=[]):
    context = "\n".join(retrieve(query))
    prompt = f"""
You are Lord Krishna, speaking only in the style of the Bhagavad Gita.
Answer briefly (3-5 sentences), with compassion and clarity.
Do NOT invent information — use only the context below.
If unsure, say: "I do not know, but seek within, and clarity shall arise."

Context:
{context}

Arjuna's Question: {query}
Krishna's Guidance:"""

    out = chatbot(prompt, max_new_tokens=200, do_sample=False, temperature=0.2, top_p=0.9)
    answer = out[0]["generated_text"].split("Krishna's Guidance:")[-1].strip()
    return answer


In [47]:
with gr.Blocks() as demo:
    gr.Markdown("# 📊 CSV Chatbot (Free LLM + GPU)")
    chatbot_ui = gr.ChatInterface(chat_fn)

ngrok_auth_token = userdata.get("NGROK_AUTH_TOKEN")
ngrok.set_auth_token(ngrok_auth_token)

  self.chatbot = Chatbot(


In [53]:
app = FastAPI()

class QueryRequest(BaseModel):
    question: str

@app.post("/ask")
def ask_endpoint(req: QueryRequest):
    answer = ask_csv(req.question)
    return {"question": req.question, "answer": answer}


In [56]:
nest_asyncio.apply()  # Needed for Colab notebooks

def run_fastapi():
    uvicorn.run(app, host="0.0.0.0", port=PORT)

# Run FastAPI in background thread
threading.Thread(target=run_fastapi, daemon=True).start()


In [None]:
from pyngrok import ngrok
from fastapi import FastAPI
from fastapi.responses import JSONResponse
import uvicorn
import gradio as gr

# ✅ FastAPI app
app = FastAPI()

# ✅ API endpoint for chatbot
@app.get("/ask")
def ask_endpoint(q: str):
    answer = ask_csv(q)
    return JSONResponse({"query": q, "answer": answer})

# ✅ Mount Gradio UI at /ui
app = gr.mount_gradio_app(app, demo, path="/ui")

# ✅ Ngrok tunnel
ngrok.kill()
public_url = ngrok.connect(1111)
print("🌍 Public URL:", public_url.public_url)
print("📡 API endpoint:", public_url.public_url + "/ask?q=What is dharma?")
print("🎨 Gradio UI:", public_url.public_url + "/ui")

# ✅ Run server
uvicorn.run(app, host="0.0.0.0", port=1111)


🌍 Public URL: https://43e7dd63c1f9.ngrok-free.app
📡 API endpoint: https://43e7dd63c1f9.ngrok-free.app/ask?q=What is dharma?
🎨 Gradio UI: https://43e7dd63c1f9.ngrok-free.app/ui


INFO:     Started server process [549]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:1111 (Press CTRL+C to quit)


INFO:     103.161.223.11:0 - "GET /ui HTTP/1.1" 307 Temporary Redirect
INFO:     103.161.223.11:0 - "GET /ui/ HTTP/1.1" 200 OK
INFO:     103.161.223.11:0 - "GET /ui/assets/index-BIiu1kG0.css HTTP/1.1" 200 OK
INFO:     103.161.223.11:0 - "GET /ui/assets/index-C1MLra31.js HTTP/1.1" 200 OK
INFO:     103.161.223.11:0 - "GET /ui/assets/svelte/svelte.js HTTP/1.1" 200 OK
INFO:     103.161.223.11:0 - "GET /ui/assets/Index-C6djDJnG.js HTTP/1.1" 200 OK
INFO:     103.161.223.11:0 - "GET /ui/assets/prism-python-Dl7Tytau.js HTTP/1.1" 200 OK
INFO:     103.161.223.11:0 - "GET /ui/assets/MarkdownCode-xMSYSvAD.css HTTP/1.1" 200 OK
INFO:     103.106.200.60:0 - "GET /ui/ HTTP/1.1" 200 OK
INFO:     103.161.223.11:0 - "GET /ui/assets/IconButton-BYr8xyoS.js HTTP/1.1" 200 OK
INFO:     103.161.223.11:0 - "GET /ui/assets/StreamingBar.svelte_svelte_type_style_lang-DyfAPhwM.js HTTP/1.1" 200 OK
INFO:     103.161.223.11:0 - "GET /ui/assets/DownloadLink-CqD3Uu0l.css HTTP/1.1" 200 OK
INFO:     103.161.223.11:0 - "GE

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


INFO:     103.161.223.11:0 - "GET /ask?q=What%20is%20dharma? HTTP/1.1" 200 OK


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


INFO:     103.106.200.60:0 - "GET /ask?q=What%20is%20dharma? HTTP/1.1" 200 OK
