<a href="https://colab.research.google.com/github/santanamnaa/Smart-Chatbot-with-Retrieval-Augmented-Generation/blob/main/chatbot_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip -q install fastapi uvicorn nest_asyncio pyngrok pydantic python-dotenv \
               mysql-connector-python sentence-transformers torch transformers \
               certifi pandas


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.9/33.9 MB[0m [31m54.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
from getpass import getpass

# Connection settings for the TiDB cluster, exposed as environment variables
# because get_db() reads them with os.getenv().
# Non-secret values use setdefault so anything already exported wins.
os.environ.setdefault("TIDB_HOST", "gateway01.ap-southeast-1.prod.aws.tidbcloud.com")
os.environ.setdefault("TIDB_PORT", "4000")
os.environ.setdefault("TIDB_USER", "H9XdAvETFW3AD85.root")
os.environ.setdefault("TIDB_DB", "RAG")

# SECURITY: the database password must never be stored in the notebook.
# Prompt for it (input is not echoed) unless it is already in the environment.
# Any password that was previously committed here should be rotated.
if not os.environ.get("TIDB_PASSWORD"):
    os.environ["TIDB_PASSWORD"] = getpass("TiDB password: ")

# Hugging Face model id used by the generation cell.
os.environ.setdefault("HF_MODEL", "deepseek-ai/deepseek-coder-1.3b-instruct")


In [None]:
import os, certifi, mysql.connector

def get_db():
    """Open a new database connection configured from the TIDB_* environment variables.

    TIDB_PORT defaults to "4000" and TIDB_DB to "RAG" when unset. TLS is
    enabled with certificate and host-identity verification against
    certifi's CA bundle.

    Returns:
        The connection object returned by ``mysql.connector.connect``.
        The caller is responsible for closing it.
    """
    env = os.environ
    connection_args = {
        "host": env.get("TIDB_HOST"),
        "port": int(env.get("TIDB_PORT", "4000")),
        "user": env.get("TIDB_USER"),
        "password": env.get("TIDB_PASSWORD"),
        "database": env.get("TIDB_DB", "RAG"),
        # TLS settings: trust certifi's CA bundle and verify the server.
        "ssl_ca": certifi.where(),
        "ssl_verify_cert": True,
        "ssl_verify_identity": True,
    }
    return mysql.connector.connect(**connection_args)

In [None]:
import json
from typing import List, Dict
from sentence_transformers import SentenceTransformer

# Vector dimensionality interpolated into the VECTOR(...) cast of the search query.
# Presumably equals the output size of the embedding model below — TODO confirm.
EMBED_DIM = 1024
# Embedding model, instantiated once when this cell runs and shared by encode_texts().
_embedder = SentenceTransformer("BAAI/bge-m3")

def encode_texts(texts: List[str]) -> List[List[float]]:
    """Embed a batch of strings with the shared model.

    Args:
        texts: Strings to embed.

    Returns:
        One embedding per input, converted to plain Python lists via
        ``.tolist()``. Embeddings are requested with
        ``normalize_embeddings=True``.
    """
    vectors = _embedder.encode(texts, normalize_embeddings=True)
    as_lists = vectors.tolist()
    return as_lists

def insert_documents(db, texts: List[str]) -> int:
    """Embed ``texts`` and insert them into the ``documents`` table.

    Args:
        db: Open connection providing ``cursor()`` (usable as a context
            manager) and ``commit()``.
        texts: Documents to store. Any iterable of strings is accepted.

    Returns:
        The number of rows submitted. The statement is ``INSERT IGNORE``,
        so rows the database decides to skip are still counted.
    """
    # Materialise once: the sequence is consumed both by the encoder and by zip().
    texts = list(texts)
    if not texts:
        # Nothing to embed: skip the model call and the database round-trip.
        return 0

    embs = encode_texts(texts)
    rows = [(t, json.dumps(e)) for t, e in zip(texts, embs)]
    # Use EMBED_DIM (as search_documents does) rather than a literal, so the
    # insert and search casts cannot drift apart.
    sql = (
        "INSERT IGNORE INTO documents (text, embedding) "
        f"VALUES (%s, CAST(%s AS VECTOR({EMBED_DIM})))"
    )
    with db.cursor() as cur:
        cur.executemany(sql, rows)
    db.commit()
    return len(rows)

def search_documents(db, query: str, k: int = 5) -> List[Dict]:
    """Return the ``k`` stored documents closest to ``query`` by cosine distance.

    Args:
        db: Open connection providing ``cursor()`` usable as a context manager.
        query: Text to embed and search with.
        k: Maximum number of rows to return; coerced with ``int()``.

    Returns:
        A list of ``{"text": ..., "distance": ...}`` dicts in the order the
        database returns them (ascending distance), with distance as a float.
    """
    query_vector = encode_texts([query])[0]
    sql = f"""
      SELECT text, vec_cosine_distance(embedding, CAST(%s AS VECTOR({EMBED_DIM}))) AS distance
      FROM documents
      ORDER BY distance
      LIMIT %s
    """
    with db.cursor() as cursor:
        # The vector is sent as JSON text and cast to VECTOR on the server side.
        params = (json.dumps(query_vector), int(k))
        cursor.execute(sql, params)
        matches = cursor.fetchall()

    results = []
    for text, distance in matches:
        results.append({"text": text, "distance": float(distance)})
    return results


In [None]:
import pandas as pd
from google.colab import files

import io

# Ask for the knowledge-base CSV through the Colab upload widget.
uploaded = files.upload()
if uploaded:
    # Parse the bytes that were just uploaded instead of re-reading a fixed
    # path: the saved file may have been renamed (e.g. on a repeated upload),
    # in which case the fixed path would silently point at stale data.
    csv_bytes = uploaded.get("data_knowledge.csv", next(iter(uploaded.values())))
    df = pd.read_csv(io.BytesIO(csv_bytes), dtype=str)
else:
    # Nothing uploaded: fall back to a copy already present in the working directory.
    df = pd.read_csv("data_knowledge.csv", dtype=str)

# Fail early with a readable message if the expected columns are absent.
missing_cols = {"question", "answer"} - set(df.columns)
if missing_cols:
    raise KeyError(f"CSV is missing required column(s): {sorted(missing_cols)}")

# Each stored document is "<question> <answer>"; missing cells become empty strings.
texts = (df["question"].fillna("") + " " + df["answer"].fillna("")).tolist()
db = get_db()
try:
    n = insert_documents(db, texts)
finally:
    # Release the connection even if embedding or the insert fails.
    db.close()
print(f"Inserted {n} rows ✅")

Saving data_knowledge.csv to data_knowledge.csv
Inserted 55 rows ✅


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch, os

# Model id comes from HF_MODEL; fall back to the notebook's default so a
# missing variable does not pass None to from_pretrained().
model_id = os.getenv("HF_MODEL") or "deepseek-ai/deepseek-coder-1.3b-instruct"
print("Loading model:", model_id)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    # Half precision only when a GPU is present; without CUDA fall back to
    # float32, since float16 is poorly supported for CPU inference.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)

def generate_llm_answer(prompt: str, max_new_tokens: int = 256) -> str:
    """Generate a greedy completion for ``prompt`` and return only the new text.

    Args:
        prompt: Full prompt text passed to the tokenizer.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded completion with special tokens removed and surrounding
        whitespace stripped. The prompt itself is not included.
    """
    # Place the inputs on the device the model actually lives on
    # (device_map="auto" decides that, not a plain CUDA check).
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    # Pass pad_token_id explicitly so generate() does not have to guess it
    # (and warn) on every call.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False,
        pad_token_id=pad_id,
    )
    # The returned sequence starts with the prompt tokens; drop them so the
    # caller receives the answer rather than an echo of the prompt.
    completion_ids = outputs[0][prompt_len:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()


Loading model: deepseek-ai/deepseek-coder-1.3b-instruct


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/631 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.69G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

In [None]:
def trim_to_token_budget(chunks, token_budget=900):
    """Keep leading chunks until an estimated token budget would be exceeded.

    The cost of a chunk is estimated as its character length divided by four
    (rounded down), measured before stripping. Iteration stops at the first
    chunk that does not fit; later chunks are not considered.

    Args:
        chunks: Iterable of strings, in priority order.
        token_budget: Maximum total estimated tokens to keep.

    Returns:
        List of the kept chunks, each stripped of surrounding whitespace.
    """
    kept = []
    spent = 0
    for chunk in chunks:
        estimate = len(chunk) // 4
        if spent + estimate > token_budget:
            break
        spent += estimate
        kept.append(chunk.strip())
    return kept

def answer_with_rag(question: str, k: int = 5) -> dict:
    """Answer ``question`` using retrieved documents as context for the LLM.

    Steps: open a database connection, fetch the ``k`` nearest documents,
    trim them to a 900-token estimate, build the prompt and generate up to
    200 new tokens.

    Args:
        question: The user's question; used both for retrieval and in the prompt.
        k: Number of documents to retrieve.

    Returns:
        ``{"answer": <generated text>, "retrieved": <list of retrieved docs>}``.
    """
    db = get_db()
    try:
        docs = search_documents(db, question, k=k)
    finally:
        # Always release the connection, including when retrieval raises.
        db.close()

    context_chunks = [d["text"] for d in docs]
    context_chunks = trim_to_token_budget(context_chunks, token_budget=900)
    context = "\n\n".join(context_chunks) if context_chunks else "(no context found)"

    # The prompt wording is runtime text shown to the model; keep it verbatim.
    prompt = (
        "Jawab pertanyaan hanya berdasarkan konteks berikut. "
        "Jika tidak ada di konteks, jawab: 'Tidak ditemukan di konteks.'\n\n"
        f"--- KONTEKS ---\n{context}\n\n--- PERTANYAAN ---\n{question}"
    )
    out = generate_llm_answer(prompt, max_new_tokens=200)
    return {"answer": out, "retrieved": docs}


In [None]:
# Smoke test: run the retrieve-then-generate pipeline on one sample question.
res = answer_with_rag("Apa yang dilakukan pada Phase 3?", k=5)
print(res["answer"])
# Last expression of the cell: display the first two retrieved documents.
res["retrieved"][:2]


Setting `pad_token_id` to `eos_token_id`:32021 for open-end generation.


Jawab pertanyaan hanya berdasarkan konteks berikut. Jika tidak ada di konteks, jawab: 'Tidak ditemukan di konteks.'

--- KONTEKS ---
Apa yang dilakukan selama Phase 3? Phase 3 adalah Agile Development Sprints yang mencakup Backend & API Development, Frontend Component Construction, dan Bi-Weekly Sprint Reviews.

Apa audit dilakukan di Phase 4? User Acceptance Testing dan performance/security audits.

Apa tujuan dari Phase 5? Phase 5 adalah Deployment & Strategic Partnership yang mencakup Server Configuration, App Store Submission, Post-Launch Monitoring, dan penyusunan Long-Term Growth Roadmap.

Apa yang diuji dalam Phase 4? Phase 4 adalah Rigorous Quality Assurance yang mencakup Automated & Manual Testing, User Acceptance Testing (UAT), dan Performance & Security Audits.

Apa kegiatan utama di Phase 2? Phase 2 yakni Architectural Design & Prototyping meliputi User Flow & Journey Mapping, High-Fidelity Prototypes, dan Technical Architecture Plan.

--- PERTANYAAN ---
Apa yang dilakukan 

[{'text': 'Apa yang dilakukan selama Phase 3? Phase 3 adalah Agile Development Sprints yang mencakup Backend & API Development, Frontend Component Construction, dan Bi-Weekly Sprint Reviews.',
  'distance': 0.3682907129200258},
 {'text': 'Apa audit dilakukan di Phase 4? User Acceptance Testing dan performance/security audits.',
  'distance': 0.4475205909491051}]

In [None]:
# models.py
from pydantic import BaseModel, Field

class AskPayload(BaseModel):
    """Request body for the question-answering endpoint."""

    # The user's question; at least two characters.
    query: str = Field(..., min_length=2)
    # Number of documents to retrieve. Bounded because the value comes from
    # untrusted clients and is forwarded to the SQL LIMIT clause: zero or
    # negative values are meaningless and very large ones waste work.
    k: int = Field(default=5, ge=1, le=50)


In [None]:
# app.py
import nest_asyncio, uvicorn
from fastapi import FastAPI
from pyngrok import ngrok
from fastapi.middleware.cors import CORSMiddleware


# Allow uvicorn to run inside the notebook's already-running event loop.
nest_asyncio.apply()
app = FastAPI(title="RAG Agent - Colab")
# CORS: the API is open to any origin. Credentials stay disabled because
# wildcard origins must not be combined with allow_credentials=True, and the
# endpoints defined in this cell use neither cookies nor auth headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

from pydantic import BaseModel, Field
# NOTE(review): this redefines the AskPayload declared in the earlier
# "models.py" cell and shadows it; keep the two in sync or remove one.
class AskPayload(BaseModel):
    """Request body for POST /ask."""

    # The user's question; at least two characters.
    query: str = Field(..., min_length=2)
    # Number of documents to retrieve. Bounded because the value comes from
    # untrusted clients and is forwarded to the SQL LIMIT clause.
    k: int = Field(default=5, ge=1, le=50)

@app.get("/health")
def health():
    """Liveness probe: returns a constant ``{"ok": True}`` payload."""
    status = dict(ok=True)
    return status

@app.post("/ask")
def ask(p: AskPayload):
    """Answer the posted question via the RAG pipeline.

    Args:
        p: Validated request body carrying ``query`` and ``k``.

    Returns:
        The dict produced by ``answer_with_rag``.
    """
    question, top_k = p.query, p.k
    result = answer_with_rag(question, k=top_k)
    return result

# start server + tunnel
# The tunnel is opened first because uvicorn.run() below keeps the cell busy
# serving requests until it is interrupted.
# NOTE(review): ngrok.set_auth_token(...) is only called in a later cell; on a
# fresh kernel that cell presumably has to run before this one — confirm.
public_url = ngrok.connect(8000)
print("Public URL:", public_url)
# Serve the FastAPI app on all interfaces, port 8000 (the port the tunnel targets).
uvicorn.run(app, host="0.0.0.0", port=8000)


Public URL: NgrokTunnel: "https://c243def9adc3.ngrok-free.app" -> "http://localhost:8000"


INFO:     Started server process [182]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     182.253.194.28:0 - "OPTIONS /ask HTTP/1.1" 200 OK


Setting `pad_token_id` to `eos_token_id`:32021 for open-end generation.


INFO:     182.253.194.28:0 - "POST /ask HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [182]


In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: the ngrok authtoken is a secret and must not be stored in the
# notebook. Read it from NGROK_AUTHTOKEN, or prompt for it without echo.
# Any token that was previously committed here should be revoked.
_ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(_ngrok_token)


In [None]:
from pyngrok import ngrok
# Stop the ngrok process managed by pyngrok so existing tunnels are dropped.
ngrok.kill()

import os
# Kill whatever is still using TCP port 8000 (e.g. a previous server run).
# `|| true` keeps the shell exit status at 0 when nothing was listening.
os.system("fuser -k 8000/tcp || true")

0

In [None]:
import nest_asyncio, uvicorn
# Re-apply the nested-event-loop patch before starting uvicorn from the notebook.
nest_asyncio.apply()

# Restart the API server on port 8000, reusing the `app` object created in the app cell.
# NOTE(review): this call occupies the cell while the server runs, so a tunnel
# opened in a later cell would only be created after shutdown — confirm the
# intended cell order.
uvicorn.run(app, host="0.0.0.0", port=8000)

INFO:     Started server process [182]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [182]


In [None]:
from pyngrok import ngrok
# Drop any tunnel left over from an earlier run before opening a new one.
ngrok.kill()

# Open a fresh public tunnel to local port 8000 and show its URL.
# NOTE(review): nothing in this cell starts the server; the tunnel is only
# useful while uvicorn is listening on port 8000 — confirm the run order.
public_url = ngrok.connect(8000)
print("Public URL:", public_url)


Public URL: NgrokTunnel: "https://545689fe02b4.ngrok-free.app" -> "http://localhost:8000"
