In [None]:
# %%capture
# !pip install -q -U torch torchvision torchaudio
# !pip install -q -U transformers peft accelerate
# !pip install -q -U langchain langchain-community langchain-huggingface sentence-transformers
# !pip install -q -U faiss-cpu
# !pip install -q -U gradio

In [None]:
import os
from google.colab import drive
from google.colab import userdata
from huggingface_hub import login

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
import json
import gradio as gr

In [None]:
# drive.mount('/content/drive')
# Root of the project on Google Drive; the working directory is switched to it
# so that relative paths (e.g. DATA_PATH) resolve inside the project.
project_path = "/content/drive/MyDrive/Colab Notebooks/nust_bank_chatbot"
os.chdir(project_path)

# Log in to the Hugging Face Hub with the Colab secret `HF_TOKEN`.
# The login stays best-effort (the notebook keeps running on failure), but the
# real cause is reported: this handler also catches errors raised by `login`
# itself, so a fixed "token not found" message would be misleading.
try:
    hf_token = userdata.get('HF_TOKEN')
    login(token=hf_token)
except Exception as e:
    print(f"Hugging Face login skipped ({type(e).__name__}: {e}).")

# Directory holding the fine-tuned PEFT adapter, derived from the project root
# instead of repeating the absolute path.
ADAPTER_PATH = os.path.join(project_path, "nust_bank_adapter_6")
# Chat-format JSONL dataset, relative to the project root (the current directory).
DATA_PATH = "processed_data/prepared_data.jsonl"

In [None]:
# Base checkpoint on the Hugging Face Hub that the adapter is applied to.
model_id = "meta-llama/Llama-3.2-3B-Instruct"

# Tokenizer setup: reuse the EOS token for padding and pad on the left.
# NOTE(review): presumably this tokenizer ships without a dedicated pad token
# and left padding is wanted for decoder-only generation — confirm.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Load the base model in half precision and let `device_map="auto"` decide the
# device placement.
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.float16,
    device_map="auto",
)
# Attach the fine-tuned adapter stored at ADAPTER_PATH and switch to eval mode
# for inference.
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
model.eval()
print("Fine-Tuned NUST Bank Model Loaded Successfully!")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

Fine-Tuned NUST Bank Model Loaded Successfully!


In [None]:
# Directory on Drive where the FAISS vector store is persisted between sessions.
DB_FAISS_PATH = "/content/drive/MyDrive/Colab Notebooks/nust_bank_chatbot/vector_store"

# Sentence-embedding model shared by index construction, index loading and
# query-time similarity search.
print("Loading Embedding Model...")
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

def _build_index_text(messages):
    """Convert one chat record into the text stored in the vector index.

    Args:
        messages: list of ``{"role": ..., "content": ...}`` dicts. When a role
            occurs more than once, the last occurrence wins.

    Returns:
        ``"Context: <fact>\\nRelated Question: <question>"`` or ``""`` when the
        record has nothing indexable.
    """
    user_content = ""
    assistant_content = ""
    for m in messages:
        if m['role'] == 'user':
            user_content = m['content']
        elif m['role'] == 'assistant':
            assistant_content = m['content']

    # Case A: the user turn already embeds "Context: ... Question: ...".
    if "Context:" in user_content and "Question:" in user_content:
        # partition() splits on the first "Question:" only, so a question that
        # itself contains that marker is kept whole instead of being truncated.
        raw_context, _, raw_question = user_content.partition("Question:")
        context_text = raw_context.replace("Context:", "").strip()
        question_text = raw_question.replace("Answer the question using the context above.", "").strip()
        return f"Context: {context_text}\nRelated Question: {question_text}"

    # Case B: plain Q/A pair. The assistant answer is treated as the fact
    # (context) and the user query as the related question.
    if user_content and assistant_content:
        return f"Context: {assistant_content}\nRelated Question: {user_content}"

    return ""


def load_or_build_index():
    """Load the FAISS knowledge base from DB_FAISS_PATH, or build and save it.

    If a saved store exists it is loaded with ``embed_model``; when loading
    fails, the index is rebuilt from the JSONL file at DATA_PATH (one chat
    record per line) and written back to DB_FAISS_PATH.

    Returns:
        The FAISS vector store, or ``None`` when DATA_PATH does not exist or
        yields no indexable records.
    """
    if os.path.exists(DB_FAISS_PATH):
        print(f"Found saved vector DB at {DB_FAISS_PATH}. Loading...")
        try:
            # NOTE(security): allow_dangerous_deserialization=True opts in to
            # loading a serialized docstore from disk. Only do this for a store
            # this project wrote itself, never for files from untrusted sources.
            vector_db = FAISS.load_local(DB_FAISS_PATH, embed_model, allow_dangerous_deserialization=True)
            print("Loaded existing Knowledge Base from Drive!")
            return vector_db
        except Exception as e:
            print(f"Error loading DB: {e}. Rebuilding from scratch...")

    print("Building Index from scratch...")

    if not os.path.exists(DATA_PATH):
        print("Data file not found.")
        return None

    docs = []
    # Explicit UTF-8: the dataset contains non-ASCII characters (e.g. "–"), so
    # decoding must not depend on the platform's default encoding.
    with open(DATA_PATH, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines (such as a trailing newline) are not records.
            if not line.strip():
                continue
            try:
                data = json.loads(line)
                final_text_to_index = _build_index_text(data.get('messages', []))
            except Exception as e:
                # Best-effort ingestion: a malformed record must not abort the build.
                print(f"Skipping line due to error: {e}")
                continue

            if final_text_to_index:
                # Metadata makes it possible to trace a hit back to its origin.
                docs.append(
                    Document(
                        page_content=final_text_to_index,
                        metadata={"source": "nust_dataset", "type": "bank_policy"}
                    )
                )

    # An index cannot be built from zero documents; report it instead of
    # letting the vector-store constructor fail with an opaque error.
    if not docs:
        print("No indexable records found; Knowledge Base was not built.")
        return None

    print(f"Indexing {len(docs)} documents...")
    vector_db = FAISS.from_documents(docs, embed_model)

    vector_db.save_local(DB_FAISS_PATH)
    print(f"Knowledge Base saved to {DB_FAISS_PATH}")

    return vector_db

Loading Embedding Model...


In [None]:
# Load the persisted knowledge base (or build it on first run) and fail fast
# when that is not possible: load_or_build_index() returns None if the data
# file is missing, and every later cell dereferences `vector_db`.
vector_db = load_or_build_index()
if vector_db is None:
    raise RuntimeError(
        f"Knowledge base unavailable: could not load {DB_FAISS_PATH} or build it from {DATA_PATH}."
    )

Building Index from scratch...
Indexing 585 documents...
Knowledge Base saved to /content/drive/MyDrive/Colab Notebooks/nust_bank_chatbot/vector_store


In [None]:
# Sanity check: show the first five documents held by the vector store,
# read through the docstore's private `_dict` mapping.
docs_dict = vector_db.docstore._dict
preview_docs = list(docs_dict.values())[:5]

for sample_no, sample_doc in enumerate(preview_docs, start=1):
    print(f"Sample {sample_no}:", sample_doc.page_content, "=" * 80, sep="\n")

Sample 1:
Context: - Minimum deposit for opening of the NUST Waqaar – Savings account is Rs.1,000
- Minimum deposit for opening an NUST Waqaar – Term Deposit account is Rs.25,000
- Note: Customer must be an account holder of NUST for placement of funds in NUST Waqaar-Term Deposit
Related Question: Regarding NUST Waqaar Account (NWA), What is the minimum deposit requirement to open NUST Waqaar Savings Account & NUST Waqaar Term Deposit?
Sample 2:
Context: Product: Value Plus Current Account (VPCA)
Information: NUST Value Plus Current Account is specially designed for individuals to cater their financial needs.
Related Question: I would like to inquire about opening a current account for Individuals with your Bank. Please tell me what options I have ?
Sample 3:
Context: Product: Roshan Digital Account (RDA)
Information: Offered in PKR and USD/GBP/EUR
SMS Alerts on registered local and International numbers
Bill Payments & Funds Transfer within Pakistan via Local ATMs & Internet Banking
I

In [None]:
# def is_safe(query):
#     harmful_keywords = ["launder", "fake id", "steal", "illegal", "bomb", "evade tax", "kill", "drug"]
#     return not any(word in query.lower() for word in harmful_keywords)

def add_new_document(text):
    """Add a user-supplied policy text to the knowledge base and persist it.

    Args:
        text: raw policy text from the admin console. ``None``, non-string
            values and whitespace-only strings are treated as empty.

    Returns:
        A status message for the UI: "Text is empty." when nothing was added,
        otherwise a confirmation that the store was updated and saved.
    """
    # Guard against None / non-string input before calling str methods, and
    # index the trimmed text so stray whitespace is not stored.
    cleaned = text.strip() if isinstance(text, str) else ""
    if not cleaned:
        return "Text is empty."

    new_doc = Document(page_content=cleaned, metadata={"source": "user_upload"})
    vector_db.add_documents([new_doc])
    # Persist immediately so the addition survives a runtime restart.
    vector_db.save_local(DB_FAISS_PATH)

    return "Policy added & Knowledge Base saved to Drive!"

def _extract_fact(page_content):
    """Return only the factual part of an indexed document.

    Documents built from the dataset look like
    "Context: <fact>\\nRelated Question: <question>"; the question is useful for
    retrieval but should not be shown to the model as context. Documents
    without that marker (e.g. admin uploads) are returned unchanged.
    """
    if "Related Question:" in page_content:
        # Keep everything before the related question and drop the label.
        fact = page_content.split("Related Question:")[0]
        return fact.replace("Context:", "").strip()
    return page_content


def generate_rag_response(user_query, history):
    """Answer a banking question using retrieval-augmented generation.

    Retrieves the 3 most similar knowledge-base entries for ``user_query``,
    places their factual part in the system prompt and samples an answer from
    the fine-tuned model.

    Args:
        user_query: the customer's question.
        history: previous chat turns. Accepted for interface compatibility with
            the chat UI; it is currently not included in the prompt, so every
            question is answered independently.

    Returns:
        The generated answer with surrounding whitespace removed, or a short
        prompt to enter a question when ``user_query`` is blank.
    """
    # if not is_safe(user_query):
    #     return "I cannot assist with that request. NUST Bank adheres strictly to all legal and ethical financial standards."

    # A blank query has nothing to retrieve against; skip retrieval and
    # generation instead of producing an arbitrary answer.
    if not user_query or not user_query.strip():
        return "Please enter a question so I can help you."

    docs = vector_db.similarity_search(user_query, k=3)
    context_text = "\n---\n".join(_extract_fact(d.page_content) for d in docs)

    system_prompt = f"""### ROLE
You are a senior customer service manager at NUST Bank. You are helpful, professional, and authoritative.

### SAFETY & COMPLIANCE GUARDRAILS (Strictly Enforced)
1. **Scope Restriction:** You are ONLY allowed to answer questions related to banking and the provided context. If a user asks about politics, religion, or general code, politely refuse.
2. **Safety First:** If a user asks for help with illegal acts (e.g., money laundering, fake IDs, tax evasion), strictly refuse.
3. **No Hallucinations:** If the answer is NOT present in the 'CONTEXT DATA' below, do NOT invent facts. Simply state: "I am sorry, but I do not have that specific information in my documents."

### RESPONSE FORMATTING
1. **Natural Language:** Do NOT copy-paste table rows or use the '|' symbol. Rewrite raw data into clear, full sentences.
2. **Clarity:** Use bullet points for lists (like eligibility criteria or fees) to improve readability.
3. **Tone:** Be polite but direct. Do not over-apologize.

### EXAMPLE (Follow this style)
**Context:** "Loan Limit | Tier 1: 100k | Tier 2: 500k | Mark-up: 5%"
**User:** "What are the loan limits?"
**Your Answer:** "For Tier 1, the loan limit is Rs. 100,000. For Tier 2, the limit increases to Rs. 500,000. The markup rate for both is 5%."

### CONTEXT DATA
{context_text}
"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_query}
    ]

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # The chat template already renders the special tokens (including BOS) into
    # the prompt text, so the tokenizer must not add them a second time.
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)

    print("Generating Response...")
    outputs = model.generate(
        **inputs,
        max_new_tokens=500,
        temperature=0.5,      # moderate sampling; lower values give more deterministic answers
        top_p=0.9,
        repetition_penalty=1.1,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode only the newly generated tokens (everything after the prompt).
    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
    return response.strip()

print("RAG Pipeline Ready.")

RAG Pipeline Ready.


In [None]:
# Brand styling for the UI: the stock "Soft" theme in blue/slate with the
# primary button recoloured.
_primary_button_style = dict(
    button_primary_background_fill="#002D62",
    button_primary_text_color="white",
)
theme = gr.themes.Soft(primary_hue="blue", secondary_hue="slate").set(**_primary_button_style)

# Three-tab Gradio app: customer chat, admin knowledge-base updates and a
# static status panel.
with gr.Blocks(title="NUST Bank AI Assistant") as demo:

    gr.Markdown("# NUST Bank AI Assistant")

    with gr.Tabs():

        with gr.TabItem("Customer Support"):
            chatbot = gr.Chatbot(height=450, label="NUST Bank Assistant")
            msg = gr.Textbox(placeholder="Ask about Accounts, Loans, or Remittance...", label="Your Inquiry")
            clear = gr.Button("Clear Chat")

            def user_chat(message, history):
                """Handle one chat submission.

                Returns a pair ``("", history)``: the empty string clears the
                input box and ``history`` is the updated list of
                ``{"role", "content"}`` messages shown in the chatbot.
                """
                history = history or []

                # Ignore blank submissions instead of sending them to the model.
                if not message or not message.strip():
                    return "", history

                response = generate_rag_response(message, history)

                history.append({"role": "user", "content": message})
                history.append({"role": "assistant", "content": response})

                return "", history

            msg.submit(user_chat, [msg, chatbot], [msg, chatbot])
            clear.click(lambda: [], None, chatbot, queue=False)

        with gr.TabItem("Admin Console"):
            gr.Markdown("### Real-Time Knowledge Update")

            with gr.Row():
                new_policy_text = gr.Textbox(lines=5, label="New Policy Document")
                upload_btn = gr.Button("UPDATE DATABASE", variant="primary")

            status_output = gr.Textbox(label="System Log", interactive=False)

            def update_db(text):
                """Add the submitted policy text to the knowledge base and return the status message."""
                return add_new_document(text)

            upload_btn.click(update_db, inputs=new_policy_text, outputs=status_output)

        with gr.TabItem("System Status"):
            # The count is read once while the UI is being built, so it does not
            # change when documents are added later through the Admin Console.
            try:
                count = vector_db.index.ntotal if hasattr(vector_db, 'index') else "Active"
            except Exception:
                # Catch Exception rather than using a bare `except`, so that
                # KeyboardInterrupt / SystemExit are not swallowed.
                count = "Active"

            gr.Markdown("**Model:** Llama-3.2-3B (Fine-Tuned)")
            gr.Markdown(f"**Indexed Documents:** {count}")
            gr.Markdown("**Guardrails:** Active")

# Start the app. `share=True` publishes it on a public URL and `debug=True`
# keeps this cell running so that errors and logs stay visible (see the cell
# output).
# NOTE(review): the "Admin Console" tab has no authentication, so anyone who
# has the public link can add documents to the knowledge base — consider
# restricting access before sharing the link.
print("Launching UI...")
demo.launch(share=True, debug=True, theme=theme)

Launching UI...
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://60711acab3bddfcad1.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Generating Response...
Generating Response...
Generating Response...
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://60711acab3bddfcad1.gradio.live




In [None]:
from datasets import load_dataset
import random

# Same JSONL file that load_or_build_index() reads through DATA_PATH.
train_file = "processed_data/prepared_data.jsonl"
full_dataset = load_dataset("json", data_files=train_file, split="train")

# Recreate the 90/10 split used in training: with the same seed (42) and
# shuffle setting, the same 10% of records ends up in the held-out "test" part.
# NOTE(review): load_or_build_index() indexes every record of this file, so
# the held-out records are also present in the retrieval index. Scores
# measured on this split are therefore likely optimistic for the RAG pipeline;
# consider building the index from the training part only.
dataset_split = full_dataset.train_test_split(
    test_size=0.1,
    shuffle=True,
    seed=42
)

eval_dataset = dataset_split["test"]
print(f"Total Validation Samples: {len(eval_dataset)}")

Generating train split: 0 examples [00:00, ? examples/s]

Total Validation Samples: 59


In [None]:
import pandas as pd
from tqdm import tqdm

# Make sure a knowledge base is available when this cell runs in a session
# where the index cell was skipped or returned nothing.
if 'vector_db' not in globals() or vector_db is None:
    print("Loading Vector DB...")
    vector_db = load_or_build_index()

# generate_rag_response() samples its output (do_sample=True); seeding the
# generator keeps the evaluation repeatable between runs on the same setup.
torch.manual_seed(42)

rag_results = []
failed_rows = 0

for row in tqdm(eval_dataset):
    try:
        messages = row['messages']
        # The last two messages of a record are the user question and the
        # reference (ground-truth) answer.
        user_query = messages[-2]['content']
        ground_truth = messages[-1]['content']

        # Retrieval + generation, with an empty chat history.
        rag_response = generate_rag_response(user_query, [])

        rag_results.append({
            "Question": user_query,
            "Ground Truth": ground_truth,
            "RAG Prediction": rag_response
        })

    except Exception as e:
        # Keep evaluating, but count the failure so it is visible that the
        # metrics were computed on fewer samples.
        failed_rows += 1
        print(f"Error: {e}")

df_rag = pd.DataFrame(rag_results)
print(f"RAG Evaluation Complete. Processed {len(df_rag)} samples.")
if failed_rows:
    print(f"Warning: {failed_rows} of {len(eval_dataset)} samples failed and are excluded from the metrics.")
display(df_rag.head(2))

  0%|          | 0/59 [00:00<?, ?it/s]

Generating Response...


  2%|▏         | 1/59 [00:02<02:42,  2.80s/it]

Generating Response...


  3%|▎         | 2/59 [00:06<03:14,  3.41s/it]

Generating Response...


  5%|▌         | 3/59 [00:09<02:57,  3.17s/it]

Generating Response...


  7%|▋         | 4/59 [00:16<04:06,  4.49s/it]

Generating Response...


  8%|▊         | 5/59 [00:28<06:35,  7.32s/it]

Generating Response...


 10%|█         | 6/59 [00:45<09:27, 10.70s/it]

Generating Response...


 12%|█▏        | 7/59 [01:01<10:42, 12.35s/it]

Generating Response...


 14%|█▎        | 8/59 [01:05<08:10,  9.62s/it]

Generating Response...


 15%|█▌        | 9/59 [01:10<06:59,  8.38s/it]

Generating Response...


 17%|█▋        | 10/59 [01:14<05:44,  7.02s/it]

Generating Response...


 19%|█▊        | 11/59 [01:16<04:19,  5.40s/it]

Generating Response...


 20%|██        | 12/59 [01:18<03:23,  4.34s/it]

Generating Response...


 22%|██▏       | 13/59 [01:24<03:44,  4.88s/it]

Generating Response...


 24%|██▎       | 14/59 [01:29<03:40,  4.90s/it]

Generating Response...


 25%|██▌       | 15/59 [01:45<05:57,  8.13s/it]

Generating Response...


 27%|██▋       | 16/59 [01:50<05:11,  7.25s/it]

Generating Response...


 29%|██▉       | 17/59 [01:55<04:37,  6.60s/it]

Generating Response...


 31%|███       | 18/59 [02:03<04:49,  7.05s/it]

Generating Response...


 32%|███▏      | 19/59 [02:06<03:58,  5.96s/it]

Generating Response...


 34%|███▍      | 20/59 [02:11<03:36,  5.55s/it]

Generating Response...


 36%|███▌      | 21/59 [02:47<09:14, 14.60s/it]

Generating Response...


 37%|███▋      | 22/59 [02:58<08:20, 13.53s/it]

Generating Response...


 39%|███▉      | 23/59 [03:02<06:30, 10.84s/it]

Generating Response...


 41%|████      | 24/59 [03:20<07:36, 13.04s/it]

Generating Response...


 42%|████▏     | 25/59 [03:25<05:59, 10.57s/it]

Generating Response...


 44%|████▍     | 26/59 [03:27<04:22,  7.96s/it]

Generating Response...


 46%|████▌     | 27/59 [03:34<04:03,  7.62s/it]

Generating Response...


 47%|████▋     | 28/59 [03:42<03:56,  7.64s/it]

Generating Response...


 49%|████▉     | 29/59 [03:44<02:57,  5.93s/it]

Generating Response...


 51%|█████     | 30/59 [03:45<02:15,  4.66s/it]

Generating Response...


 53%|█████▎    | 31/59 [03:56<03:04,  6.57s/it]

Generating Response...


 54%|█████▍    | 32/59 [03:59<02:26,  5.42s/it]

Generating Response...


 56%|█████▌    | 33/59 [04:11<03:11,  7.38s/it]

Generating Response...


 58%|█████▊    | 34/59 [04:18<03:04,  7.38s/it]

Generating Response...


 59%|█████▉    | 35/59 [04:21<02:23,  5.99s/it]

Generating Response...


 61%|██████    | 36/59 [04:27<02:18,  6.00s/it]

Generating Response...


 63%|██████▎   | 37/59 [04:29<01:42,  4.66s/it]

Generating Response...


 64%|██████▍   | 38/59 [04:33<01:36,  4.57s/it]

Generating Response...


 66%|██████▌   | 39/59 [04:40<01:45,  5.26s/it]

Generating Response...


 68%|██████▊   | 40/59 [04:46<01:45,  5.56s/it]

Generating Response...


 69%|██████▉   | 41/59 [04:49<01:25,  4.73s/it]

Generating Response...


 71%|███████   | 42/59 [04:53<01:16,  4.52s/it]

Generating Response...


 73%|███████▎  | 43/59 [05:03<01:38,  6.18s/it]

Generating Response...


 75%|███████▍  | 44/59 [05:08<01:26,  5.77s/it]

Generating Response...


 76%|███████▋  | 45/59 [05:28<02:21, 10.14s/it]

Generating Response...


 78%|███████▊  | 46/59 [05:31<01:42,  7.85s/it]

Generating Response...


 80%|███████▉  | 47/59 [05:36<01:25,  7.11s/it]

Generating Response...


 81%|████████▏ | 48/59 [05:43<01:17,  7.05s/it]

Generating Response...


 83%|████████▎ | 49/59 [05:51<01:14,  7.45s/it]

Generating Response...


 85%|████████▍ | 50/59 [05:54<00:54,  6.01s/it]

Generating Response...


 86%|████████▋ | 51/59 [05:57<00:40,  5.10s/it]

Generating Response...


 88%|████████▊ | 52/59 [06:10<00:52,  7.47s/it]

Generating Response...


 90%|████████▉ | 53/59 [06:12<00:34,  5.67s/it]

Generating Response...


 92%|█████████▏| 54/59 [06:15<00:24,  4.87s/it]

Generating Response...


 93%|█████████▎| 55/59 [06:18<00:17,  4.33s/it]

Generating Response...


 95%|█████████▍| 56/59 [06:28<00:18,  6.21s/it]

Generating Response...


 97%|█████████▋| 57/59 [06:39<00:15,  7.57s/it]

Generating Response...


 98%|█████████▊| 58/59 [06:54<00:09,  9.96s/it]

Generating Response...


100%|██████████| 59/59 [06:58<00:00,  7.09s/it]

RAG Evaluation Complete. Processed 59 samples.





Unnamed: 0,Question,Ground Truth,RAG Prediction
0,Context:\nProduct: App Features / Functionalit...,"Yes, the NUST mobile app can be accessed globa...","Yes, the NUST mobile app can be accessed globa..."
1,I want to invest in a scheme that promises 50%...,I advise caution. Schemes promising unrealisti...,I advise caution. Schemes promising unrealisti...


In [None]:
# !pip install evaluate rouge_score bert_score


In [None]:
import evaluate
import numpy as np

# Metric implementations from the `evaluate` library; loaded once and reused by
# compute_metrics().
rouge = evaluate.load("rouge")
bertscore = evaluate.load("bertscore")

def compute_metrics(predictions, references):
    """Score generated answers against reference answers.

    Args:
        predictions: list of generated answer strings.
        references: list of reference answer strings, aligned with ``predictions``.

    Returns:
        Dict with the ROUGE-1, ROUGE-2 and ROUGE-L scores and the mean
        BERTScore F1 ("BERT-F1") over all pairs.
    """
    pair = dict(predictions=predictions, references=references)

    rouge_scores = rouge.compute(**pair)
    bert_scores = bertscore.compute(lang="en", **pair)

    report = {
        label: rouge_scores[key]
        for label, key in (("ROUGE-1", 'rouge1'), ("ROUGE-2", 'rouge2'), ("ROUGE-L", 'rougeL'))
    }
    # BERTScore yields one F1 per pair; report the average.
    report["BERT-F1"] = np.mean(bert_scores['f1'])
    return report

Downloading builder script: 0.00B [00:00, ?B/s]

In [None]:
# Score the RAG answers against the reference answers and show the result as a
# one-row table labelled with the system name.
rag_refs = df_rag["Ground Truth"].tolist()
rag_preds = df_rag["RAG Prediction"].tolist()

rag_metrics = compute_metrics(predictions=rag_preds, references=rag_refs)
rag_m = pd.DataFrame([rag_metrics], index=["Fine-Tuned + RAG"])

display(rag_m)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Unnamed: 0,ROUGE-1,ROUGE-2,ROUGE-L,BERT-F1
Fine-Tuned + RAG,0.607956,0.59041,0.606145,0.923772


In [None]:
# Persist the aggregate metrics (with the system name as index) and the
# per-sample predictions (without index) in the current working directory.
metrics_file, details_file = "rag_metrics.csv", "rag_detailed_predictions.csv"

rag_m.to_csv(metrics_file)
print(f"Metrics saved: {metrics_file}")

df_rag.to_csv(details_file, index=False)
print(f"Detailed predictions saved: {details_file}")

Metrics saved: rag_metrics.csv
Detailed predictions saved: rag_detailed_predictions.csv


In [None]:
# Retrieval spot-check: print the top-3 hits for a short query together with
# their score and metadata.
test_query = "Main features of LCA"
results = vector_db.similarity_search_with_score(test_query, k=3)

for rank, hit in enumerate(results, start=1):
    doc, score = hit
    meta = doc.metadata

    print(f"--- [ Result {rank} ] ---")
    print(f"Score (Lower is better): {score:.4f}")
    print(f"Source: {meta.get('source', 'Unknown')}")
    print(f"Type: {meta.get('type', 'Unknown')}")

    print("\nRAW CONTENT FROM DB:")
    print(doc.page_content)
    print("-" * 60)

--- [ Result 1 ] ---
Score (Lower is better): 1.0324
Source: nust_dataset
Type: bank_policy

RAW CONTENT FROM DB:
Context: Product: Little Champs Account (LCA)
Information: Minimum initial deposit of Rs.100/- | Little Champs Savings A/C
Free first chequebook* | Profit Payment | Profit Rate
Related Question: What are the main Features  of the Little Champs Account.
------------------------------------------------------------
--- [ Result 2 ] ---
Score (Lower is better): 1.0367
Source: nust_dataset
Type: bank_policy

RAW CONTENT FROM DB:
Context: Product: Little Champs Account (LCA)
Information: Attractive returns on savings account
SMS alert service on digital transactions
I Net banking services
Free education insurance plan – Rs.5,000/- per month for 5 years on savings account & Rs.10,000 per month for 5 years on current account in case of death of the guardian
Related Question: What other Value added features does the Little Champs Account have?
---------------------------------------