In [None]:
# Install Libraries

!pip install -U transformers
!pip install -q transformers datasets torch scikit-learn faiss-cpu accelerate peft bitsandbytes sentence-transformers
!pip install -U transformers datasets accelerate --quiet
!pip install streamlit pyngrok
!streamlit run app.py &>/content/logs.txt &

In [None]:
## Import all necessary libraries

import os
import pandas as pd
import torch
import transformers
import faiss
import streamlit as st
import torch
import pickle
from datasets import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments, DataCollatorForSeq2Seq
from transformers import Seq2SeqTrainingArguments
from transformers import Seq2SeqTrainer
from peft import LoraConfig, get_peft_model
from sentence_transformers import SentenceTransformer
from pyngrok import ngrok
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

In [None]:
## Upload the dataset

from google.colab import files

# Only open the interactive file picker when the CSV is not already on disk, so that
# re-running the notebook in the same session does not block on a manual upload.
if os.path.exists("bbc_news_text_complexity_summarization.csv"):
    uploaded = {}
else:
    uploaded = files.upload()

In [None]:
## Load the dataset

# Read the comma-separated BBC news file into a DataFrame and preview the first rows.
csv_path = "bbc_news_text_complexity_summarization.csv"
df = pd.read_csv(csv_path, sep=",")
df.head()

In [None]:
# Split dataset

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Wrap both pandas frames as Hugging Face datasets.
train_ds, test_ds = (Dataset.from_pandas(frame) for frame in (train_df, test_df))

In [None]:
# Collect the distinct values of the "labels" column, in order of first appearance.
distinct_labels = df["labels"].unique()
labels = distinct_labels.tolist()
print("Labels:", labels)

In [None]:
# Prepare FAISS Retriever (for RAG)

# Every article text in the dataset becomes one retrievable passage.
corpus = df["text"].tolist()

# Embed the passages with a sentence-transformer model.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
corpus_embeddings = embed_model.encode(corpus, convert_to_numpy=True)

# Exact L2 index sized to the embedding dimensionality.
embedding_dim = corpus_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(corpus_embeddings)

In [None]:
# Load Flan-T5 + LoRA

model_name = "google/flan-t5-base"
device = "cuda" if torch.cuda.is_available() else "cpu"

# The tokenizer has to exist before the Trainer is built.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the base seq2seq model and place it on the selected device.
base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
model = base_model.to(device)

# Wrap the base model with LoRA adapters on the "q" and "v" modules.
lora_config = LoraConfig(
    task_type="SEQ_2_SEQ_LM",
    target_modules=["q", "v"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


In [None]:
# Prepare Prompts (RAG)

def retrieve_context(text, top_k=3):
    """Return up to ``top_k`` corpus passages nearest to ``text``, joined by newlines.

    Args:
        text: Query string to embed with ``embed_model``.
        top_k: Maximum number of passages to return.

    Returns:
        The retrieved passages joined with ``"\\n"``; an empty string when
        ``top_k`` is not positive or the index holds no vectors.
    """
    if top_k <= 0 or index.ntotal == 0:
        return ""

    query_emb = embed_model.encode([text], convert_to_numpy=True)

    # Request one extra neighbour: when the query is itself a corpus entry it comes
    # back as the closest hit, and echoing it as "context" adds no information.
    n_candidates = min(top_k + 1, index.ntotal)
    _, neighbour_ids = index.search(query_emb, n_candidates)

    passages = []
    for idx in neighbour_ids[0]:
        if idx < 0:
            # FAISS fills missing results with -1, which would otherwise
            # silently select the last corpus entry.
            continue
        passage = corpus[idx]
        if passage == text:
            continue
        passages.append(passage)

    return "\n".join(passages[:top_k])

def format_rag_prompt(example):
    """Attach a RAG classification prompt and the target label text to a dataset row.

    Reads ``example["text"]`` and ``example["labels"]``; writes ``example["prompt"]``
    and ``example["label_text"]`` and returns the same (mutated) mapping.
    """
    context = retrieve_context(example["text"])
    prompt = f"""Using the context below, classify the complexity of the following text as one of {labels}.

Text:
{example['text']}

Context:
{context}

Answer:"""
    example["prompt"] = prompt
    # The target string the model should generate is the row's label.
    example["label_text"] = example["labels"]
    return example

# Build the prompt datasets from the train/test split, not from the full frame:
# mapping `df` for both would make the evaluation set identical to the training set.
# preserve_index=False keeps the shuffled pandas index out of the dataset columns.
train_ds = Dataset.from_pandas(train_df, preserve_index=False).map(format_rag_prompt)
test_ds = Dataset.from_pandas(test_df, preserve_index=False).map(format_rag_prompt)


In [None]:
# Tokenize

max_length = 256
def tokenize(batch):
    return tokenizer(batch["prompt"], truncation=True, padding="max_length", max_length=max_length)

tokenized_train = train_ds.map(tokenize, batched=True)
tokenized_test = test_ds.map(tokenize, batched=True)

In [None]:
# Data Collator

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)


In [None]:
# Training Arguments

training_args = Seq2SeqTrainingArguments(
    output_dir="./flan-t5-bbc-rag-lora",
    num_train_epochs=3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,
    learning_rate=2e-5,
    fp16=True,
    predict_with_generate=True
)

In [None]:
# Trainer
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    data_collator=data_collator
)


In [None]:
# Model in evaluation mode

# Switch the model to inference mode so that training-only behaviour
# (e.g. the LoRA dropout configured above) is disabled during prediction.
model.eval()

In [None]:
# RAG prediction function

def predict_label_rag(text):
    """Classify ``text`` with the RAG prompt and return the matching label.

    Retrieves three context passages, builds the classification prompt, generates
    up to 10 new tokens and returns the first entry of ``labels`` whose lowercase
    form occurs in the lowercased generation, or "unknown" when none does.
    """
    context = retrieve_context(text, top_k=3)

    prompt = f"""Using the context below, classify the complexity of the following text as one of {labels}.

Text:
{text}

Context:
{context}

Answer:"""

    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=256
    )
    inputs = encoded.to(device)

    # No gradients are needed for inference; this keeps memory usage down on Colab.
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10)

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    pred_text = decoded.lower()

    # First label (in `labels` order) that appears in the generated text wins.
    return next((lbl for lbl in labels if lbl.lower() in pred_text), "unknown")

In [None]:
# Prepare test data

# Evaluate on the held-out 20% split only. Re-drawing a sample from the full frame
# (df.sample) would mostly pick rows that belong to the training split. The same
# test_size and seed as the original split reproduce exactly that hold-out set.
_, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [None]:
# Run predictions

# Gold labels and model predictions, aligned row by row over the test frame.
y_true = list(test_df["labels"])
y_pred = list(map(predict_label_rag, test_df["text"]))

In [None]:
# Score the RAG + LoRA predictions against the gold labels of the test set
# using accuracy and macro-averaged precision / recall / F1.

accuracy = accuracy_score(y_true, y_pred)

# zero_division=0 scores a class with no predictions as 0 instead of warning.
macro_scores = precision_recall_fscore_support(
    y_true,
    y_pred,
    average="macro",
    zero_division=0,
)
precision, recall, f1, _ = macro_scores

print(f"RAG + LoRA Accuracy: {accuracy:.4f}")
print(f"Precision (macro): {precision:.4f}")
print(f"Recall (macro): {recall:.4f}")
print(f"F1 (macro): {f1:.4f}")

In [None]:
# Persist everything the Streamlit app loads at start-up.
import pickle

# LoRA model and tokenizer share one output directory.
model.save_pretrained("flan_t5_lora")
tokenizer.save_pretrained("flan_t5_lora")

# FAISS index used for retrieval.
faiss.write_index(index, "faiss_index.bin")

# Passage texts, stored in the same order as the index vectors were added.
with open("corpus.pkl", "wb") as corpus_file:
    pickle.dump(corpus, corpus_file)

**Deployment: Streamlit Web App with RAG + LoRA**
This section creates an interactive Streamlit application for real-time inference, using the fine-tuned Flan-T5 model with LoRA and a Retrieval-Augmented Generation (RAG) pipeline.

In [None]:
# Streamlit Web Application: RAG + LoRA Text Complexity Classifier

%%writefile app.py

st.set_page_config(page_title="RAG LLM Classifier")

@st.cache_resource
def load_all():
    """Load the tokenizer, model, embedder, FAISS index and corpus used by the app.

    Returns:
        Tuple ``(tokenizer, model, embed_model, index, corpus, device)``.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Tokenizer and model are both read from the "flan_t5_lora" directory.
    tokenizer = AutoTokenizer.from_pretrained("flan_t5_lora")
    model = AutoModelForSeq2SeqLM.from_pretrained("flan_t5_lora")
    model.to(device)

    # Retrieval side: sentence embedder, vector index and the passage texts.
    embed_model = SentenceTransformer("all-MiniLM-L6-v2")
    index = faiss.read_index("faiss_index.bin")
    # NOTE: pickle.load executes arbitrary code from the file; only load a
    # corpus.pkl that this project produced itself.
    with open("corpus.pkl", "rb") as corpus_file:
        corpus = pickle.load(corpus_file)

    return tokenizer, model, embed_model, index, corpus, device

# Load every artefact through load_all(), which is wrapped in st.cache_resource.
tokenizer, model, embed_model, index, corpus, device = load_all()

# NOTE(review): hardcoded label set — presumably it has to match the distinct values
# of the dataset's "labels" column used in the notebook; confirm against the CSV.
labels = ["easy", "medium", "hard"]

def retrieve_context(text, top_k=3):
    """Return up to ``top_k`` corpus passages nearest to ``text``, joined by newlines.

    Returns an empty string when ``top_k`` is not positive or the index is empty.
    """
    if top_k <= 0 or index.ntotal == 0:
        return ""

    emb = embed_model.encode([text])

    # Fetch one extra neighbour so that dropping an exact copy of the query
    # (which carries no additional context) still leaves top_k passages.
    n_candidates = min(top_k + 1, index.ntotal)
    _, neighbour_ids = index.search(emb, n_candidates)

    passages = []
    for idx in neighbour_ids[0]:
        if idx < 0:
            # FAISS fills missing results with -1, which would otherwise
            # silently select the last corpus entry.
            continue
        passage = corpus[idx]
        if passage == text:
            continue
        passages.append(passage)

    return "\n".join(passages[:top_k])

def predict(text):
    """Classify ``text`` and return ``(prediction, context)``.

    The prompt and the 256-token input limit mirror the ones used when the model
    was prepared and evaluated in the notebook, so inference sees the same input
    format. The generated text is mapped onto the first entry of ``labels`` that
    it contains; when none matches, the raw generated text is returned instead.

    Args:
        text: User-provided text to classify.

    Returns:
        Tuple of the predicted label (or raw generation) and the retrieved context.
    """
    context = retrieve_context(text)
    prompt = f"""Using the context below, classify the complexity of the following text as one of {labels}.

Text:
{text}

Context:
{context}

Answer:"""
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=256,
    ).to(device)

    # Inference only: skip gradient tracking to save memory.
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=5)

    raw_prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    lowered = raw_prediction.lower()
    prediction = next(
        (lbl for lbl in labels if lbl.lower() in lowered),
        raw_prediction,
    )
    return prediction, context

# --- Page layout -----------------------------------------------------------
st.title("RAG-powered Text Complexity Classifier")

user_text = st.text_area("Enter text")

# Classify on click; blank / whitespace-only input gets a warning instead.
if st.button("Classify"):
    if not user_text.strip():
        st.warning("Please enter text")
    else:
        label, ctx = predict(user_text)
        st.success(f"Prediction: {label}")
        st.subheader("Retrieved Context")
        st.write(ctx)

In [None]:
# Expose the app (ngrok)

public_url = ngrok.connect(8501)
print(public_url)

In [None]:
# Add authtoken in Colab

!ngrok config add-authtoken 396eACYUVuDbWHPwHhf6MsR9AZK_4jtq4swAgtirbVyJjYjuX