In [None]:
!pip install pandas scikit-learn transformers --quiet

In [None]:
# Upload the train and test CSV files from the local machine.
# The loading cell below reads "college_feedback_train.csv" and
# "college_feedback_test.csv", so both files must be uploaded here.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime - confirm before running this notebook elsewhere.

from google.colab import files
uploaded = files.upload()

In [None]:
# Read the train and test splits into DataFrames (train first, then test).

import pandas as pd

train_df, test_df = (
    pd.read_csv(csv_name)
    for csv_name in ("college_feedback_train.csv", "college_feedback_test.csv")
)

# Preview the first rows of the training split.
train_df.head()

In [None]:
# Pick one few-shot example per category from the training split.
# A fixed random_state keeps the sampled examples - and therefore the prompt,
# the predictions and the reported metrics - identical across
# Restart Kernel -> Run All.
FEW_SHOT_SEED = 42
# Order matters: examples appear in the prompt in this order.
FEW_SHOT_CATEGORIES = ["Academics", "Facilities", "Administration"]

examples = pd.concat([
    train_df[train_df["category"] == category].sample(1, random_state=FEW_SHOT_SEED)
    for category in FEW_SHOT_CATEGORIES
])


In [None]:
# Render every sampled row as a "feedback: ... / category: ..." pair and
# concatenate the pairs into the few-shot section of the prompt.
few_shot_examples = "".join(
    f"feedback: {row['feedback']}\ncategory: {row['category']}\n"
    for _, row in examples.iterrows()
)


In [None]:
# Task instruction placed at the top of every prompt: it names the three
# allowed categories and the kind of feedback that belongs to each.
instruction = (
    "Classify the following student feedback based on the main topic. "
    "Choose only one of the categories:\n"
    "- Academics: if it refers to teaching, subjects, syllabus, grading, or classes.\n"
    "- Facilities: if it refers to infrastructure, internet, hostel, canteen, or campus services.\n"
    "- Administration: if it refers to rules, policies, admissions, exams, or certificate processes.\n"
)

In [None]:
# Load the tokenizer and the seq2seq model used to classify the feedback.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Checkpoint identifier passed to both from_pretrained calls below, so the
# tokenizer and the model always come from the same checkpoint.
model_name = "google/flan-t5-base"
# Alternative checkpoint (presumably a larger FLAN-T5 variant); uncomment to swap it in.
# model_name = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [None]:
def build_prompt(feedback: str) -> str:
    """Assemble the full few-shot prompt for one feedback text.

    The prompt is the module-level ``instruction``, followed by the
    module-level ``few_shot_examples`` block, followed by the new feedback
    and an open ``category:`` slot for the model to complete.

    Args:
        feedback: The student feedback to classify.

    Returns:
        The prompt string, ending with ``"category:"``.
    """
    query = f"feedback: {feedback}\ncategory:"
    return "".join((instruction, few_shot_examples, query))

# One prompt per test-set feedback entry, in test-set order.
prompts = list(map(build_prompt, test_df["feedback"]))

# Tokenize all prompts as a single padded (and, if needed, truncated) batch
# of PyTorch tensors, then place the batch on the CPU.
import torch

encoded = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True)
inputs = encoded.to("cpu")


In [None]:
# Generation settings: at most 5 new tokens per prompt, with sampling disabled.
generation_kwargs = dict(max_new_tokens=5, do_sample=False)
outputs = model.generate(**inputs, **generation_kwargs)

# Turn each generated sequence back into text, dropping special tokens,
# newlines and surrounding whitespace.
preds = []
for sequence in outputs:
    decoded = tokenizer.decode(sequence, skip_special_tokens=True)
    preds.append(decoded.replace('\n', '').strip())


In [None]:
# Attach the predictions to a new frame; test_df itself is left untouched.
results = test_df.assign(predicted=preds)

# Show the first 20 predictions next to the gold labels.
preview = results[["feedback", "category", "predicted"]].head(20)
print(preview.to_string(index=False))

# Optional: case-insensitive exact-match accuracy (only meaningful if the
# gold labels are clean).
gold_lower = results["category"].str.lower()
pred_lower = results["predicted"].str.lower()
accuracy = gold_lower.eq(pred_lower).mean()
print(f"Accuracy: {accuracy:.2f}")


In [None]:
from sklearn.metrics import classification_report

# The accuracy cell compares labels case-insensitively, so do the same here:
# map each prediction back to the gold label's casing when it matches a known
# category ignoring case. Otherwise "academics" vs "Academics" would count as
# a miss in this report while counting as a hit in the accuracy figure.
canonical_label = {
    str(label).lower(): label for label in test_df["category"].dropna().unique()
}
report_preds = [canonical_label.get(pred.lower(), pred) for pred in preds]

# zero_division=0 reports 0 (without warnings) for labels that the model
# emitted but that never occur in the gold labels, or vice versa.
print(classification_report(test_df["category"], report_preds, zero_division=0))
