In [1]:
!pip install pandas scikit-learn transformers --quiet

In [2]:
#uploading train and test csv files

from google.colab import files
uploaded = files.upload()

Saving college_feedback_test.csv to college_feedback_test.csv
Saving college_feedback_train.csv to college_feedback_train.csv


In [3]:
#loading data

import pandas as pd

train_df = pd.read_csv("college_feedback_train.csv")
test_df = pd.read_csv("college_feedback_test.csv")

train_df.head()

Unnamed: 0,feedback,category
0,The air-conditioning in classrooms is not work...,Facilities
1,The air-conditioning in classrooms is not work...,Facilities
2,There is a shortage of clean drinking water on...,Facilities
3,The classrooms are not well ventilated.,Facilities
4,There is a lot of delay in issuing certificates.,Administration


In [4]:
# few-shot examples from each category
examples = pd.concat([
    train_df[train_df["category"] == "Academics"].sample(1),
    train_df[train_df["category"] == "Facilities"].sample(1),
    train_df[train_df["category"] == "Administration"].sample(1)
])


In [5]:
# Construct few-shot example block
few_shot_examples = ""
for _, row in examples.iterrows():
    few_shot_examples += f"feedback: {row['feedback']}\ncategory: {row['category']}\n"


In [6]:
# Instruction prompt
instruction = "Classify the following student feedback into one of: Academics, Facilities, Administration.\n"


In [7]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [8]:
def build_prompt(feedback):
    return instruction + few_shot_examples + f"feedback: {feedback}\ncategory:"

prompts = [build_prompt(text) for text in test_df["feedback"]]

# Tokenize
import torch

inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).to("cpu")


In [15]:
outputs = model.generate(
    **inputs,
    max_new_tokens=5,
    do_sample=False
)

# Decode output
preds = [tokenizer.decode(output, skip_special_tokens=True).replace('\n', '').strip() for output in outputs]


In [23]:
results = test_df.copy()
results["predicted"] = preds

# Show first 10 predictions
print(results[["feedback", "category", "predicted"]].head(20).to_string(index=False))

# Optional: Accuracy (if clean labels)
accuracy = (results["category"].str.lower() == results["predicted"].str.lower()).mean()
print(f"Accuracy: {accuracy:.2f}")


                                          feedback       category      predicted
                 The syllabus needs to be revised.      Academics      Academics
    Wi-Fi connectivity is poor in the hostel area.     Facilities     Facilities
                 The syllabus needs to be revised.      Academics      Academics
        The admission process was smooth and easy. Administration     Facilities
                Identity card renewal is too slow. Administration Administration
                Identity card renewal is too slow. Administration Administration
             Hostel rooms need better maintenance.     Facilities     Facilities
                 The syllabus needs to be revised.      Academics      Academics
    No response from administrative email support. Administration Administration
  There is a lot of delay in issuing certificates. Administration     Facilities
       Exam schedules should be announced earlier. Administration     Facilities
    The administration staff

In [24]:
from sklearn.metrics import classification_report

print(classification_report(test_df["category"], preds))


                precision    recall  f1-score   support

     Academics       1.00      0.38      0.55        29
Administration       1.00      0.50      0.67        34
    Facilities       0.46      1.00      0.63        27
      Facility       0.00      0.00      0.00         0

      accuracy                           0.61        90
     macro avg       0.61      0.47      0.46        90
  weighted avg       0.84      0.61      0.62        90



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
