In [34]:
!pip install --quiet scikit-learn pandas

import pandas as pd

# Load the pre-split feedback data. Both files are expected to have a
# 'feedback' text column; 'category' is required for train, optional for test.
train_df = pd.read_csv("college_feedback_train.csv")
test_df = pd.read_csv("college_feedback_test.csv")

# Category name -> integer class id used by the classifier, and its inverse
# for turning predictions back into readable names.
label_map = {
    "Academics": 0,
    "Facilities": 1,
    "Administration": 2,
    "Student Life": 3
}
inv_label_map = {v: k for k, v in label_map.items()}


def _encode_categories(frame, split_name):
    """Map frame['category'] to integer labels via label_map.

    Raises:
        ValueError: if any row has a category that is missing or not a key of
            label_map. Series.map would otherwise turn those rows into NaN,
            which only surfaces later as an opaque error inside scikit-learn.
    """
    labels = frame['category'].map(label_map)
    unknown = frame.loc[labels.isna(), 'category'].unique()
    if len(unknown) > 0:
        raise ValueError(
            f"{split_name} data contains categories not present in label_map: {list(unknown)}"
        )
    return labels


train_df['label'] = _encode_categories(train_df, "train")
# The test split may be unlabeled; in that case the 'label' column is filled
# with None, as before.
test_df['label'] = _encode_categories(test_df, "test") if 'category' in test_df.columns else None

from sklearn.feature_extraction.text import TfidfVectorizer

# Targets for training are the integer-encoded categories.
y_train = train_df['label']

# TF-IDF bag-of-words features: vocabulary capped at 5000 terms, English stop
# words removed. The vocabulary and IDF weights are learned on the training
# text only and then reused unchanged for the test text.
tfidf_settings = dict(max_features=5000, stop_words='english')
vectorizer = TfidfVectorizer(**tfidf_settings)

train_text, test_text = train_df['feedback'], test_df['feedback']
X_train = vectorizer.fit_transform(train_text)
X_test = vectorizer.transform(test_text)

from sklearn.linear_model import LogisticRegression

# Logistic regression over the TF-IDF features. max_iter is raised to 1000 to
# give the solver more iterations to converge. fit() returns the estimator
# itself, so construction and training can be chained.
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

from sklearn.metrics import accuracy_score, f1_score, classification_report

# Predict integer class ids for the test set and translate them back to
# category names for display.
y_pred = clf.predict(X_test)
predicted_categories = [inv_label_map[label] for label in y_pred]

# Ground truth is available only when the test file ships a 'category' column.
# Checking `test_df['label'] is not None` does not work: indexing a DataFrame
# always returns a Series object, which is never `None`, so the metrics would
# be attempted (and crash) on an unlabeled test set.
has_labels = 'category' in test_df.columns

if has_labels:
    y_true = test_df['label']
    print("✅ Accuracy:", accuracy_score(y_true, y_pred))
    print("✅ F1 Macro Score:", f1_score(y_true, y_pred, average='macro'))

    print("\n📊 Classification Report:")
    # Pass `labels` explicitly so the names line up with the class ids even if
    # some category does not occur in this test split; without it,
    # classification_report raises when the number of observed classes differs
    # from the number of target names.
    print(classification_report(
        y_true,
        y_pred,
        labels=list(label_map.values()),
        target_names=list(label_map.keys()),
    ))
else:
    print("✅ Accuracy:", "N/A")
    print("✅ F1 Macro Score:", "N/A")

# Pair each feedback text with its predicted category for a quick visual check.
output_df = pd.DataFrame({
    'feedback': test_df['feedback'],
    'predicted_category': predicted_categories
})

print("\n📝 Sample Predictions:")
print(output_df.head(20).to_string(index=False))



✅ Accuracy: 1.0
✅ F1 Macro Score: 1.0

📊 Classification Report:
                precision    recall  f1-score   support

     Academics       1.00      1.00      1.00         5
    Facilities       1.00      1.00      1.00         5
Administration       1.00      1.00      1.00         3
  Student Life       1.00      1.00      1.00         7

      accuracy                           1.00        20
     macro avg       1.00      1.00      1.00        20
  weighted avg       1.00      1.00      1.00        20


📝 Sample Predictions:
                                        feedback predicted_category
                    Hostel curfew is too strict.       Student Life
          Not enough common spaces for students.       Student Life
       Counseling sessions really help students.       Student Life
       Classrooms are clean and well-maintained.         Facilities
Too much syllabus is covered in too little time.          Academics
             Restroom hygiene is below standard.      

In [None]:
!pip install --upgrade ibm-watson-machine-learning --quiet

from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes, DecodingMethods

import os
from getpass import getpass

# Credentials must never be stored in the notebook: anything saved here ends up
# in version control and in every shared copy. They are read from environment
# variables, falling back to an interactive prompt (getpass hides the key).
# NOTE(review): an API key was previously hardcoded in this cell; it should be
# treated as compromised and revoked/rotated in IBM Cloud.
API_KEY     = os.environ.get("WATSONX_APIKEY") or getpass("IBM Cloud API key: ")
PROJECT_ID  = os.environ.get("WATSONX_PROJECT_ID") or input("watsonx project ID: ").strip()
# The regional endpoint is not secret; keep us-south as the default.
ENDPOINT    = os.environ.get("WATSONX_URL", "https://us-south.ml.cloud.ibm.com")

if not API_KEY or not PROJECT_ID:
    raise ValueError(
        "Missing credentials: set WATSONX_APIKEY and WATSONX_PROJECT_ID "
        "or enter them when prompted."
    )

# Generation settings: greedy decoding and at most 20 newly generated tokens.
params = {}
params[GenParams.DECODING_METHOD] = DecodingMethods.GREEDY
params[GenParams.MAX_NEW_TOKENS] = 20

# Handle to the hosted FLAN-T5-XL foundation model, bound to the project and
# authenticated with the API key against the configured endpoint.
watson_credentials = {"apikey": API_KEY, "url": ENDPOINT}
model = Model(
    model_id=ModelTypes.FLAN_T5_XL,
    credentials=watson_credentials,
    project_id=PROJECT_ID,
    params=params,
)

# Single example to classify with the foundation model.
feedback = "The library needs more seating."

# Zero-shot prompt: instruction listing the allowed categories, then the quoted
# feedback, ending with "Category:" so the model completes it with the label.
instruction = (
    "Classify the student feedback into one of these categories: "
    "Academics, Facilities, Administration, Student Life.\n\n"
)
prompt = instruction + f'Feedback: "{feedback}"\nCategory:'

# The generated text is the predicted category; strip surrounding whitespace.
response = model.generate_text(prompt=prompt)
category = response.strip()

for heading, shown in (("📝 Feedback:", feedback), ("🏷️ Predicted Category:", category)):
    print(heading, shown)
