# Teacher vs. Course Classification Experiments

Goal: classify each piece of feedback as being about the **teacher** or the **course**. We progress from a keyword baseline, through TF–IDF linear models, to transformers and agentic routing, with explainability and research-ethics considerations along the way.

In [None]:
import pandas as pd
from pathlib import Path

# Load the feedback spreadsheet; the path is resolved relative to the
# current working directory.
feedback_path = Path('data_feedback.xlsx')
df = pd.read_excel(feedback_path)

# Preview the first few rows to sanity-check the load.
df.head()

## 1. Quick heuristic baseline
Rationale: keyword/phrase rules provide a transparent starting point and sanity check.

In [None]:
def keyword_baseline(text):
    """Classify a comment as 'teacher' or 'course' from keyword hits.

    Each side's score is the number of *distinct* terms from its list that
    occur in ``text`` as whole words (case-insensitive). A trailing ``s`` is
    tolerated so simple plurals such as "modules" still count.

    Matching on whole words rather than substrings matters here: a substring
    test makes 'he' fire on "the"/"there", 'sir' fire on "desire", and counts
    "she" twice (as 'she' and as 'he'), which biases the result to 'teacher'.

    Args:
        text: The comment to classify. Non-string values (e.g. ``None`` or a
            NaN from an empty spreadsheet cell) are treated as empty text.

    Returns:
        'teacher' if the teacher score is greater than or equal to the course
        score (so ties, including no hits at all, resolve to 'teacher');
        otherwise 'course'.
    """
    import re

    teacher_terms = ('teacher', 'sir', 'he', 'she', 'instructor')
    course_terms = ('course', 'syllabus', 'practical', 'module')

    # Missing cells arrive as float NaN / None; score them as empty text
    # instead of failing on ``.lower()``.
    if not isinstance(text, str):
        text = ''

    words = set(re.findall(r'[a-z]+', text.lower()))
    # Add singular forms so "teachers" / "practicals" match their base term.
    words |= {word[:-1] for word in words if word.endswith('s')}

    t_hits = sum(term in words for term in teacher_terms)
    c_hits = sum(term in words for term in course_terms)
    return 'teacher' if t_hits >= c_hits else 'course'

# Run the keyword rule over every comment and keep the predictions alongside
# the data.
df['kw_pred'] = df['comments'].map(keyword_baseline)

# Accuracy: share of rows where the prediction equals the label column.
# NOTE(review): this is an exact string comparison, so it presumes the labels
# are spelled exactly 'teacher' / 'course' — confirm against the sheet.
df['kw_pred'].eq(df['teacher/course']).mean()

## 2. Baseline: TF–IDF + Linear Classifier
Rationale: fast and interpretable with clear feature weights.

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

# Hold out 20% of the rows for validation, stratified on the label so both
# splits keep the same class proportions; the seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    df['comments'],
    df['teacher/course'],
    test_size=0.2,
    stratify=df['teacher/course'],
    random_state=42,
)

# Word uni-/bi-gram TF-IDF features feeding a class-weighted logistic
# regression.
tfidf_lr = Pipeline(
    steps=[
        ('tfidf', TfidfVectorizer(ngram_range=(1, 2), min_df=1)),
        ('clf', LogisticRegression(max_iter=200, class_weight='balanced')),
    ]
)
tfidf_lr.fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out split.
word_val_pred = tfidf_lr.predict(X_val)
print(classification_report(y_val, word_val_pred))

### Feature inspection for explainability
Inspect top n-grams per class to validate reliance on meaningful cues rather than spurious patterns.

In [None]:
import numpy as np
# Print the 10 highest-weighted n-grams for each class of the fitted
# TF-IDF + logistic-regression pipeline.
vec = tfidf_lr.named_steps['tfidf']
clf = tfidf_lr.named_steps['clf']
features = np.array(vec.get_feature_names_out())

# For a two-class problem scikit-learn stores a single coefficient row
# (shape (1, n_features)) that describes classes_[1]; its negation describes
# classes_[0]. Indexing coef_[i] per class would raise IndexError for i == 1,
# so build one weight vector per class explicitly.
if clf.coef_.shape[0] == 1:
    class_weights = [-clf.coef_[0], clf.coef_[0]]
else:
    class_weights = clf.coef_

for cls, weights in zip(clf.classes_, class_weights):
    # Indices of the 10 largest weights, strongest first.
    top = weights.argsort()[-10:][::-1]
    print(cls, features[top])

## 3. Intermediate: Character n-grams
Rationale: robustness to spelling variations and low-resource slang.

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer as CharTfidf
# Same classifier as the word-level baseline, but fed TF-IDF weights of
# character 3- to 5-grams instead of word n-grams.
char_model = Pipeline(
    steps=[
        ('tfidf', CharTfidf(analyzer='char', ngram_range=(3, 5), min_df=1)),
        ('clf', LogisticRegression(max_iter=200, class_weight='balanced')),
    ]
)
char_model.fit(X_train, y_train)

# Evaluate on the same held-out split used for the word-level model.
char_val_pred = char_model.predict(X_val)
print(classification_report(y_val, char_val_pred))

## 4. Advanced: Transformer fine-tuning
Rationale: capture nuanced course/teacher cues beyond explicit keywords.

In [None]:
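# Transformer skeleton (left commented out; needs the `datasets` and `transformers` packages).
# NOTE: as sketched, `eval_dataset` is the same data as `train_dataset`, so the reported
# metrics would be optimistic — split into train/validation sets before running this for real.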
# from datasets import Dataset
# from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
# tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
# label2id = {lbl:i for i,lbl in enumerate(sorted(df['teacher/course'].unique()))}
# id2label = {i:lbl for lbl,i in label2id.items()}
# dataset = Dataset.from_pandas(df[['comments', 'teacher/course']])
# dataset = dataset.map(lambda x: {'labels': label2id[x['teacher/course']]}, remove_columns=['teacher/course'])
# dataset = dataset.map(lambda x: tokenizer(x['comments'], truncation=True), batched=True)
# model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=len(label2id), id2label=id2label, label2id=label2id)
# args = TrainingArguments(output_dir='./teacher-course-model', evaluation_strategy='epoch', learning_rate=2e-5, num_train_epochs=8, per_device_train_batch_size=16)
# trainer = Trainer(model=model, args=args, train_dataset=dataset, eval_dataset=dataset, tokenizer=tokenizer)
# trainer.train()

### Explainability
- Token attributions (IG/SHAP) to highlight cues for each class.
- Counterfactuals: minimally edit a teacher comment to look like course feedback and observe prediction flips.

## 5. Agentic routing and uncertainty
Rationale: combine rule-based confidence with model predictions.
- Use a confidence score (e.g., the keyword-hit ratio or the TF–IDF model's predicted probability) to decide whether to trust the baseline or call an LLM.
- For low-confidence cases, ask LLM for classification + rationale; log both for auditability.

In [None]:
# Placeholder for a simple router
# def route_prediction(text):
#     kw = keyword_baseline(text)
#     score = max(tfidf_lr.predict_proba([text])[0])
#     if score > 0.8:
#         return {'label': tfidf_lr.predict([text])[0], 'source': 'tfidf', 'confidence': score}
#     else:
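#         # TODO: call the LLM fallback here. Until it is wired in, the keyword-baseline
#         # label is returned as a stand-in even though the source says 'llm_fallback'.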
#         return {'label': kw, 'source': 'llm_fallback', 'confidence': 0.5}


## 6. Error analysis & ethics
- Inspect misclassifications grouped by presence/absence of explicit teacher/course terms.
- Ethical considerations: avoid reinforcing gendered language; keep human-in-the-loop for high-stakes use.