# Teacher vs Course Classification with Explainable AI

In [None]:
import pandas as pdimport numpy as npfrom pathlib import Pathfrom dataclasses import dataclassfrom sklearn.model_selection import train_test_splitfrom sklearn.feature_extraction.text import TfidfVectorizerfrom sklearn.linear_model import LogisticRegressionfrom sklearn.pipeline import Pipelinefrom sklearn.metrics import classification_report, confusion_matriximport shapfrom lime.lime_text import LimeTextExplainer

In [None]:
@dataclass
class Config:
    """Notebook-wide settings: data location, column names and the split seed."""

    # Workbook read by ``load_data`` when it exists on disk.
    data_path: Path = Path("data_feedback.xlsx")
    # Name of the column holding the free-text feedback.
    text_col: str = "comments"
    # Name of the column holding the teacher/course target.
    label_col: str = "teacher/course"
    # Seed passed to ``train_test_split`` for reproducible splits.
    random_state: int = 42


# Default configuration instance used by the rest of the notebook.
CFG = Config()

In [None]:
def load_data(cfg=CFG):
    """Load the feedback table and normalise its column names.

    Reads ``cfg.data_path`` with ``pd.read_excel`` when that file exists;
    otherwise a built-in three-row sample is used so the cell can run
    without the workbook. The configured text and label columns are renamed
    to ``'text'`` and ``'label'``; all other columns (such as
    ``'sentiment'``) are passed through unchanged.

    Args:
        cfg: Configuration providing ``data_path``, ``text_col`` and
            ``label_col``.

    Returns:
        A DataFrame containing at least the columns ``'text'`` and ``'label'``.

    Raises:
        KeyError: If the table has no ``'text'`` or ``'label'`` column after
            renaming, i.e. the configured column names do not match the data.
    """
    if cfg.data_path.exists():
        df = pd.read_excel(cfg.data_path)
    else:
        df = pd.DataFrame({
            'comments': ["great teacher", "great course", "practical should be by our theory books"],
            'teacher/course': ['teacher', 'course', 'course'],
            'sentiment': ['positive', 'positive', 'neutral'],
        })

    df = df.rename(columns={cfg.text_col: 'text', cfg.label_col: 'label'})

    # Fail here with a readable message instead of a bare KeyError further
    # down when the configured column names do not match the data.
    missing = [col for col in ('text', 'label') if col not in df.columns]
    if missing:
        raise KeyError(
            f"Missing column(s) {missing} after renaming "
            f"{cfg.text_col!r} -> 'text' and {cfg.label_col!r} -> 'label'; "
            f"available columns: {list(df.columns)}"
        )
    return df


df = load_data()

# A stratified split keeps the teacher/course ratio the same in both parts,
# but scikit-learn raises ValueError when a class has fewer than two rows or
# when the validation part is smaller than the number of classes. The built-in
# three-row sample hits exactly that case, so fall back to a plain shuffled
# split instead of crashing.
try:
    train_df, val_df = train_test_split(
        df, test_size=0.2, random_state=CFG.random_state, stratify=df['label']
    )
except ValueError:
    train_df, val_df = train_test_split(
        df, test_size=0.2, random_state=CFG.random_state
    )
print(train_df.head())

## Baselines with explainability

In [None]:
def run_tfidf(train_df, val_df, analyzer='word', ngram_range=(1,2)):
    """Fit a TF-IDF + logistic-regression baseline and print validation metrics.

    Args:
        train_df: Frame with ``'text'`` and ``'label'`` columns used for fitting.
        val_df: Frame with the same columns used for evaluation.
        analyzer: Forwarded to ``TfidfVectorizer(analyzer=...)``.
        ngram_range: Forwarded to ``TfidfVectorizer(ngram_range=...)``.

    Returns:
        The fitted ``Pipeline`` with steps ``'tfidf'`` and ``'clf'``.
    """
    vectorizer = TfidfVectorizer(analyzer=analyzer, ngram_range=ngram_range)
    classifier = LogisticRegression(max_iter=1000, class_weight='balanced')
    pipe = Pipeline([('tfidf', vectorizer), ('clf', classifier)])
    pipe.fit(train_df['text'], train_df['label'])

    y_pred = pipe.predict(val_df['text'])
    # Classification report first, then the confusion matrix.
    for metric in (classification_report, confusion_matrix):
        print(metric(val_df['label'], y_pred))
    return pipe


# Word-level (1-2 gram) and character-level (3-5 gram) baselines.
word_model = run_tfidf(train_df, val_df)
char_model = run_tfidf(train_df, val_df, analyzer='char', ngram_range=(3,5))

## SHAP + LIME

In [None]:
def explain(model: Pipeline, text: str, background_texts=None, num_features: int = 8):
    """Explain one prediction of a TF-IDF pipeline with SHAP and LIME.

    Args:
        model: Fitted pipeline exposing the named steps ``'tfidf'`` and ``'clf'``.
        text: The single document to explain.
        background_texts: Documents used as the SHAP background data. When
            omitted, the module-level ``train_df['text']`` is used, which
            matches the previous behaviour.
        num_features: Maximum number of tokens LIME reports (default 8).

    Returns:
        A dict with two entries:
        ``'shap_values'`` - ``shap_values.values.tolist()`` for ``text``;
        ``'lime'`` - LIME's ``as_list()`` output for ``text``.
    """
    vect = model.named_steps['tfidf']
    clf = model.named_steps['clf']

    if background_texts is None:
        background_texts = train_df['text']
    explainer = shap.LinearExplainer(clf, vect.transform(background_texts))
    shap_values = explainer(vect.transform([text]))

    # Take the class names from the fitted classifier rather than from a
    # global frame: ``predict_proba`` columns follow ``clf.classes_``, so the
    # LIME labels are guaranteed to line up with the probabilities even when
    # the model was trained on different data than ``train_df``.
    lime_explainer = LimeTextExplainer(class_names=clf.classes_.tolist())
    lime_explanation = lime_explainer.explain_instance(
        text, model.predict_proba, num_features=num_features
    )

    return {
        'shap_values': shap_values.values.tolist(),
        'lime': lime_explanation.as_list(),
    }


# explain(word_model, val_df.iloc[0]['text'])

## Robustness: sentiment-stratified evaluation

In [None]:
def stratified_eval(model: Pipeline, val_df):
    """Print one classification report per value of the ``'sentiment'`` column.

    Args:
        model: Fitted pipeline whose ``predict`` accepts a column of texts.
        val_df: Frame with ``'text'``, ``'label'`` and ``'sentiment'`` columns.
    """
    by_sentiment = val_df.groupby('sentiment')
    for sentiment, slice_df in by_sentiment:
        # Only evaluate groups that actually contain rows.
        if len(slice_df) >= 1:
            y_pred = model.predict(slice_df['text'])
            print(f"Sentiment slice: {sentiment}")
            print(classification_report(slice_df['label'], y_pred))


# stratified_eval(word_model, val_df)

## Prompted baseline

In [None]:
def prompt_topic(texts, model_name="gpt-4o-mini"):
    """Label each feedback text as teacher- or course-related via the OpenAI API.

    One Responses API request is sent per text, in order.

    Args:
        texts: Iterable of feedback strings.
        model_name: Model identifier passed to the API.

    Returns:
        A list with one raw response string (``resp.output_text``) per input
        text, in input order. The model is asked to reply with a JSON object
        of the form ``{label, rationale}``; the strings are returned unparsed.
    """
    import openai

    client = openai.OpenAI()
    system_prompt = (
        "Label whether feedback is about teacher or course. "
        "Return JSON {label, rationale}."
    )

    outputs = []
    for t in texts:
        resp = client.responses.create(
            model=model_name,
            input=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": t},
            ],
            # JSON mode for the Responses API is configured under
            # ``text.format``. ``response_format`` is a Chat Completions
            # parameter and is rejected by ``responses.create``.
            text={"format": {"type": "json_object"}},
        )
        outputs.append(resp.output_text)
    return outputs

## Error table

In [None]:
def error_table(model, val_df):
    """Return the validation rows whose prediction differs from the label.

    Args:
        model: Object whose ``predict`` accepts ``val_df['text']``.
        val_df: Frame with ``'text'``, ``'label'`` and ``'sentiment'`` columns;
            it is not modified.

    Returns:
        A DataFrame with columns ``text``, ``label``, ``pred``, ``sentiment``
        holding only the misclassified rows, with their original index.
    """
    # ``assign`` works on a copy, so the caller's frame is left untouched.
    scored = val_df.assign(pred=model.predict(val_df['text']))
    is_wrong = scored['pred'] != scored['label']
    return scored.loc[is_wrong, ['text', 'label', 'pred', 'sentiment']]


# error_table(word_model, val_df)

## CLI entry

In [None]:
def main_cli(argv=None):
    """Command-line entry point: load the data, split it and train one baseline.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]`` as before; pass an explicit list such as
            ``[]`` or ``['--model', 'char']`` to call this from a notebook,
            where ``sys.argv`` holds the kernel's own arguments.

    The ``--model`` option selects the word-level (``tfidf``, default) or
    character-level (``char``) TF-IDF baseline.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=['tfidf', 'char'], default='tfidf')
    args = parser.parse_args(argv)

    df = load_data()
    # Stratification fails with ValueError on very small data (a class with a
    # single row, or fewer validation rows than classes), which includes the
    # built-in sample returned when the workbook is missing. Fall back to an
    # unstratified split in that case.
    try:
        train_df, val_df = train_test_split(
            df, test_size=0.2, random_state=CFG.random_state, stratify=df['label']
        )
    except ValueError:
        train_df, val_df = train_test_split(
            df, test_size=0.2, random_state=CFG.random_state
        )

    if args.model == 'tfidf':
        run_tfidf(train_df, val_df)
    else:
        run_tfidf(train_df, val_df, analyzer='char', ngram_range=(3, 5))


if __name__ == '__main__':
    # NOTE(review): the call is left disabled, as in the original - presumably
    # because argparse would otherwise parse the notebook kernel's arguments.
    # main_cli()
    pass