In [None]:
%%bash
pip install -q pandas scikit-learn numpy matplotlib seaborn torch torchvision torchaudio transformers datasets sentence-transformers mlflow


### Sentence Embeddings + Linear Classifier
Generate aspect-aware sentence embeddings by prefixing each comment with its aspect, then train a lightweight logistic-regression classifier on those embeddings.


In [None]:
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split

# Read the comments CSV and derive two helper columns in one chained step:
#   - stratify_key: aspect and label joined with '_' (used to stratify the split)
#   - prompt: the comment text prefixed with its aspect (the text that gets embedded)
df = pd.read_csv(Path('../../data/comments.csv')).assign(
    stratify_key=lambda frame: frame['aspect'] + '_' + frame['label'],
    prompt=lambda frame: 'Aspect: ' + frame['aspect'] + ' | ' + frame['comment'],
)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable,
# and stratifying on the combined key keeps aspect/label proportions in both parts.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['stratify_key'],
)


In [None]:
from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model once and reuse it for both splits.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Encode the prompt column of each split with identical settings.
# The generator is consumed in order, so the training split is encoded first.
train_embeddings, test_embeddings = (
    model.encode(split['prompt'].tolist(), batch_size=16, show_progress_bar=True)
    for split in (train_df, test_df)
)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Fit a logistic-regression classifier on the training embeddings
# (fit() returns the estimator itself, so construction and fitting can be chained).
clf = LogisticRegression(max_iter=200).fit(train_embeddings, train_df['label'])

# Predict labels for the held-out embeddings and print the classification report.
preds = clf.predict(test_embeddings)
print(classification_report(y_true=test_df['label'], y_pred=preds))
