# In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib

# Read the GoEmotions annotations into a DataFrame.
df = pd.read_csv(filepath_or_buffer='../data/goemotions_1.csv')

# Every column from position 9 to the end is treated as a per-emotion flag;
# the slice is expected to begin at the 'admiration' column.
# NOTE(review): this is a positional slice -- confirm the CSV header order.
emotion_columns = df.columns[slice(9, None)]
def extract_primary_emotion(row, columns=None, default='neutral'):
    """Return the name of the first emotion column flagged with 1 in *row*.

    Args:
        row: Record indexable by column name (for example the row handed
            over by ``df.apply(..., axis=1)``, or a plain dict).
        columns: Column names to inspect, in priority order. When omitted,
            the module-level ``emotion_columns`` is used, so existing
            single-argument calls keep working.
        default: Label returned when none of the inspected columns equals 1.

    Returns:
        The first name in *columns* whose value in *row* equals 1, or
        *default* when there is no such column.
    """
    # Compare against None explicitly: a pandas Index has no unambiguous
    # truth value, so ``if not columns`` would raise.
    if columns is None:
        columns = emotion_columns
    # next() stops at the first match instead of collecting every flagged
    # column only to discard all but the first.
    return next((col for col in columns if row[col] == 1), default)

# Derive a single label per row from the emotion flag columns.
df['label'] = df.apply(extract_primary_emotion, axis=1)
X, y = df['text'], df['label']

# Model: TF-IDF features fed into a linear SVM (default settings for both).
pipeline = Pipeline(steps=[
    ('tfidf', TfidfVectorizer()),
    ('clf', LinearSVC()),
])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the fitted pipeline, so prediction is chained onto it.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)
print(classification_report(y_test, y_pred))

# Persist the fitted pipeline to disk.
joblib.dump(pipeline, '../app/model.pkl')
print('✅ Model saved to app/model.pkl')