In [None]:
import pandas as pd

def load_emotion_split(path, sep=";", columns=("text", "emotion")):
    """Read one headerless ``<text><sep><emotion>`` file into a DataFrame.

    Parameters
    ----------
    path : str
        Location of the delimiter-separated file; its first line is data,
        not a header.
    sep : str, default ";"
        Field separator between the columns.
    columns : sequence of str, default ("text", "emotion")
        Column names to assign, in file order.

    Returns
    -------
    pandas.DataFrame
        One row per parsed line, with the requested column names.

    Raises
    ------
    ValueError
        If any parsed row contains a missing value in any column.
    """
    # header=None is what pandas already does when `names` is given; spelling
    # it out makes it obvious that the first line is treated as data.
    frame = pd.read_csv(path, sep=sep, header=None, names=list(columns))

    # Fail fast with a clear message: a line lacking the separator parses to
    # NaN, which the later vectorizing/training cells are not expected to
    # handle gracefully.
    incomplete = frame.isna().any(axis=1)
    if incomplete.any():
        raise ValueError(
            f"{path}: {int(incomplete.sum())} row(s) have a missing field "
            f"(first at row index {incomplete.idxmax()})"
        )
    return frame


# Load train data
train_df = load_emotion_split("data/train.txt")

# Load test data
test_df = load_emotion_split("data/test.txt")

# Rich notebook display (Jupyter's built-in `display`) instead of plain-text print.
display(train_df.head(), test_df.head())


In [None]:
# Separate each split into its input texts (X) and target emotion labels (y).
X_train, y_train = train_df["text"], train_df["emotion"]
X_test, y_test = test_df["text"], test_df["emotion"]


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over unigrams and bigrams, with the vocabulary capped at 10,000 terms.
tfidf_options = {"ngram_range": (1, 2), "max_features": 10_000}
vectorizer = TfidfVectorizer(**tfidf_options)

# Fit on the training texts only, then reuse the fitted vectorizer to encode
# the test texts so both matrices share the same feature columns.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


In [None]:
from sklearn.svm import LinearSVC

# Fixed seed so re-running the notebook yields the same fitted model:
# LinearSVC's dual coordinate-descent solver shuffles the data using a
# pseudo-random generator, which is otherwise unseeded.
RANDOM_STATE = 42

# Linear support-vector classifier trained on the TF-IDF training matrix.
model = LinearSVC(random_state=RANDOM_STATE)
model.fit(X_train_vec, y_train)


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Predict labels for the held-out test matrix.
y_pred = model.predict(X_test_vec)

# Compute both metrics first, then report them.
test_accuracy = accuracy_score(y_test, y_pred)
per_class_report = classification_report(y_test, y_pred)

print("Test Accuracy:", test_accuracy)
print("Classification Report:", per_class_report, sep="\n")


In [None]:
import joblib

# Output files for the fitted classifier and the fitted TF-IDF vectorizer;
# both are needed together to run predictions on new text.
MODEL_PATH = "text_emotion_model.pkl"
VECTORIZER_PATH = "tfidf_vectorizer.pkl"

joblib.dump(value=model, filename=MODEL_PATH)
joblib.dump(value=vectorizer, filename=VECTORIZER_PATH)
