In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import neattext.functions as nfx
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
import joblib



In [2]:
# Read the raw emotion dataset from disk
df = pd.read_csv("../data/emotion_dataset_raw.csv")

# Derive the cleaned text column: nfx.remove_userhandles runs first,
# then nfx.remove_stopwords is applied to its result
df['Clean_Text'] = (
    df['Text']
    .apply(nfx.remove_userhandles)
    .apply(nfx.remove_stopwords)
)

# Model inputs are the cleaned text; targets are the emotion labels
Xfeatures, ylabels = df['Clean_Text'], df['Emotion']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    Xfeatures,
    ylabels,
    test_size=0.3,
    random_state=42,
)



In [3]:
# Build SVM Pipeline
# Raw text is vectorized with TF-IDF and then classified by an RBF-kernel SVM.
pipe_svm = Pipeline(steps=[
    ('tfidf', TfidfVectorizer()),  # Using TF-IDF instead of CountVectorizer
    ('svm', SVC(
        kernel='rbf',              # Radial Basis Function kernel
        C=1.0,                     # Regularization parameter
        gamma='scale',             # Kernel coefficient
        probability=True,          # Enable probability estimates (needed for predict_proba)
        random_state=42            # Seed the shuffling used for the probability estimates
                                   # so predict_proba output is reproducible across runs
    ))
])

# Train and Fit Data
pipe_svm.fit(x_train, y_train)



In [4]:
# Predict once and reuse the result for every metric below.
# pipe_svm.score(x_test, y_test) would run a second full inference pass over
# the test set just to compute the same accuracy value.
y_pred = pipe_svm.predict(x_test)

# Check Accuracy
svm_accuracy = accuracy_score(y_test, y_pred)
print(f"SVM Model Accuracy: {svm_accuracy:.4f}")

# Classification Report (per-class precision / recall / f1)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Confusion Matrix (rows: true labels, columns: predicted labels)
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))



SVM Model Accuracy: 0.6233

Classification Report:
              precision    recall  f1-score   support

       anger       0.70      0.50      0.58      1283
     disgust       0.91      0.03      0.07       292
        fear       0.84      0.63      0.72      1645
         joy       0.53      0.88      0.67      3311
     neutral       0.88      0.54      0.67       675
     sadness       0.61      0.55      0.58      2015
       shame       0.76      0.44      0.56        36
    surprise       0.72      0.34      0.46      1181

    accuracy                           0.62     10438
   macro avg       0.74      0.49      0.54     10438
weighted avg       0.67      0.62      0.61     10438


Confusion Matrix:
[[ 640    0   37  394   12  184    1   15]
 [  29   10    7  185    0   51    1    9]
 [  57    1 1033  411    4  104    0   35]
 [  60    0   53 2929   28  187    1   53]
 [  23    0    9  200  366   69    0    8]
 [  76    0   53  729    6 1114    2   35]
 [   0    0    2   16

In [5]:
# Save Model & Pipeline
# The context manager guarantees the file handle is closed even if
# joblib.dump raises part-way through serialization.
with open("../models/emotion_classifier_pipe_svm.pkl", "wb") as pipeline_file:
    joblib.dump(pipe_svm, pipeline_file)



In [6]:
# Make A Prediction
def predict_emotion_svm(text):
    """Classify the emotion of a single piece of text with ``pipe_svm``.

    The text goes through the same cleaning steps used on the training data
    (``nfx.remove_userhandles`` followed by ``nfx.remove_stopwords``) before
    it is handed to the fitted pipeline.

    Parameters
    ----------
    text : str
        Raw input text.

    Returns
    -------
    tuple
        ``(label, probabilities)`` where ``label`` is the first (only) entry
        of ``pipe_svm.predict`` and ``probabilities`` is the unmodified array
        returned by ``pipe_svm.predict_proba`` for the one-item batch.
    """
    cleaned = nfx.remove_stopwords(nfx.remove_userhandles(text))

    # The pipeline expects an iterable of documents, so wrap the single text
    batch = [cleaned]
    label = pipe_svm.predict(batch)[0]
    class_probs = pipe_svm.predict_proba(batch)

    return label, class_probs



In [7]:
# Run the classifier on one sample sentence
ex1 = "This book was so interesting it made me happy"
emotion, prob = predict_emotion_svm(ex1)

# Show the predicted label followed by the raw probability array
print(
    f"\nPredicted Emotion: {emotion}",
    f"Probabilities: {prob}",
    sep="\n",
)

# Print the pipeline's class labels for reference when reading the probabilities
print("\nClass Labels:", pipe_svm.classes_)


Predicted Emotion: joy
Probabilities: [[2.27909925e-03 1.94856744e-03 1.30832435e-03 9.43239468e-01
  6.54920702e-03 8.86924254e-03 1.06729301e-04 3.56993622e-02]]

Class Labels: ['anger' 'disgust' 'fear' 'joy' 'neutral' 'sadness' 'shame' 'surprise']
