In [None]:
%pip install neattext seaborn pandas numpy

In [None]:
import seaborn as sns
import pandas as pd
import numpy as np

In [None]:
# Read the dataset from the project-relative CSV file.
data = pd.read_csv(filepath_or_buffer="Dataset/text.csv")

In [None]:
# Quick look at the first five rows of the loaded data.
data.head(n=5)

In [None]:
import neattext.functions as nfx

In [None]:
# Clean the 'text' column: strip user handles first, then drop stopwords,
# and store the result in a new 'Clean_Content' column.
data['Clean_Content'] = data['text'].apply(
    lambda raw_text: nfx.remove_stopwords(nfx.remove_userhandles(raw_text))
)

In [None]:
# Translate the numeric 'label' codes into emotion names and keep them in a
# 'sentiment' column. The position of each name in the list is its label code.
label_to_category = dict(enumerate([
    'sadness',
    'joy',
    'love',
    'anger',
    'fear',
    'surprise',
]))
data['sentiment'] = data['label'].map(label_to_category)

In [None]:
# Bar chart of how many rows carry each 'sentiment' value.
sns.countplot(data=data, x='sentiment')

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Features are the cleaned text, targets are the sentiment names.
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
x, y = data['Clean_Content'], data['sentiment']
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)


## Model Training

In [None]:
# Logistic Regression on bag-of-words counts.
# max_iter is raised above scikit-learn's default of 100: the lbfgs solver
# commonly hits that limit on high-dimensional, unscaled count features and
# stops with a ConvergenceWarning before the coefficients have converged.
pipe_lr = Pipeline([
    ('cv', CountVectorizer()),
    ('lr', LogisticRegression(max_iter=1000)),
])
pipe_lr.fit(x_train, y_train)

In [None]:
# Support Vector Machine: RBF-kernel SVC with C=10 on bag-of-words counts.
pipe_svm = Pipeline(
    steps=[
        ('cv', CountVectorizer()),
        ('svc', SVC(C=10, kernel='rbf')),
    ]
)
pipe_svm.fit(x_train, y_train)

In [None]:
# Random Forest Classifier on bag-of-words counts.
# random_state is fixed (same seed as the train/test split) so the forest's
# random sampling, and therefore the reported score, is reproducible
# from one run of the notebook to the next.
pipe_rf = Pipeline([
    ('cv', CountVectorizer()),
    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
])
pipe_rf.fit(x_train, y_train)

In [None]:
import joblib


In [None]:
# Print each fitted pipeline's score on the held-out test split, in the
# order: logistic regression, SVM, random forest.
for fitted_pipeline in (pipe_lr, pipe_svm, pipe_rf):
    print(fitted_pipeline.score(x_test, y_test))

In [None]:
# Persist the logistic-regression pipeline to disk.
joblib.dump(value=pipe_lr, filename="text_emotion_6param.pkl")