In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Load the preprocessed corpus, then pull out the two columns used for
# modelling: the cleaned text (inputs) and the sentiment column (targets).
df = pd.read_csv('./dataset/preprocessed.csv')
X, y = df['text_clean'], df['sentiment']

In [3]:
# Hold out 20% of the samples for evaluation. The split is stratified on the
# labels and uses a fixed seed so that it is repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [9]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

In [10]:
# Bag-of-words vectorizer. max_features=50 caps the vocabulary at the 50 terms
# with the highest frequency across the corpus it is fitted on.
bow = CountVectorizer(max_features=50)

In [11]:
# Learn the vocabulary from the training text and replace X_train with the
# resulting term-count matrix.
# NOTE(review): X_train is rebound from raw text to a count matrix here, so this
# cell cannot simply be re-run; re-create the train/test split first.
X_train = bow.fit_transform(X_train)

In [13]:
# Transformer that re-weights a term-count matrix into TF-IDF scores
# (constructed with library default settings).
tfidf = TfidfTransformer()

In [14]:
# Fit the IDF weights on the training counts and replace X_train with the
# TF-IDF-weighted matrix.
# NOTE(review): X_train is rebound again, so running this cell twice would apply
# the transformer to already-weighted values.
X_train = tfidf.fit_transform(X_train)

In [16]:
# Convert the training features to a dense array; all estimators below are
# fitted on this dense representation.
# NOTE(review): presumably done because LinearDiscriminantAnalysis does not take
# sparse input — confirm before removing.
X_train = X_train.toarray()

In [18]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# First classifier: linear discriminant analysis with default settings.
lda = LinearDiscriminantAnalysis()

In [19]:
# Train LDA on the dense TF-IDF training features. This is the cell's last
# expression, so the fitted estimator's repr is shown as the output.
lda.fit(X=X_train, y=y_train)

LinearDiscriminantAnalysis()

In [20]:
# Vectorize the held-out text with the vocabulary and IDF weights learned on the
# training set (transform only, so nothing is re-fitted on test data).
# The sparse count matrix goes straight into the TF-IDF transformer; only the
# final result is converted to a dense array, matching the format of X_train.
# This avoids building a throw-away dense copy of the count matrix.
# NOTE(review): X_test is rebound from raw text to features, so this cell cannot
# simply be re-run; re-create the train/test split first.
X_test = tfidf.transform(bow.transform(X_test)).toarray()

In [21]:
# Predict labels for the held-out TF-IDF features with the fitted LDA model.
lda_pred = lda.predict(X=X_test)

In [22]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the LDA predictions on the held-out set.
print(classification_report(y_true=y_test, y_pred=lda_pred))

              precision    recall  f1-score   support

           0       0.96      0.88      0.92      1579
           1       0.95      0.95      0.95      1566
           2       0.96      0.93      0.94      1542
           3       0.94      0.75      0.83      1462
           4       0.64      0.88      0.74      1274

    accuracy                           0.88      7423
   macro avg       0.89      0.88      0.88      7423
weighted avg       0.90      0.88      0.88      7423



In [23]:
from sklearn.naive_bayes import MultinomialNB

# Multinomial naive Bayes with default settings, trained on the same TF-IDF
# training features as the other models.
nb = MultinomialNB()
nb.fit(X=X_train, y=y_train)

MultinomialNB()

In [24]:
# Score naive Bayes on the held-out features and print the per-class report.
nb_pred = nb.predict(X=X_test)
print(classification_report(y_true=y_test, y_pred=nb_pred))

              precision    recall  f1-score   support

           0       0.76      0.95      0.84      1579
           1       0.82      0.95      0.88      1566
           2       0.91      0.92      0.92      1542
           3       0.90      0.71      0.79      1462
           4       0.57      0.42      0.49      1274

    accuracy                           0.80      7423
   macro avg       0.79      0.79      0.78      7423
weighted avg       0.80      0.80      0.79      7423



In [27]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree trained on the TF-IDF training features.
# random_state is pinned to the same seed as the train/test split: without it
# the tree's internal randomisation can produce a different model, and therefore
# different metrics, on every run.
tree = DecisionTreeClassifier(random_state=42)
tree.fit(X_train, y_train)

DecisionTreeClassifier()

In [28]:
# Score the decision tree on the held-out features and print the per-class report.
tree_pred = tree.predict(X=X_test)
print(classification_report(y_true=y_test, y_pred=tree_pred))

              precision    recall  f1-score   support

           0       0.95      0.89      0.92      1579
           1       0.97      0.96      0.97      1566
           2       0.96      0.94      0.95      1542
           3       0.90      0.78      0.84      1462
           4       0.66      0.83      0.74      1274

    accuracy                           0.89      7423
   macro avg       0.89      0.88      0.88      7423
weighted avg       0.90      0.89      0.89      7423



In [29]:
from sklearn.svm import SVC

# Support vector classifier with default hyper-parameters, trained on the same
# TF-IDF training features as the other models.
svm = SVC()
svm.fit(X=X_train, y=y_train)

SVC()

In [30]:
# Score the SVM on the held-out features and print the per-class report.
svm_pred = svm.predict(X=X_test)
print(classification_report(y_true=y_test, y_pred=svm_pred))

              precision    recall  f1-score   support

           0       0.97      0.89      0.93      1579
           1       0.97      0.97      0.97      1566
           2       0.98      0.94      0.96      1542
           3       0.93      0.78      0.85      1462
           4       0.67      0.88      0.76      1274

    accuracy                           0.90      7423
   macro avg       0.90      0.90      0.90      7423
weighted avg       0.91      0.90      0.90      7423

