In [1]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
# Load only the two columns the models use and drop rows where either is missing.
data = pd.read_csv(
    'Data/Preprocessed/labelled_data.csv',
    usecols=['text', 'Sentiment'],
).dropna()
# NOTE(review): the preview below shows mis-encoded emoji bytes in some tweets;
# confirm the preprocessed CSV was written and is being read as UTF-8.
data.head()

Unnamed: 0,text,Sentiment
0,Footage airport bomb Ivano-Frankivsk . # Ukrai...,-1.0
1,Ukraine MP Sophia Fedyna tell ground situation...,1.0
2,A cruise missile fire Russian army fell Kiev #...,-1.0
3,ðŸ‡ºðŸ‡¦ 53rd Mechanized Brigade continue suffer los...,-1.0
4,"Now wrong , absolutely wrong , @ JoeBiden , @ ...",-1.0


In [3]:
# Targets: encode the distinct Sentiment values as consecutive integer class codes.
lab = LabelEncoder()
Y = lab.fit_transform(data["Sentiment"])

# Features: sparse TF-IDF matrix built from the tweet text, with sublinear
# (logarithmic) term-frequency scaling.
# NOTE(review): the vectorizer is fitted on every row before cross-validation, so the
# IDF weights are computed with the held-out folds included. Consider moving the
# vectorizer into a Pipeline that is passed to cross_val_score.
vectorizer = TfidfVectorizer(sublinear_tf=True)
X = vectorizer.fit_transform(data["text"])

In [4]:
# Baseline: one-vs-rest multinomial Naive Bayes, scored by accuracy over 14 CV folds.
nb = OneVsRestClassifier(estimator=MultinomialNB())

score_nb = cross_val_score(
    estimator=nb,
    X=X,
    y=Y,
    scoring='accuracy',
    cv=14,
)
print(f"Accuracies : {score_nb}\nAverage Accuracy : {score_nb.mean()}")

Accuracies : [0.5365442  0.55238219 0.58587563 0.58795275 0.59574192 0.6520836
 0.62988446 0.68475721 0.66865749 0.71838484 0.65684238 0.60997144
 0.63334199 0.64359907]
Average Accuracy : 0.6254299396747227


In [5]:
# One-vs-rest logistic regression on the TF-IDF features, scored by accuracy over 14 CV folds.
#
# The `multi_class='ovr'` argument of LogisticRegression is deprecated in recent
# scikit-learn releases; wrapping the estimator in OneVsRestClassifier is the documented
# replacement and also works on older versions. `lreg` is therefore the wrapper:
# once fitted, the per-class models are available as `lreg.estimators_`.
#
# `saga` is a stochastic solver, so random_state is pinned to make the fold scores
# reproducible across kernel restarts.
lreg = OneVsRestClassifier(LogisticRegression(solver='saga', random_state=42))

score_lreg = cross_val_score(lreg, X, Y, cv=14, scoring='accuracy')
print(f"Accuracies : {score_lreg}\nAverage Accuracy : {score_lreg.mean()}")

Accuracies : [0.84655329 0.84811113 0.861872   0.84772167 0.84084123 0.86304037
 0.83629755 0.86146455 0.84510517 0.87094261 0.85120748 0.87042327
 0.84938977 0.83861335]
Average Accuracy : 0.8522559591457484


In [6]:
# Gradient-boosted trees with library-default hyperparameters, scored by accuracy
# over the same 14-fold cross-validation used for the other models.
xgb = XGBClassifier()
score_xgb = cross_val_score(
    estimator=xgb,
    X=X,
    y=Y,
    scoring='accuracy',
    cv=14,
)
print(f"Accuracies : {score_xgb}\nAverage Accuracy : {score_xgb.mean()}")

Accuracies : [0.79579385 0.792808   0.81331949 0.79955861 0.8047514  0.82824873
 0.78787485 0.8170605  0.80498572 0.8209556  0.80511555 0.81783952
 0.79537782 0.78343287]
Average Accuracy : 0.8047944656774989
