In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
import joblib

In [2]:
# Load only the tweet text and its sentiment label, discarding any row that
# has a missing value in either column.
data = pd.read_csv(
    'Data/Preprocessed/labelled_data.csv',
    usecols=['text', 'Sentiment'],
).dropna()
# Preview the first rows as the cell's rich output.
data.head()

Unnamed: 0,text,Sentiment
0,Footage airport bomb Ivano-Frankivsk . # Ukrai...,-1.0
1,Ukraine MP Sophia Fedyna tell ground situation...,1.0
2,A cruise missile fire Russian army fell Kiev #...,-1.0
3,🇺🇦 53rd Mechanized Brigade continue suffer los...,-1.0
4,"Now wrong , absolutely wrong , @ JoeBiden , @ ...",-1.0


In [3]:
# Hold out 7% of the rows for evaluation; shuffling is seeded so the split is
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    data['text'],
    data['Sentiment'],
    test_size=0.07,
    shuffle=True,
    random_state=42,
)

In [4]:
# Fit the TF-IDF vocabulary and weights on the training texts only, then
# project both splits with that same fitted vectorizer.
# NOTE(review): no visible cell persists `vectorizer`; the saved models will
# need the identical fitted vectorizer at inference time — confirm it is
# stored elsewhere.
vectorizer = TfidfVectorizer(sublinear_tf=True).fit(X_train)
X_train_tfidf, X_test_tfidf = (
    vectorizer.transform(texts) for texts in (X_train, X_test)
)

## Logistic Regression Classifier

In [24]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# One-vs-rest logistic regression on the TF-IDF features.
# 'saga' is a stochastic solver, so the seed is pinned (same value as the
# train/test split) to make the fitted model and its metrics reproducible.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases;
# it is kept here so the pickled estimator type does not change.
lreg = LogisticRegression(multi_class='ovr', solver='saga', random_state=42)

lreg.fit(X_train_tfidf, y_train)

# Predict the held-out set once and derive every metric from that single
# result; lreg.score() would run a second, identical prediction pass.
y_pred_lreg = lreg.predict(X_test_tfidf)
lreg_accuracy = accuracy_score(y_test, y_pred_lreg)
lreg_error = 1-lreg_accuracy

# Class-frequency-weighted averages of the per-class scores.
lreg_precision, lreg_recall, lreg_f1, lreg_support = precision_recall_fscore_support(y_test, y_pred_lreg, average='weighted')

# Print the results
print("Accuracy:", lreg_accuracy)
print("Error:", lreg_error)
print("Precision:", lreg_precision)
print("Recall:", lreg_recall)
print("F1-score:", lreg_f1)

Accuracy: 0.8656775731885018
Error: 0.1343224268114982
Precision: 0.8652518586490779
Recall: 0.8656775731885018
F1-score: 0.865185875877088


In [34]:
# Persist the fitted logistic-regression model; the list of written paths that
# joblib returns is shown as the cell output.
# NOTE(review): the extension is '.pk1' (digit one) — possibly meant '.pkl';
# left unchanged because loaders may rely on this exact path.
joblib.dump(value=lreg, filename='Models/logisticRegression.pk1')

['Models/logisticRegression.pk1']

## Decision Tree Classifier

In [25]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Decision tree on the TF-IDF features. The seed is pinned (same value as the
# train/test split) so that tie-breaking between equally good splits, and
# therefore the fitted tree and its metrics, are reproducible.
tree = DecisionTreeClassifier(random_state=42)

tree.fit(X_train_tfidf, y_train)

# Predict the held-out set once and derive every metric from that single
# result; tree.score() would run a second, identical prediction pass.
y_pred_tree = tree.predict(X_test_tfidf)
tree_accuracy = accuracy_score(y_test, y_pred_tree)
tree_error = 1-tree_accuracy

# Class-frequency-weighted averages of the per-class scores.
tree_precision, tree_recall, tree_f1, tree_support = precision_recall_fscore_support(y_test, y_pred_tree, average='weighted')

# Print the results
print("Accuracy:", tree_accuracy)
print("Error:", tree_error)
print("Precision:", tree_precision)
print("Recall:", tree_recall)
print("F1-score:", tree_f1)

Accuracy: 0.756524042919592
Error: 0.24347595708040803
Precision: 0.7593589121739158
Recall: 0.756524042919592
F1-score: 0.7572177780630233


In [35]:
# Persist the fitted decision tree; the list of written paths that joblib
# returns is shown as the cell output.
# NOTE(review): the extension is '.pk1' (digit one) — possibly meant '.pkl';
# left unchanged because loaders may rely on this exact path.
joblib.dump(value=tree, filename='Models/decisionTree.pk1')

['Models/decisionTree.pk1']

## KNN Classifier

In [26]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# k-nearest-neighbours classifier voting over the 50 closest training
# documents in TF-IDF space.
knn = KNeighborsClassifier(n_neighbors=50)

knn.fit(X_train_tfidf, y_train)

# Neighbour search happens at prediction time, so predict the held-out set
# once and derive every metric from that result; knn.score() would repeat the
# whole search a second time.
y_pred_knn = knn.predict(X_test_tfidf)
knn_accuracy = accuracy_score(y_test, y_pred_knn)
knn_error = 1-knn_accuracy

# Class-frequency-weighted averages of the per-class scores.
knn_precision, knn_recall, knn_f1, knn_support = precision_recall_fscore_support(y_test, y_pred_knn, average='weighted')

# Print the results
print("Accuracy:", knn_accuracy)
print("Error:", knn_error)
print("Precision:", knn_precision)
print("Recall:", knn_recall)
print("F1-score:", knn_f1)

Accuracy: 0.6690952444032322
Error: 0.3309047555967678
Precision: 0.6720339375482393
Recall: 0.6690952444032322
F1-score: 0.6586883025222584


In [36]:
# Persist the fitted k-NN model; the list of written paths that joblib returns
# is shown as the cell output.
# NOTE(review): the extension is '.pk1' (digit one) — possibly meant '.pkl';
# left unchanged because loaders may rely on this exact path.
joblib.dump(value=knn, filename='Models/kNearestNeighbours.pk1')

['Models/kNearestNeighbours.pk1']

## Random Forest Classifier

In [22]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Random forest of 50 trees on the TF-IDF features. Bootstrapping and feature
# sub-sampling are random, so the seed is pinned (same value as the train/test
# split) to make the fitted model and its metrics reproducible.
forest = RandomForestClassifier(n_estimators=50, random_state=42)

forest.fit(X_train_tfidf, y_train)

# Predict the held-out set once and derive every metric from that single
# result; forest.score() would run a second, identical prediction pass.
y_pred_forest = forest.predict(X_test_tfidf)
forest_accuracy = accuracy_score(y_test, y_pred_forest)
forest_error = 1-forest_accuracy

# Class-frequency-weighted averages of the per-class scores.
forest_precision, forest_recall, forest_f1, forest_support = precision_recall_fscore_support(y_test, y_pred_forest, average='weighted')

# Print the results
print("Accuracy:", forest_accuracy)
print("Error:", forest_error)
print("Precision:", forest_precision)
print("Recall:", forest_recall)
print("F1-score:", forest_f1)

Accuracy: 0.7948072592396344
Error: 0.20519274076036564
Precision: 0.7980302027817792
Recall: 0.7948072592396344
F1-score: 0.7945693111200129


In [37]:
# Persist the fitted random forest; the list of written paths that joblib
# returns is shown as the cell output.
# NOTE(review): the extension is '.pk1' (digit one) — possibly meant '.pkl';
# left unchanged because loaders may rely on this exact path.
joblib.dump(value=forest, filename='Models/randomForest.pk1')

['Models/randomForest.pk1']

## Naive Bayes Classifier

In [23]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.multiclass import OneVsRestClassifier

# Multinomial naive Bayes wrapped in an explicit one-vs-rest scheme, trained
# on the TF-IDF features.
nb = OneVsRestClassifier(MultinomialNB()).fit(X_train_tfidf, y_train)

# Held-out predictions and the accuracy / error rate on the same split.
y_pred_nb = nb.predict(X_test_tfidf)
nb_accuracy = nb.score(X_test_tfidf, y_test)
nb_error = 1 - nb_accuracy

# Class-frequency-weighted averages of the per-class scores.
nb_precision, nb_recall, nb_f1, nb_support = precision_recall_fscore_support(
    y_test,
    y_pred_nb,
    average='weighted',
)

# Report each metric on its own line.
print("\n".join(
    f"{label} {value}"
    for label, value in (
        ("Accuracy:", nb_accuracy),
        ("Error:", nb_error),
        ("Precision:", nb_precision),
        ("Recall:", nb_recall),
        ("F1-score:", nb_f1),
    )
))

Accuracy: 0.6426016690952444
Error: 0.3573983309047556
Precision: 0.7271863583753823
Recall: 0.6426016690952444
F1-score: 0.5877612647472671


In [38]:
# Persist the fitted naive-Bayes model; the list of written paths that joblib
# returns is shown as the cell output.
# NOTE(review): the extension is '.pk1' (digit one) — possibly meant '.pkl';
# left unchanged because loaders may rely on this exact path.
joblib.dump(value=nb, filename='Models/naiveBayes.pk1')

['Models/naiveBayes.pk1']

## SVM Classifier

In [39]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# RBF-kernel support vector classifier with a one-vs-rest decision function,
# trained on the TF-IDF features.
svc = SVC(decision_function_shape='ovr', kernel='rbf')

svc.fit(X_train_tfidf, y_train)

# Kernel prediction is costly, so predict the held-out set once and derive
# every metric from that result; svc.score() would run a second, identical
# prediction pass.
y_pred_svc = svc.predict(X_test_tfidf)
svc_accuracy = accuracy_score(y_test, y_pred_svc)
svc_error = 1-svc_accuracy

# Class-frequency-weighted averages of the per-class scores.
svc_precision, svc_recall, svc_f1, svc_support = precision_recall_fscore_support(y_test, y_pred_svc, average='weighted')

# Print the results
print("Accuracy:", svc_accuracy)
print("Error:", svc_error)
print("Precision:", svc_precision)
print("Recall:", svc_recall)
print("F1-score:", svc_f1)

Accuracy: 0.8768048748178566
Error: 0.12319512518214337
Precision: 0.8767480422787965
Recall: 0.8768048748178566
F1-score: 0.8767566916475652


In [40]:
# Persist the fitted SVM; the list of written paths that joblib returns is
# shown as the cell output.
# NOTE(review): the extension is '.pk1' (digit one) — possibly meant '.pkl';
# left unchanged because loaders may rely on this exact path.
joblib.dump(value=svc, filename='Models/svm.pk1')

['Models/svm.pk1']

## XGBoost Classifier

In [6]:
from sklearn.preprocessing import LabelEncoder

# Map the sentiment labels to consecutive integer class ids.
le = LabelEncoder()
# Learn the label -> id mapping from the training labels only ...
y_train = le.fit_transform(y_train)
# ... and reuse that same mapping for the test labels. Re-fitting on y_test
# would silently produce a different mapping whenever a class is absent from
# the test split, making predictions and ground truth incomparable.
y_test = le.transform(y_test)
# NOTE(review): this rebinds y_train / y_test, so any cell executed after this
# one sees the encoded labels instead of the original Sentiment values.

In [7]:
from xgboost import XGBClassifier

# Gradient-boosted trees with default hyper-parameters, trained on the TF-IDF
# features and the current y_train labels.
xgb = XGBClassifier().fit(X_train_tfidf, y_train)

# Held-out predictions and the accuracy / error rate on the same split.
y_pred_xgb = xgb.predict(X_test_tfidf)
xgb_accuracy = xgb.score(X_test_tfidf, y_test)
xgb_error = 1 - xgb_accuracy

# Class-frequency-weighted averages of the per-class scores.
xgb_precision, xgb_recall, xgb_f1, xgb_support = precision_recall_fscore_support(
    y_test,
    y_pred_xgb,
    average='weighted',
)

# Report each metric on its own line.
print("\n".join(
    f"{label} {value}"
    for label, value in (
        ("Accuracy:", xgb_accuracy),
        ("Error:", xgb_error),
        ("Precision:", xgb_precision),
        ("Recall:", xgb_recall),
        ("F1-score:", xgb_f1),
    )
))

Accuracy: 0.8054046893628295
Error: 0.1945953106371705
Precision: 0.8170799325019951
Recall: 0.8054046893628295
F1-score: 0.8074553334103636
