In [1]:
# load the dataset from disk and discard the "Timestamp" column
import pandas as pd

raw_frame = pd.read_csv("data.csv")
data = raw_frame.drop(columns=["Timestamp"])

# preview the first rows
data.head()

Unnamed: 0,Text,Class
0,"I’m pregnant. It will be scary, but I’m ready ...",Confident
1,"Please join me in sending prayers, strength an...",Sad
2,the feeling or belief that one can rely on som...,Confident
3,"When we feel confident in others, we know and ...",Confident
4,"When we feel confident in ourselves, we are ab...",Confident


In [2]:
# preprocessing text
import string
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

# Characters to strip: ASCII punctuation plus the typographic apostrophe.
punct = string.punctuation + "’"


def remove_punctuation(text):
    """Return ``text`` lowercased, with every character listed in ``punct`` removed."""
    kept_chars = []
    for ch in text:
        if ch in punct:
            continue
        kept_chars.append(ch)
    cleaned = "".join(kept_chars)
    return cleaned.lower()

# Strip punctuation and lowercase each document; the function is handed to
# apply directly instead of being wrapped in a lambda.
data["Text"] = data["Text"].apply(remove_punctuation)

# Token pattern, tried in order: a run of word characters, a "$" followed by
# digits/dots, or any other run of non-whitespace characters.
tokenizer = RegexpTokenizer(r'\w+|\$[\d\.]+|\S+')
data["Text"] = data["Text"].apply(tokenizer.tokenize)

def remove_stopwords(text):
    """Return the tokens of ``text`` that are not NLTK English stopwords.

    Parameters
    ----------
    text : iterable of str
        Tokens to filter.

    Returns
    -------
    list of str
        The tokens of ``text``, in their original order, minus any token that
        appears in ``stopwords.words('english')``.
    """
    # Build the lookup once per call. Previously the stopword list was
    # requested inside the comprehension's condition, i.e. once per token,
    # and membership was tested against a list instead of a set.
    english_stopwords = set(stopwords.words('english'))
    words = [w for w in text if w not in english_stopwords]
    return words

# Drop stopwords from every token list, then glue the remaining tokens back
# into a single space-separated string per document.
data["Text"] = data["Text"].apply(remove_stopwords)
data["Text"] = data["Text"].apply(" ".join)

# Fit the TF-IDF vectorizer on the cleaned text (fit returns the vectorizer).
vectorizer = TfidfVectorizer().fit(data['Text'])

# Target labels and TF-IDF feature matrix.
Y = data['Class']
X = vectorizer.transform(data['Text'])

In [3]:
# dividing the data in validation and train set for evaluation
from sklearn import model_selection
from sklearn.metrics import classification_report

# A fixed random_state makes the train/test partition identical on every run,
# so the reports printed by the model cells can be reproduced after a kernel
# restart. Without it each run shuffles the rows differently.
# NOTE(review): X comes from a TfidfVectorizer that was fitted on the full
# dataset before this split, so the held-out rows influenced the features;
# consider fitting the vectorizer on the training text only.
train_x, test_x, train_y, test_y = model_selection.train_test_split(
    X, Y, random_state=42
)

In [4]:
# logistic regression
from sklearn.linear_model import LogisticRegression

# Fit on the training split and predict the held-out split in one chained
# call (fit returns the estimator itself).
lr_model = LogisticRegression(solver='lbfgs', multi_class='auto')
lr_pred = lr_model.fit(train_x, train_y).predict(test_x)

# Per-class precision / recall / F1 on the held-out split.
print(classification_report(test_y, lr_pred))

              precision    recall  f1-score   support

   Confident       0.62      1.00      0.77         5
  Optimistic       0.00      0.00      0.00         2
 Pessimistic       0.00      0.00      0.00         1

    accuracy                           0.62         8
   macro avg       0.21      0.33      0.26         8
weighted avg       0.39      0.62      0.48         8



  'precision', 'predicted', average, warn_for)


In [5]:
# Naïve Bayes
from sklearn.naive_bayes import MultinomialNB

# Fit on the training split and predict the held-out split in one chained
# call (fit returns the estimator itself).
mnb_model = MultinomialNB()
mnb_pred = mnb_model.fit(train_x, train_y).predict(test_x)

# Per-class precision / recall / F1 on the held-out split.
print(classification_report(test_y, mnb_pred))

              precision    recall  f1-score   support

   Confident       0.62      1.00      0.77         5
  Optimistic       0.00      0.00      0.00         2
 Pessimistic       0.00      0.00      0.00         1

    accuracy                           0.62         8
   macro avg       0.21      0.33      0.26         8
weighted avg       0.39      0.62      0.48         8



  'precision', 'predicted', average, warn_for)


In [6]:
# Stochastic Gradient Descent
from sklearn.linear_model import SGDClassifier

# SGD shuffles the training data between epochs; seeding the estimator makes
# the fitted model, and therefore the report below, repeatable across runs.
sgd_model = SGDClassifier(random_state=42)
sgd_model.fit(train_x, train_y)

# Predict the held-out split and print per-class precision / recall / F1.
sgd_pred = sgd_model.predict(test_x)
print(classification_report(test_y, sgd_pred))

              precision    recall  f1-score   support

   Confident       0.80      0.80      0.80         5
  Optimistic       0.33      0.50      0.40         2
 Pessimistic       0.00      0.00      0.00         1

    accuracy                           0.62         8
   macro avg       0.38      0.43      0.40         8
weighted avg       0.58      0.62      0.60         8



In [7]:
# K-Nearest Neighbours
from sklearn.neighbors import KNeighborsClassifier

# Fit on the training split and predict the held-out split in one chained
# call (fit returns the estimator itself).
knn_model = KNeighborsClassifier()
knn_pred = knn_model.fit(train_x, train_y).predict(test_x)

# Per-class precision / recall / F1 on the held-out split.
print(classification_report(test_y, knn_pred))

              precision    recall  f1-score   support

   Confident       0.80      0.80      0.80         5
  Optimistic       0.33      0.50      0.40         2
 Pessimistic       0.00      0.00      0.00         1

    accuracy                           0.62         8
   macro avg       0.38      0.43      0.40         8
weighted avg       0.58      0.62      0.60         8



  'precision', 'predicted', average, warn_for)


In [8]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier

# The tree builder uses a random number generator when choosing splits;
# seeding it makes the fitted tree, and the report below, repeatable.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(train_x, train_y)

# Predict the held-out split and print per-class precision / recall / F1.
dt_pred = dt_model.predict(test_x)
print(classification_report(test_y, dt_pred))

              precision    recall  f1-score   support

   Confident       0.50      0.60      0.55         5
  Optimistic       0.00      0.00      0.00         2
 Pessimistic       0.00      0.00      0.00         1

    accuracy                           0.38         8
   macro avg       0.17      0.20      0.18         8
weighted avg       0.31      0.38      0.34         8



  'precision', 'predicted', average, warn_for)


In [10]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

# Bootstrap sampling and per-split feature selection are random; seeding the
# forest makes the fitted ensemble, and the report below, repeatable.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(train_x, train_y)

# Predict the held-out split and print per-class precision / recall / F1.
rf_pred = rf_model.predict(test_x)
print(classification_report(test_y, rf_pred))

              precision    recall  f1-score   support

   Confident       0.62      1.00      0.77         5
  Optimistic       0.00      0.00      0.00         2
 Pessimistic       0.00      0.00      0.00         1

    accuracy                           0.62         8
   macro avg       0.21      0.33      0.26         8
weighted avg       0.39      0.62      0.48         8



  'precision', 'predicted', average, warn_for)


In [12]:
# Support Vector Machine
from sklearn import svm

# Linear-kernel SVC: fit on the training split and predict the held-out split
# in one chained call (fit returns the estimator itself).
svm_model = svm.SVC(kernel='linear')
svm_pred = svm_model.fit(train_x, train_y).predict(test_x)

# Per-class precision / recall / F1 on the held-out split.
print(classification_report(test_y, svm_pred))

              precision    recall  f1-score   support

   Confident       0.62      1.00      0.77         5
  Optimistic       0.00      0.00      0.00         2
 Pessimistic       0.00      0.00      0.00         1

    accuracy                           0.62         8
   macro avg       0.21      0.33      0.26         8
weighted avg       0.39      0.62      0.48         8



  'precision', 'predicted', average, warn_for)
