In [1]:
# importing dataset
import pandas as pd
data = pd.read_csv("text_emotion.csv")
data.head()

Unnamed: 0,tweet_id,sentiment,author,content
0,1956967341,empty,xoshayzers,@tiffanylue i know i was listenin to bad habi...
1,1956967666,sadness,wannamama,Layin n bed with a headache ughhhh...waitin o...
2,1956967696,sadness,coolfunky,Funeral ceremony...gloomy friday...
3,1956967789,enthusiasm,czareaquino,wants to hang out with friends SOON!
4,1956968416,neutral,xkilljoyx,@dannycastillo We want to trade with someone w...


In [2]:
data = data.drop(columns=["tweet_id","author"])

In [3]:
# preprocessing text
import string
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

punct = string.punctuation + "’"

def remove_punctuation(text):
    no_punct = "".join([c for c in text if c not in punct])
    return no_punct.lower()

data["content"] = data["content"].apply(lambda x: remove_punctuation(x))

tokenizer = RegexpTokenizer(r'\w+|\$[\d\.]+|\S+')

data["content"] = data["content"].apply(lambda x: tokenizer.tokenize(x))

def remove_stopwords(text):
    words = [w for w in text if w not in stopwords.words('english')]
    return words

data["content"] = data["content"].apply(lambda x: remove_stopwords(x))

data["content"] = data["content"].apply(lambda x:" ".join(x))

vectorizer = TfidfVectorizer()
vectorizer.fit(data['content'])

X = vectorizer.transform(data['content'])
Y = data['sentiment']

In [4]:
# dividing the data in validation and train set for evaluation
from sklearn import model_selection
from sklearn.metrics import classification_report

train_x, test_x, train_y, test_y = model_selection.train_test_split(X,Y)

In [5]:
# logistic regression
from sklearn.linear_model import LogisticRegression

lr_model = LogisticRegression(solver='lbfgs',multi_class='auto')
lr_model.fit(train_x,train_y)

lr_pred = lr_model.predict(test_x)
print(classification_report(test_y, lr_pred))

  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

       anger       0.00      0.00      0.00        31
     boredom       0.00      0.00      0.00        38
       empty       0.00      0.00      0.00       198
  enthusiasm       0.00      0.00      0.00       175
         fun       0.29      0.01      0.02       452
   happiness       0.35      0.36      0.35      1314
        hate       0.53      0.12      0.20       339
        love       0.53      0.34      0.41       968
     neutral       0.33      0.61      0.43      2234
      relief       0.32      0.02      0.03       381
     sadness       0.37      0.23      0.29      1265
    surprise       0.26      0.02      0.03       558
       worry       0.33      0.49      0.40      2047

    accuracy                           0.35     10000
   macro avg       0.26      0.17      0.17     10000
weighted avg       0.35      0.35      0.31     10000



In [6]:
# Naïve Bayes

from sklearn.naive_bayes import MultinomialNB
mnb_model = MultinomialNB()
mnb_model.fit(train_x,train_y)

mnb_pred = mnb_model.predict(test_x)
print(classification_report(test_y, mnb_pred))

              precision    recall  f1-score   support

       anger       0.00      0.00      0.00        31
     boredom       0.00      0.00      0.00        38
       empty       0.00      0.00      0.00       198
  enthusiasm       0.00      0.00      0.00       175
         fun       0.00      0.00      0.00       452
   happiness       0.37      0.10      0.15      1314
        hate       0.00      0.00      0.00       339
        love       0.65      0.13      0.22       968
     neutral       0.32      0.44      0.37      2234
      relief       0.00      0.00      0.00       381
     sadness       0.26      0.01      0.01      1265
    surprise       0.00      0.00      0.00       558
       worry       0.26      0.80      0.39      2047

    accuracy                           0.29     10000
   macro avg       0.14      0.11      0.09     10000
weighted avg       0.27      0.29      0.21     10000



  'precision', 'predicted', average, warn_for)


In [7]:
# Stochastic Gradient Descent
from sklearn.linear_model import SGDClassifier

sgd_model = SGDClassifier()
sgd_model.fit(train_x,train_y)

sgd_pred = sgd_model.predict(test_x)
print(classification_report(test_y, sgd_pred))

              precision    recall  f1-score   support

       anger       0.00      0.00      0.00        31
     boredom       0.00      0.00      0.00        38
       empty       0.03      0.01      0.01       198
  enthusiasm       0.03      0.01      0.01       175
         fun       0.10      0.04      0.05       452
   happiness       0.29      0.28      0.29      1314
        hate       0.33      0.19      0.24       339
        love       0.39      0.45      0.42       968
     neutral       0.35      0.48      0.41      2234
      relief       0.14      0.05      0.08       381
     sadness       0.28      0.26      0.27      1265
    surprise       0.12      0.03      0.05       558
       worry       0.33      0.43      0.37      2047

    accuracy                           0.32     10000
   macro avg       0.18      0.17      0.17     10000
weighted avg       0.29      0.32      0.30     10000



In [8]:
# K-Nearest Neighbours

from sklearn.neighbors import KNeighborsClassifier

knn_model = KNeighborsClassifier()
knn_model.fit(train_x,train_y)

knn_pred = knn_model.predict(test_x)
print(classification_report(test_y, knn_pred))

  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

       anger       0.00      0.00      0.00        31
     boredom       0.00      0.00      0.00        38
       empty       0.01      0.04      0.02       198
  enthusiasm       0.00      0.00      0.00       175
         fun       0.00      0.00      0.00       452
   happiness       0.26      0.03      0.05      1314
        hate       0.22      0.01      0.02       339
        love       0.57      0.08      0.13       968
     neutral       0.23      0.80      0.36      2234
      relief       0.25      0.00      0.01       381
     sadness       0.23      0.01      0.03      1265
    surprise       0.00      0.00      0.00       558
       worry       0.22      0.11      0.15      2047

    accuracy                           0.22     10000
   macro avg       0.15      0.08      0.06     10000
weighted avg       0.23      0.22      0.13     10000



In [9]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier

dt_model = DecisionTreeClassifier()
dt_model.fit(train_x,train_y)

dt_pred = dt_model.predict(test_x)
print(classification_report(test_y, dt_pred))

              precision    recall  f1-score   support

       anger       0.00      0.00      0.00        31
     boredom       0.00      0.00      0.00        38
       empty       0.01      0.01      0.01       198
  enthusiasm       0.04      0.02      0.03       175
         fun       0.08      0.06      0.07       452
   happiness       0.25      0.30      0.27      1314
        hate       0.22      0.16      0.19       339
        love       0.35      0.31      0.33       968
     neutral       0.35      0.41      0.38      2234
      relief       0.07      0.04      0.05       381
     sadness       0.23      0.22      0.23      1265
    surprise       0.08      0.06      0.07       558
       worry       0.28      0.32      0.30      2047

    accuracy                           0.27     10000
   macro avg       0.15      0.15      0.15     10000
weighted avg       0.25      0.27      0.26     10000



In [10]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier()
rf_model.fit(train_x,train_y)

rf_pred = rf_model.predict(test_x)
print(classification_report(test_y, rf_pred))



              precision    recall  f1-score   support

       anger       0.00      0.00      0.00        31
     boredom       0.08      0.03      0.04        38
       empty       0.07      0.02      0.02       198
  enthusiasm       0.00      0.00      0.00       175
         fun       0.13      0.05      0.07       452
   happiness       0.27      0.31      0.29      1314
        hate       0.28      0.14      0.19       339
        love       0.40      0.32      0.36       968
     neutral       0.33      0.56      0.41      2234
      relief       0.13      0.03      0.05       381
     sadness       0.25      0.19      0.22      1265
    surprise       0.13      0.03      0.05       558
       worry       0.30      0.34      0.32      2047

    accuracy                           0.30     10000
   macro avg       0.18      0.15      0.16     10000
weighted avg       0.27      0.30      0.27     10000



  'precision', 'predicted', average, warn_for)


In [11]:
# Support Vector Machine
from sklearn import svm

svm_model = svm.SVC(kernel='linear')
svm_model.fit(train_x,train_y)

svm_pred = svm_model.predict(test_x)
print(classification_report(test_y, svm_pred))

  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

       anger       0.00      0.00      0.00        31
     boredom       0.00      0.00      0.00        38
       empty       0.00      0.00      0.00       198
  enthusiasm       0.00      0.00      0.00       175
         fun       0.10      0.00      0.01       452
   happiness       0.33      0.38      0.35      1314
        hate       0.44      0.15      0.22       339
        love       0.51      0.36      0.42       968
     neutral       0.35      0.60      0.44      2234
      relief       0.38      0.03      0.05       381
     sadness       0.37      0.22      0.27      1265
    surprise       0.28      0.03      0.05       558
       worry       0.34      0.48      0.40      2047

    accuracy                           0.35     10000
   macro avg       0.24      0.17      0.17     10000
weighted avg       0.33      0.35      0.31     10000

