In [16]:
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder

from sklearn.svm import LinearSVC

In [None]:
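# The train, val and test sets are available at the Drive link below.
# Download them and place them where the cells below read them from
# (/content/drive/MyDrive/Datasets/Sentiment Analysis/), or change those paths to wherever you uploaded the files.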

# https://drive.google.com/drive/folders/12GqRiyFHPUauuY0IIMlycVPRv_DuG5FI?usp=sharing

In [34]:
# Load the training split. The file has no header row; each line is split on ';'
# into the two columns named below.
df1 = pd.read_csv(
    '/content/drive/MyDrive/Datasets/Sentiment Analysis/train.txt',
    sep=';',
    names=['Text', 'Mood'],
)
display(df1.head())

# TF-IDF over uni-, bi- and tri-grams with English stop words removed.
# min_df / max_df are absolute document counts here (ints, not fractions).
cv = TfidfVectorizer(
    stop_words='english',
    min_df=3,
    max_df=300,
    ngram_range=(1, 3),
)
X_train = cv.fit_transform(df1['Text'])

# Map the mood strings to integer class ids.
le = LabelEncoder()
y_train = le.fit_transform(df1['Mood'])

Unnamed: 0,Text,Mood
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [35]:
# Load the validation and test splits with the same layout as the training file.
df2 = pd.read_csv(
    '/content/drive/MyDrive/Datasets/Sentiment Analysis/val.txt',
    sep=';',
    names=['Text', 'Mood'],
)
df3 = pd.read_csv(
    '/content/drive/MyDrive/Datasets/Sentiment Analysis/test.txt',
    sep=';',
    names=['Text', 'Mood'],
)

# Only transform here: the vectorizer and label encoder were fitted on the
# training split and must not be refitted on held-out data.
X_val, y_val = cv.transform(df2['Text']), le.transform(df2['Mood'])
X_test, y_test = cv.transform(df3['Text']), le.transform(df3['Mood'])

In [36]:
# Fit a linear SVM on the TF-IDF features and report accuracy (in percent)
# on the validation and test splits.
#
# random_state is pinned because the hinge loss is solved in the dual, and that
# solver draws random numbers while fitting; without a seed the scores can
# differ slightly from one run to the next.
svc = LinearSVC(loss = 'hinge', random_state = 42)
svc.fit(X_train, y_train)

score_svc_val = svc.score(X_val, y_val)
print('Val set accuracy using SVC: ', score_svc_val*100)

score_svc_test = svc.score(X_test, y_test)
print('Test set accuracy using SVC: ', score_svc_test*100)

Val set accuracy using SVC:  90.85
Test set accuracy using SVC:  90.5


Other models, like Logistic Regression and Multinomial Naive Bayes, were also trained and tested, but LinearSVC() gave the best results.

In [37]:
# The function below picks, for every row, the vocabulary term with the highest TF-IDF weight.
# It is shown as the "supporting text" for a prediction. Note that it looks only at the
# vectorized input; the classifier itself is not consulted.

def abc(x, vectorizer=None):
  """Return the highest-weighted vocabulary term for each row of a TF-IDF matrix.

  Parameters
  ----------
  x : sparse matrix
      Vectorized documents; must provide ``toarray()`` returning a 2-D array
      of shape (n_documents, n_features).
  vectorizer : optional
      Fitted vectorizer whose feature names label the columns of ``x``.
      Defaults to the notebook-level ``cv``.

  Returns
  -------
  list of str
      One entry per row. When several columns share the maximum weight the
      first (lowest column index) wins. A row with no non-zero weight, i.e. a
      text containing no in-vocabulary term, yields ``''`` rather than an
      arbitrary vocabulary entry.
  """
  if vectorizer is None:
    vectorizer = cv

  # get_feature_names() was removed in scikit-learn 1.2; use its replacement
  # when available and fall back to the old name on older installations.
  get_names = getattr(vectorizer, 'get_feature_names_out', None) or vectorizer.get_feature_names
  names = get_names()  # fetched once, not once per row

  s1 = x.toarray()
  best_columns = s1.argmax(axis=1)  # argmax returns the first maximum, like the original scan

  v = []
  for row, col in enumerate(best_columns):
    v.append(names[col] if s1[row].any() else '')
  return v

In [38]:
# Print the first N_PREVIEW test sentences with their predicted mood, actual mood
# and the supporting text returned by abc().

N_PREVIEW = 10  # number of leading test rows to show

y_pred_svc = svc.predict(X_test[:N_PREVIEW])

supporting_texts = abc(X_test[:N_PREVIEW])
predicted_moods = le.inverse_transform(y_pred_svc)
# Decode only the rows being displayed instead of the whole test set.
actual_moods = le.inverse_transform(y_test[:N_PREVIEW])

print('Predicted vs Actual mood using SVC\n')
for sentence, support, predicted, actual in zip(df3.iloc[:N_PREVIEW, 0], supporting_texts, predicted_moods, actual_moods):
  print(sentence)
  print('Predicted: ', predicted, '\tActual: ', actual, '\tSupporting Text: ', support)
  print()

Predicted vs Actual mood using SVC

im feeling rather rotten so im not very ambitious right now
Predicted:  sadness 	Actual:  sadness 	Supporting Text:  feeling rotten

im updating my blog because i feel shitty
Predicted:  sadness 	Actual:  sadness 	Supporting Text:  blog feel

i never make her separate from me because i don t ever want her to feel like i m ashamed with her
Predicted:  sadness 	Actual:  sadness 	Supporting Text:  separate

i left with my bouquet of red and yellow tulips under my arm feeling slightly more optimistic than when i arrived
Predicted:  joy 	Actual:  joy 	Supporting Text:  arrived

i was feeling a little vain when i did this one
Predicted:  sadness 	Actual:  sadness 	Supporting Text:  vain

i cant walk into a shop anywhere where i do not feel uncomfortable
Predicted:  fear 	Actual:  fear 	Supporting Text:  shop

i felt anger when at the end of a telephone call
Predicted:  sadness 	Actual:  anger 	Supporting Text:  anger

i explain why i clung to a relationshi

In [39]:
# Classify a single sentence typed by the user, using the vectorizer, model and
# label encoder fitted in the earlier cells.
text = [input("Enter the text: ")]  # wrapped in a list: transform() takes a collection of documents
text_ = cv.transform(text)
pred = svc.predict(text_)

predicted_mood = le.inverse_transform(pred)
print("The mood is: ", predicted_mood)

Enter the text: He was really irritated yesterday
The mood is:  ['anger']
