### Natural Language Processing


In [54]:
import pandas as pd
import numpy as np
import re
import nltk
import spacy
import string
from sklearn.metrics import accuracy_score, classification_report

#### Reading Data

In [2]:
from google.colab import files
uploaded = files.upload()

Saving Sentiment Analysis Dataset.xlsx to Sentiment Analysis Dataset (4).xlsx


In [3]:
import io
df = pd.read_excel(io.BytesIO(uploaded['Sentiment Analysis Dataset.xlsx']))
df["Sentence"] = df["Sentence"].astype(str)

#### Preprocessing
  Dropping irrelevant column

In [4]:
df.drop('id', inplace=True, axis=1)

  Lowering text

In [5]:
df = df.applymap(lambda x: x.lower() if pd.notnull(x) else x)
df.head(5)

Unnamed: 0,Sentence,label
0,don't worry i'm girlhmm how do i know if you ...,others
1,when did i?saw many times i think -_-no. i nev...,angry
2,byby google chromewhere you live,others
3,u r ridiculousi might be ridiculous but i am t...,angry
4,just for time passwt do u do 4 a living thenmaybe,others


  Removing Punctuations

In [6]:
Punctuations = string.punctuation

df["Sentence"] = df["Sentence"].apply(lambda x: x.translate(str.maketrans('', '', Punctuations)))
df.tail(5)

Unnamed: 0,Sentence,label
30154,dilutediluting is for the weakwhat is the mean...,others
30155,i dont worki could take your shifti am a student,others
30156,im not getting you 😭😭😭why are you cryingbecaus...,sad
30157,hahano seriously what is up with that oohad yo...,others
30158,do you singyea a lilnice,others


  Removing Stopwords

In [7]:
# import nltk
# nltk.download('stopwords')
stopwords = nltk.corpus.stopwords.words('english')
more_stop_words = ['u', 'im', 'dont', 'nope', 'nd', 'ur', 'r', '4', 'hv', 'abt']
stopwords.extend(more_stop_words)

df['Sentence'] = df['Sentence'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stopwords)]))
df.tail(5)

Unnamed: 0,Sentence,label
30154,dilutediluting weakwhat meaning weak,others
30155,worki could take shifti student,others
30156,getting 😭😭😭why cryingbecause making sense,sad
30157,hahano seriously oohad breakfast,others
30158,singyea lilnice,others


In [8]:
df.head(10)

Unnamed: 0,Sentence,label
0,worry girlhmm know arewhats name,others
1,isaw many times think never saw,angry
2,byby google chromewhere live,others
3,ridiculousi might ridiculous telling truthu li...,angry
4,time passwt living thenmaybe,others
5,dog personyoure rudewhaaaat,others
6,whatsupnothing much sitting sipping watching t...,others
7,okok backso,others
8,reallyreally really really really reallyy sayi...,others
9,bayin bay😘 love,others


  Converting emoticons to meaningful words

In [9]:
emoticons = {
    u":‑\)":"Happy face or smiley",
    u":\)":"Happy face or smiley",
    u":-\]":"Happy face or smiley",
    u":\]":"Happy face or smiley",
    u":-3":"Happy face smiley",
    u":3":"Happy face smiley",
    u":->":"Happy face smiley",
    u":>":"Happy face smiley",
    u"8-\)":"Happy face smiley",
    u":o\)":"Happy face smiley",
    u":-\}":"Happy face smiley",
    u":\}":"Happy face smiley",
    u":-\)":"Happy face smiley",
    u":c\)":"Happy face smiley",
    u":\^\)":"Happy face smiley",
    u"=\]":"Happy face smiley",
    u"=\)":"Happy face smiley",
    u":‑D":"Laughing, big grin or laugh with glasses",
    u":D":"Laughing, big grin or laugh with glasses",
    u"8‑D":"Laughing, big grin or laugh with glasses",
    u"8D":"Laughing, big grin or laugh with glasses",
    u"X‑D":"Laughing, big grin or laugh with glasses",
    u"XD":"Laughing, big grin or laugh with glasses",
    u"=D":"Laughing, big grin or laugh with glasses",
    u"=3":"Laughing, big grin or laugh with glasses",
    u"B\^D":"Laughing, big grin or laugh with glasses",
    u":-\)\)":"Very happy",
    u":‑\(":"Frown, sad, andry or pouting",
    u":-\(":"Frown, sad, andry or pouting",
    u":\(":"Frown, sad, andry or pouting",
    u":‑c":"Frown, sad, andry or pouting",
    u":c":"Frown, sad, andry or pouting",
    u":‑<":"Frown, sad, andry or pouting",
    u":<":"Frown, sad, andry or pouting",
    u":‑\[":"Frown, sad, andry or pouting",
    u":\[":"Frown, sad, andry or pouting",
    u":-\|\|":"Frown, sad, andry or pouting",
    u">:\[":"Frown, sad, andry or pouting",
    u":\{":"Frown, sad, andry or pouting",
    u":@":"Frown, sad, andry or pouting",
    u">:\(":"Frown, sad, andry or pouting",
    u":'‑\(":"Crying",
    u":'\(":"Crying",
    u":'‑\)":"Tears of happiness",
    u":'\)":"Tears of happiness",
    u"D‑':":"Horror",
    u"D:<":"Disgust",
    u"D:":"Sadness",
    u"D8":"Great dismay",
    u"D;":"Great dismay",
    u"D=":"Great dismay",
    u"DX":"Great dismay",
    u":‑O":"Surprise",
    u":O":"Surprise",
    u":‑o":"Surprise",
    u":o":"Surprise",
    u":-0":"Shock",
    u"8‑0":"Yawn",
    u">:O":"Yawn",
    u":-\*":"Kiss",
    u":\*":"Kiss",
    u":X":"Kiss",
    u";‑\)":"Wink or smirk",
    u";\)":"Wink or smirk",
    u"\*-\)":"Wink or smirk",
    u"\*\)":"Wink or smirk",
    u";‑\]":"Wink or smirk",
    u";\]":"Wink or smirk",
    u";\^\)":"Wink or smirk",
    u":‑,":"Wink or smirk",
    u";D":"Wink or smirk",
    u":‑P":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    u":P":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    u"X‑P":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    u"XP":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    u":‑Þ":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    u":Þ":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    u":b":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    u"d:":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    u"=p":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    u">:P":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    u":‑/":"Skeptical, annoyed, undecided, uneasy or hesitant",
    u":/":"Skeptical, annoyed, undecided, uneasy or hesitant",
    u":-[.]":"Skeptical, annoyed, undecided, uneasy or hesitant",
    u">:[(\\\)]":"Skeptical, annoyed, undecided, uneasy or hesitant",
    u">:/":"Skeptical, annoyed, undecided, uneasy or hesitant",
    u":[(\\\)]":"Skeptical, annoyed, undecided, uneasy or hesitant",
    u"=/":"Skeptical, annoyed, undecided, uneasy or hesitant",
    u"=[(\\\)]":"Skeptical, annoyed, undecided, uneasy or hesitant",
    u":L":"Skeptical, annoyed, undecided, uneasy or hesitant",
    u"=L":"Skeptical, annoyed, undecided, uneasy or hesitant",
    u":S":"Skeptical, annoyed, undecided, uneasy or hesitant",
    u":‑\|":"Straight face",
    u":\|":"Straight face",
    u":$":"Embarrassed or blushing",
    u":‑x":"Sealed lips or wearing braces or tongue-tied",
    u":x":"Sealed lips or wearing braces or tongue-tied",
    u":‑#":"Sealed lips or wearing braces or tongue-tied",
    u":#":"Sealed lips or wearing braces or tongue-tied",
    u":‑&":"Sealed lips or wearing braces or tongue-tied",
    u":&":"Sealed lips or wearing braces or tongue-tied",
    u"O:‑\)":"Angel, saint or innocent",
    u"O:\)":"Angel, saint or innocent",
    u"0:‑3":"Angel, saint or innocent",
    u"0:3":"Angel, saint or innocent",
    u"0:‑\)":"Angel, saint or innocent",
    u"0:\)":"Angel, saint or innocent",
    u":‑b":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    u"0;\^\)":"Angel, saint or innocent",
    u">:‑\)":"Evil or devilish",
    u">:\)":"Evil or devilish",
    u"\}:‑\)":"Evil or devilish",
    u"\}:\)":"Evil or devilish",
    u"3:‑\)":"Evil or devilish",
    u"3:\)":"Evil or devilish",
    u">;\)":"Evil or devilish",
    u"\|;‑\)":"Cool",
    u"\|‑O":"Bored",
    u":‑J":"Tongue-in-cheek",
    u"#‑\)":"Party all night",
    u"%‑\)":"Drunk or confused",
    u"%\)":"Drunk or confused",
    u":-###..":"Being sick",
    u":###..":"Being sick",
    u"<:‑\|":"Dump",
    u"\(>_<\)":"Troubled",
    u"\(>_<\)>":"Troubled",
    u"\(';'\)":"Baby",
    u"\(\^\^>``":"Nervous or Embarrassed or Troubled or Shy or Sweat drop",
    u"\(\^_\^;\)":"Nervous or Embarrassed or Troubled or Shy or Sweat drop",
    u"\(-_-;\)":"Nervous or Embarrassed or Troubled or Shy or Sweat drop",
    u"\(~_~;\) \(・\.・;\)":"Nervous or Embarrassed or Troubled or Shy or Sweat drop",
    u"\(-_-\)zzz":"Sleeping",
    u"\(\^_-\)":"Wink",
    u"\(\(\+_\+\)\)":"Confused",
    u"\(\+o\+\)":"Confused",
    u"\(o\|o\)":"Ultraman",
    u"\^_\^":"Joyful",
    u"\(\^_\^\)/":"Joyful",
    u"\(\^O\^\)／":"Joyful",
    u"\(\^o\^\)／":"Joyful",
    u"\(__\)":"Kowtow as a sign of respect, or dogeza for apology",
    u"_\(\._\.\)_":"Kowtow as a sign of respect, or dogeza for apology",
    u"<\(_ _\)>":"Kowtow as a sign of respect, or dogeza for apology",
    u"<m\(__\)m>":"Kowtow as a sign of respect, or dogeza for apology",
    u"m\(__\)m":"Kowtow as a sign of respect, or dogeza for apology",
    u"m\(_ _\)m":"Kowtow as a sign of respect, or dogeza for apology",
    u"\('_'\)":"Sad or Crying",
    u"\(/_;\)":"Sad or Crying",
    u"\(T_T\) \(;_;\)":"Sad or Crying",
    u"\(;_;":"Sad of Crying",
    u"\(;_:\)":"Sad or Crying",
    u"\(;O;\)":"Sad or Crying",
    u"\(:_;\)":"Sad or Crying",
    u"\(ToT\)":"Sad or Crying",
    u";_;":"Sad or Crying",
    u";-;":"Sad or Crying",
    u";n;":"Sad or Crying",
    u";;":"Sad or Crying",
    u"Q\.Q":"Sad or Crying",
    u"T\.T":"Sad or Crying",
    u"QQ":"Sad or Crying",
    u"Q_Q":"Sad or Crying",
    u"\(-\.-\)":"Shame",
    u"\(-_-\)":"Shame",
    u"\(一一\)":"Shame",
    u"\(；一_一\)":"Shame",
    u"\(=_=\)":"Tired",
    u"\(=\^\·\^=\)":"cat",
    u"\(=\^\·\·\^=\)":"cat",
    u"=_\^=	":"cat",
    u"\(\.\.\)":"Looking down",
    u"\(\._\.\)":"Looking down",
    u"\^m\^":"Giggling with hand covering mouth",
    u"\(\・\・?":"Confusion",
    u"\(?_?\)":"Confusion",
    u">\^_\^<":"Normal Laugh",
    u"<\^!\^>":"Normal Laugh",
    u"\^/\^":"Normal Laugh",
    u"\（\*\^_\^\*）" :"Normal Laugh",
    u"\(\^<\^\) \(\^\.\^\)":"Normal Laugh",
    u"\(^\^\)":"Normal Laugh",
    u"\(\^\.\^\)":"Normal Laugh",
    u"\(\^_\^\.\)":"Normal Laugh",
    u"\(\^_\^\)":"Normal Laugh",
    u"\(\^\^\)":"Normal Laugh",
    u"\(\^J\^\)":"Normal Laugh",
    u"\(\*\^\.\^\*\)":"Normal Laugh",
    u"\(\^—\^\）":"Normal Laugh",
    u"\(#\^\.\^#\)":"Normal Laugh",
    u"\（\^—\^\）":"Waving",
    u"\(;_;\)/~~~":"Waving",
    u"\(\^\.\^\)/~~~":"Waving",
    u"\(-_-\)/~~~ \($\·\·\)/~~~":"Waving",
    u"\(T_T\)/~~~":"Waving",
    u"\(ToT\)/~~~":"Waving",
    u"\(\*\^0\^\*\)":"Excited",
    u"\(\*_\*\)":"Amazed",
    u"\(\*_\*;":"Amazed",
    u"\(\+_\+\) \(@_@\)":"Amazed",
    u"\(\*\^\^\)v":"Laughing,Cheerful",
    u"\(\^_\^\)v":"Laughing,Cheerful",
    u"\(\(d[-_-]b\)\)":"Headphones,Listening to music",
    u'\(-"-\)':"Worried",
    u"\(ーー;\)":"Worried",
    u"\(\^0_0\^\)":"Eyeglasses",
    u"\(\＾ｖ\＾\)":"Happy",
    u"\(\＾ｕ\＾\)":"Happy",
    u"\(\^\)o\(\^\)":"Happy",
    u"\(\^O\^\)":"Happy",
    u"\(\^o\^\)":"Happy",
    u"\)\^o\^\(":"Happy",
    u":O o_O":"Surprised",
    u"o_0":"Surprised",
    u"o\.O":"Surpised",
    u"\(o\.o\)":"Surprised",
    u"oO":"Surprised",
    u"\(\*￣m￣\)":"Dissatisfied",
    u"\(‘A`\)":"Snubbed or Deflated"
}

emoticon_regex = re.compile(u'(' + u'|'.join(k for k in emoticons) + u')')
df["Sentence"] = df["Sentence"].apply(lambda x: emoticon_regex.sub(r'', x))

df.tail(50)

Unnamed: 0,Sentence,label
30109,ask question go itwould fuck chat box,angry
30110,hey today results declaredwaiting results cont...,sad
30111,ok ill talk leavehow younot ok,sad
30112,like guidei needed people could reply language...,angry
30113,like page also follow pagethank 😉,others
30114,saved 🌍do shipno fly drone,others
30115,whomthat answer question bi didnt get,others
30116,losingabout joining themcool,others
30117,intentimes hehehethis d😎,others
30118,going meet frndssame you😂😂😂😂👌,happy


  Converting emojies to meaningful words

In [10]:
emojies_unicodes = {
    u':anger_symbol :': u'\U0001F4A2',
    u':angry_face :': u'\U0001F620',
    u':broken_heart :': u'\U0001F494',
    u':loudly_crying_face :': u'\U0001F62D',
    u':angry_face_with_horns :': u'\U0001F47F',
    u':anguished_face :': u'\U0001F627',
    u':crying_cat_face :': u'\U0001F63F',
    u':cat_face_with_wry_smile :': u'\U0001F63C',
    u':crying_face ': u'\U0001F622',
    u':disappointed_but_relieved_face ': u'\U0001F625',
    u':disappointed_face ': u'\U0001F61E',
    u':dizzy ': u'\U0001F4AB',
    u':dizzy_face ': u'\U0001F635',
    u':face_blowing_a_kiss ': u'\U0001F618',
    u':face_savouring_delicious_food ': u'\U0001F60B',
    u':face_screaming_in_fear ': u'\U0001F631',
    u':face_with_cold_sweat ': u'\U0001F613',
    u':face_with_head-bandage: ': u'\U0001F915',
    u':face_with_medical_mask: ': u'\U0001F637',
    u':face_with_open_mouth: ': u'\U0001F62E',
    u':face_with_open_mouth_&_cold_sweat: ': u'\U0001F630',
    u':face_with_rolling_eyes: ': u'\U0001F644',
    u':face_with_steam_from_nose: ': u'\U0001F624',
    u':face_with_stuck-out_tongue: ': u'\U0001F61B',
    u':face_with_stuck-out_tongue_&_closed_eyes: ': u'\U0001F61D',
    u':face_with_stuck-out_tongue_&_winking_eye: ': u'\U0001F61C',
    u':face_with_tears_of_joy: ': u'\U0001F602',
    u':face_with_thermometer: ': u'\U0001F912',
    u':face_without_mouth: ': u'\U0001F636',
    u':fearful_face: ': u'\U0001F628',
    u':flushed_face: ': u'\U0001F633',
    u':frowning_face: ': u'\U00002639',
    u':frowning_face_with_open_mouth: ': u'\U0001F626',
    u':grimacing_face: ': u'\U0001F62C',
    u':grinning_cat_face_with_smiling_eyes: ': u'\U0001F638',
    u':grinning_face: ': u'\U0001F600',
    u':grinning_face_with_smiling_eyes: ': u'\U0001F601',
    u':growing_heart: ': u'\U0001F497',
    u':kissing_face: ': u'\U0001F617',
    u':kissing_face_with_closed_eyes: ': u'\U0001F61A',
    u':kissing_face_with_smiling_eyes: ': u'\U0001F619',
    u':man_frowning: ': u'\U0001F64D \U0000200D \U00002642 \U0000FE0F',
    u':man_frowning_dark_skin_tone: ': u'\U0001F64D \U0001F3FF \U0000200D \U00002642 \U0000FE0F',
    u':man_frowning_light_skin_tone: ': u'\U0001F64D \U0001F3FB \U0000200D \U00002642 \U0000FE0F',
    u':man_frowning_medium-dark_skin_tone: ': u'\U0001F64D \U0001F3FE \U0000200D \U00002642 \U0000FE0F',
    u':man_frowning_medium-light_skin_tone: ': u'\U0001F64D \U0001F3FC \U0000200D \U00002642 \U0000FE0F',
    u':man_frowning_medium_skin_tone: ': u'\U0001F64D \U0001F3FD \U0000200D \U00002642 \U0000FE0F',
    u':nerd_face: ': u'\U0001F913',
    u':neutral_face: ': u'\U0001F610',
    u':person_frowning: ': u'\U0001F64D',
    u':person_frowning_dark_skin_tone: ': u'\U0001F64D \U0001F3FF',
    u':person_frowning_light_skin_tone: ': u'\U0001F64D \U0001F3FB',
    u':person_frowning_medium-dark_skin_tone: ': u'\U0001F64D \U0001F3FE',
    u':person_frowning_medium-light_skin_tone: ': u'\U0001F64D \U0001F3FC',
    u':person_frowning_medium_skin_tone: ': u'\U0001F64D \U0001F3FD',
    u':pouting_face: ': u'\U0001F621',
    u':sleeping_face: ': u'\U0001F634',
    u':sleepy_face: ': u'\U0001F62A',
    u':slightly_frowning_face: ': u'\U0001F641',
    u':slightly_smiling_face: ': u'\U0001F642',
    u':unamused_face: ': u'\U0001F612',
    u':unicorn_face: ': u'\U0001F984',
    u':winking_face: ': u'\U0001F609',
    u':woman_dancing: ': u'\U0001F483',
    u':woman_dancing_dark_skin_tone: ': u'\U0001F483 \U0001F3FF',
    u':woman_dancing_light_skin_tone: ': u'\U0001F483 \U0001F3FB',
    u':woman_dancing_medium-dark_skin_tone: ': u'\U0001F483 \U0001F3FE',
    u':woman_dancing_medium-light_skin_tone: ': u'\U0001F483 \U0001F3FC',
    u':woman_dancing_medium_skin_tone: ': u'\U0001F483 \U0001F3FD',
    u':woman_facepalming: ': u'\U0001F926 \U0000200D \U00002640 \U0000FE0F',
    u':woman_facepalming_dark_skin_tone: ': u'\U0001F926 \U0001F3FF \U0000200D \U00002640 \U0000FE0F',
    u':woman_facepalming_light_skin_tone: ': u'\U0001F926 \U0001F3FB \U0000200D \U00002640 \U0000FE0F',
    u':woman_facepalming_medium-dark_skin_tone: ': u'\U0001F926 \U0001F3FE \U0000200D \U00002640 \U0000FE0F',
    u':woman_facepalming_medium-light_skin_tone: ': u'\U0001F926 \U0001F3FC \U0000200D \U00002640 \U0000FE0F',
    u':woman_facepalming_medium_skin_tone: ': u'\U0001F926 \U0001F3FD \U0000200D \U00002640 \U0000FE0F',
    u':woman_frowning: ': u'\U0001F64D \U0000200D \U00002640 \U0000FE0F',
    u':woman_frowning_dark_skin_tone: ': u'\U0001F64D \U0001F3FF \U0000200D \U00002640 \U0000FE0F',
    u':woman_frowning_light_skin_tone: ': u'\U0001F64D \U0001F3FB \U0000200D \U00002640 \U0000FE0F',
    u':woman_frowning_medium-dark_skin_tone: ': u'\U0001F64D \U0001F3FE \U0000200D \U00002640 \U0000FE0F',
    u':woman_frowning_medium-light_skin_tone: ': u'\U0001F64D \U0001F3FC \U0000200D \U00002640 \U0000FE0F',
    u':woman_frowning_medium_skin_tone: ': u'\U0001F64D \U0001F3FD \U0000200D \U00002640 \U0000FE0F',
    u':worried_face: ': u'\U0001F61F',
    u':rolling_on_the_floor_laughing: ': u'\U0001F923',
    u':grinning_cat_face_with_smiling_eyes: ': u'\U0001F638',
    u':smiling_cat_face_with_heart-eyes: ': u'\U0001F63B',
    u':smiling_cat_face_with_open_mouth: ': u'\U0001F63A',
    u':smiling_face: ': u'\U0000263A',
    u':smiling_face_with_halo: ': u'\U0001F607',
    u':smiling_face_with_heart-eyes: ': u'\U0001F60D',
    u':smiling_face_with_horns: ': u'\U0001F608',
    u':smiling_face_with_open_mouth: ': u'\U0001F603',
    u':smiling_face_with_open_mouth_&_closed_eyes: ': u'\U0001F606',
    u':smiling_face_with_open_mouth_&_cold_sweat: ': u'\U0001F605',
    u':smiling_face_with_open_mouth_&_smiling_eyes: ': u'\U0001F604',
    u':smiling_face_with_smiling_eyes: ': u'\U0001F60A',
    u':smiling_face_with_sunglasses: ': u'\U0001F60E',
}

emo_unicodes = {v: k for k, v in emojies_unicodes.items()}

for emoji in emo_unicodes:
    df["Sentence"] = df["Sentence"].apply(lambda x: re.sub(r'('+emoji+')', " ".join(emo_unicodes[emoji].replace(","," ").replace("_", " ").replace(":"," ").replace(": "," ").split()), x))
    
df.tail(50)

Unnamed: 0,Sentence,label
30109,ask question go itwould fuck chat box,angry
30110,hey today results declaredwaiting results cont...,sad
30111,ok ill talk leavehow younot ok,sad
30112,like guidei needed people could reply language...,angry
30113,like page also follow pagethank winking face,others
30114,saved 🌍do shipno fly drone,others
30115,whomthat answer question bi didnt get,others
30116,losingabout joining themcool,others
30117,intentimes hehehethis dsmiling face with sungl...,others
30118,going meet frndssame youface with tears of joy...,happy


  Removing remaining emojies from text

In [11]:
emojies_code = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)

df["Sentence"] = df["Sentence"].apply(lambda x: emojies_code.sub(r'', x))
df.tail(50)

Unnamed: 0,Sentence,label
30109,ask question go itwould fuck chat box,angry
30110,hey today results declaredwaiting results cont...,sad
30111,ok ill talk leavehow younot ok,sad
30112,like guidei needed people could reply language...,angry
30113,like page also follow pagethank winking face,others
30114,saved do shipno fly drone,others
30115,whomthat answer question bi didnt get,others
30116,losingabout joining themcool,others
30117,intentimes hehehethis dsmiling face with sungl...,others
30118,going meet frndssame youface with tears of joy...,happy


  Stemming

In [12]:
from nltk.stem.porter import PorterStemmer

stemmer = PorterStemmer()

df["Sentence"] = df["Sentence"].apply(lambda x: " ".join([stemmer.stem(word) for word in x.split()]))
df.tail(5)

Unnamed: 0,Sentence,label
30154,dilutedilut weakwhat mean weak,others
30155,worki could take shifti student,others
30156,get loudli cri faceloudli cri faceloudli cri f...,sad
30157,hahano serious oohad breakfast,others
30158,singyea lilnic,others


  Lemmatizing

In [13]:
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

lemmatizer = WordNetLemmatizer()
POS = {"N":wordnet.NOUN, "V":wordnet.VERB, "J":wordnet.ADJ, "R":wordnet.ADV}

def lemmatize(sentence):
    pos_tag = nltk.pos_tag(sentence.split())
    return " ".join([lemmatizer.lemmatize(word, POS.get(pos[0], wordnet.NOUN)) for word, pos in pos_tag])


df["Sentence"] = df["Sentence"].apply(lambda x: lemmatize(x))
df.head(10)

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Unnamed: 0,Sentence,label
0,worri girlhmm know arewhat name,others
1,isaw mani time think never saw,angry
2,bybi googl chromewher live,others
3,ridiculousi might ridicul tell truthu littl di...,angry
4,time passwt live thenmayb,others
5,dog personyour rudewhaaaat,others
6,whatsupnoth much sit sip watch tv uwhat watch tv,others
7,okok backso,others
8,reallyr realli realli realli reallyy say mani ...,others
9,bayin bayfac blow a kiss love,others


#### TFIDF Vectorizer

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer

phrases = df['Sentence']
tfidf = TfidfVectorizer(max_features=2000, min_df=5, max_df=0.7)

X = tfidf.fit_transform(phrases).toarray()

In [15]:
sum(X)

array([19.98037737, 13.90113184,  5.66418984, ..., 12.44604452,
        5.72122517, 22.21667   ])

#### Label Encoder

In [16]:
from sklearn.preprocessing import LabelEncoder
import keras
import tensorflow 
from tensorflow.keras.utils import to_categorical
from keras import utils

labelencoder = LabelEncoder()
Y = labelencoder.fit_transform(df['label'])
Y

array([2, 0, 2, ..., 3, 2, 2])

In [17]:
y_labels = dict(zip(labelencoder.classes_, labelencoder.transform(labelencoder.classes_)))
y_labels

{'angry': 0, 'happy': 1, 'others': 2, 'sad': 3}

#### Splitting the dataset

In [18]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=42)

In [19]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [20]:
X_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [21]:
y_train

array([2, 2, 0, ..., 1, 2, 2])

In [24]:
# sc = StandardScaler(with_mean = False)

# X_train = sc.fit_transform(X_train_sparse)
# X_test = sc.fit_transform(X_test_sparse)

#### Training the models: ANN, Logistic Regression, Support Vector Machine, Naive Bayes

#### ANN - MultiLinear Perceptron (MLP)

In [114]:
from sklearn.neural_network import MLPClassifier
ann = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)

  ANN - Evaluation Metrics

In [115]:
ann_pred = ann.predict(X_test)
print(classification_report(y_test, ann_pred))

              precision    recall  f1-score   support

           0       0.73      0.66      0.69      1380
           1       0.67      0.58      0.62      1000
           2       0.76      0.83      0.79      3797
           3       0.73      0.69      0.71      1363

    accuracy                           0.74      7540
   macro avg       0.72      0.69      0.70      7540
weighted avg       0.74      0.74      0.74      7540



#### Logistic Regression

In [108]:
from sklearn.linear_model import LogisticRegression

logisticRegr = LogisticRegression(solver = 'saga')
logisticRegr.fit(X_train, y_train)

LogisticRegression(solver='saga')

  Logistic Regression - Evaluation Metrics

In [109]:
y_pred=logisticRegr.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.81      0.64      0.72      1380
           1       0.76      0.59      0.66      1000
           2       0.75      0.90      0.82      3797
           3       0.81      0.67      0.73      1363

    accuracy                           0.77      7540
   macro avg       0.78      0.70      0.73      7540
weighted avg       0.77      0.77      0.76      7540



#### SVM 

In [110]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

svm = make_pipeline(StandardScaler(with_mean=False), SVC(C=1.0, random_state=1, kernel='linear'))
svm.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler(with_mean=False)),
                ('svc', SVC(kernel='linear', random_state=1))])

  SVM - Evaluation Metrics

In [111]:
svm_predict = svm.predict(X_test)
print(classification_report(y_test, svm_predict))

              precision    recall  f1-score   support

           0       0.69      0.67      0.68      1380
           1       0.66      0.63      0.64      1000
           2       0.78      0.83      0.80      3797
           3       0.75      0.65      0.70      1363

    accuracy                           0.74      7540
   macro avg       0.72      0.70      0.71      7540
weighted avg       0.74      0.74      0.74      7540



#### Multinomial Naive Bayes

In [112]:
from sklearn.naive_bayes import MultinomialNB
NB = MultinomialNB()
NB.fit(X_train, y_train)

MultinomialNB()

  Multinomial Naive Bayes - Evaluation Metrics

In [113]:
nb_pred = NB.predict(X_test)
print(classification_report(y_test, nb_pred))

              precision    recall  f1-score   support

           0       0.81      0.53      0.64      1380
           1       0.70      0.50      0.59      1000
           2       0.69      0.91      0.79      3797
           3       0.82      0.56      0.66      1363

    accuracy                           0.72      7540
   macro avg       0.75      0.63      0.67      7540
weighted avg       0.74      0.72      0.71      7540

