In [56]:
# !pip install spacy
# !pip install vaderSentiment

In [57]:
import pandas as pd
import re
import spacy
import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import LinearSVC

In [58]:
dff=pd.read_csv("emotions/train.txt",sep=";",names=['text','emotions'])
dff.head()

Unnamed: 0,text,emotions
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


# EDA Starts


Question1

In [59]:
df=dff.copy()

In [60]:
df.isnull().any() #no missing value

text        False
emotions    False
dtype: bool

In [61]:
df.iloc[0,0]

'i didnt feel humiliated'

In [62]:
coverage=round(len(df.iloc[0,0])/len(set(df.iloc[0,0])),2)
coverage

1.92

In [63]:
df_q1=df.copy()

In [64]:
df.head()

Unnamed: 0,text,emotions
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


Question 2

In [65]:
df['no_punct']=df['text'].str.lower().apply(lambda x:re.sub(r'[^a-z\s]','',x))

In [66]:
#loading spacy
nlp=spacy.load(name="en_core_web_sm")
from spacy.tokenizer import Tokenizer
from nltk.corpus import stopwords
stop_words=set(stopwords.words('english'))
from nltk.stem import WordNetLemmatizer


In [77]:
#Spacy tokenizer
def tokens(text):
    doc=nlp(text)
    token=[i.text for i in doc]
    return token
df['tokenized']=df['no_punct'].apply(lambda x:tokens(x))

#stopwords removal

def remove_stopwords(text,stop_words):
    withoutstopwords=[i for i in text if i not in stop_words]
    return ', '.join(withoutstopwords)
    
df['rem_stop']=df['tokenized'].apply(lambda x:remove_stopwords(x,stop_words))

#lemmatization
lemmatizer=WordNetLemmatizer()
def lemma(text):
    text = text.split(", ") # as rem_stop is having , seperator
    lemma_Word=[lemmatizer.lemmatize(w) for w in text]
    return lemma_Word
df['lemmatized']=df['rem_stop'].apply(lambda x:lemma(x))
    


In [78]:
df.head()

Unnamed: 0,text,emotions,no_punct,tokenized,rem_stop,lemmatized
0,i didnt feel humiliated,sadness,i didnt feel humiliated,"[i, did, nt, feel, humiliated]","nt, feel, humiliated","[nt, feel, humiliated]"
1,i can go from feeling so hopeless to so damned...,sadness,i can go from feeling so hopeless to so damned...,"[i, can, go, from, feeling, so, hopeless, to, ...","go, feeling, hopeless, damned, hopeful, around...","[go, feeling, hopeless, damned, hopeful, aroun..."
2,im grabbing a minute to post i feel greedy wrong,anger,im grabbing a minute to post i feel greedy wrong,"[i, m, grabbing, a, minute, to, post, i, feel,...","grabbing, minute, post, feel, greedy, wrong","[grabbing, minute, post, feel, greedy, wrong]"
3,i am ever feeling nostalgic about the fireplac...,love,i am ever feeling nostalgic about the fireplac...,"[i, am, ever, feeling, nostalgic, about, the, ...","ever, feeling, nostalgic, fireplace, know, sti...","[ever, feeling, nostalgic, fireplace, know, st..."
4,i am feeling grouchy,anger,i am feeling grouchy,"[i, am, feeling, grouchy]","feeling, grouchy","[feeling, grouchy]"


In [79]:
df_q3=df.copy()

In [80]:
df.head()

Unnamed: 0,text,emotions,no_punct,tokenized,rem_stop,lemmatized
0,i didnt feel humiliated,sadness,i didnt feel humiliated,"[i, did, nt, feel, humiliated]","nt, feel, humiliated","[nt, feel, humiliated]"
1,i can go from feeling so hopeless to so damned...,sadness,i can go from feeling so hopeless to so damned...,"[i, can, go, from, feeling, so, hopeless, to, ...","go, feeling, hopeless, damned, hopeful, around...","[go, feeling, hopeless, damned, hopeful, aroun..."
2,im grabbing a minute to post i feel greedy wrong,anger,im grabbing a minute to post i feel greedy wrong,"[i, m, grabbing, a, minute, to, post, i, feel,...","grabbing, minute, post, feel, greedy, wrong","[grabbing, minute, post, feel, greedy, wrong]"
3,i am ever feeling nostalgic about the fireplac...,love,i am ever feeling nostalgic about the fireplac...,"[i, am, ever, feeling, nostalgic, about, the, ...","ever, feeling, nostalgic, fireplace, know, sti...","[ever, feeling, nostalgic, fireplace, know, st..."
4,i am feeling grouchy,anger,i am feeling grouchy,"[i, am, feeling, grouchy]","feeling, grouchy","[feeling, grouchy]"


In [81]:
#pos tagging
new_df=df.head(10) 
def pos_tag(text):
    doc=nlp(text)#just like before in tokenize()
    post_term=[i.pos_ for i in doc]
    return post_term
new_df['pos_tag']=new_df['text'].apply(lambda x:pos_tag(x))

df_q4=new_df.copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['pos_tag']=new_df['text'].apply(lambda x:pos_tag(x))


In [82]:
#name entity recognition(NER)
def ents(text):
    doc=nlp(text)
    entss=[(ent.text,ent.label_) for ent in doc.ents]
    return entss
new_df['named_entity']=new_df['text'].apply(lambda x:ents(x))
df_q5=new_df.copy()
    

    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['named_entity']=new_df['text'].apply(lambda x:ents(x))


In [83]:
#Polarity score
obj=SentimentIntensityAnalyzer()
def calculate_polarity(text):
    pol=obj.polarity_scores(text)
    return pol
new_df['polarity_score']=new_df['text'].apply(lambda x:calculate_polarity(x))
df_q6=new_df.copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['polarity_score']=new_df['text'].apply(lambda x:calculate_polarity(x))


Feature Extraction and model Selection

In [84]:
df.head()

Unnamed: 0,text,emotions,no_punct,tokenized,rem_stop,lemmatized
0,i didnt feel humiliated,sadness,i didnt feel humiliated,"[i, did, nt, feel, humiliated]","nt, feel, humiliated","[nt, feel, humiliated]"
1,i can go from feeling so hopeless to so damned...,sadness,i can go from feeling so hopeless to so damned...,"[i, can, go, from, feeling, so, hopeless, to, ...","go, feeling, hopeless, damned, hopeful, around...","[go, feeling, hopeless, damned, hopeful, aroun..."
2,im grabbing a minute to post i feel greedy wrong,anger,im grabbing a minute to post i feel greedy wrong,"[i, m, grabbing, a, minute, to, post, i, feel,...","grabbing, minute, post, feel, greedy, wrong","[grabbing, minute, post, feel, greedy, wrong]"
3,i am ever feeling nostalgic about the fireplac...,love,i am ever feeling nostalgic about the fireplac...,"[i, am, ever, feeling, nostalgic, about, the, ...","ever, feeling, nostalgic, fireplace, know, sti...","[ever, feeling, nostalgic, fireplace, know, st..."
4,i am feeling grouchy,anger,i am feeling grouchy,"[i, am, feeling, grouchy]","feeling, grouchy","[feeling, grouchy]"


In [122]:
x=df['lemmatized'].astype(str)
y=df['emotions']

count_vect = CountVectorizer()
count_fit = count_vect.fit(x)  # Fit CountVectorizer on X and transform

# Transform X and convert to DataFrame with feature names
X_transformed = count_fit.transform(x)
words = pd.DataFrame(X_transformed.toarray(), columns=count_fit.get_feature_names_out())

In [124]:
words=pd.DataFrame(words)
words.head()

Unnamed: 0,aa,aaaaaaand,aaaaand,aaaand,aac,aahhh,aaron,ab,abandon,abandoned,...,zoned,zonisamide,zoo,zoom,zooming,zq,zucchini,zum,zumba,zz
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [125]:
X_train, X_test, y_train, y_test=train_test_split(words,y,test_size=0.2,random_state=42)


In [133]:
model=LinearSVC()
model=model.fit(X_train, y_train)



In [135]:
y_pred=model.predict(X_test)


In [143]:
acc_score=round(model.score(X_test,y_test),2)
print(acc_score*100)

89.0


In [140]:
from sklearn.metrics import classification_report

In [141]:
class_report = classification_report(y_test, y_pred)

In [142]:
class_report

'              precision    recall  f1-score   support\n\n       anger       0.89      0.88      0.88       427\n        fear       0.84      0.86      0.85       397\n         joy       0.90      0.92      0.91      1021\n        love       0.82      0.78      0.80       296\n     sadness       0.92      0.92      0.92       946\n    surprise       0.84      0.73      0.78       113\n\n    accuracy                           0.89      3200\n   macro avg       0.87      0.85      0.86      3200\nweighted avg       0.89      0.89      0.89      3200\n'