# VADER

In [132]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import nltk
nltk.download('vader_lexicon')
%matplotlib inline
import warnings
from nltk.sentiment.vader import SentimentIntensityAnalyzer

warnings.filterwarnings("ignore")

import os

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/satyasasivatsal/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [133]:
vader_df = pd.read_csv('3MZ6aTpFrE0_Afiltered.csv', error_bad_lines=False)

In [134]:
vader_df

Unnamed: 0.1,Unnamed: 0,Comments,Comment ID
0,0,hey guys any other awesome deal that we missed...,298107_1
1,1,what s best smartphone under k inr for best ca...,774454_1
2,2,whats best smartphone for best camera under k ...,378283_1
3,3,please make best smartphone under k no one tou...,674053_1
4,4,there is no discount on asus vivobook pro,390621_1
...,...,...,...
941,949,to al those who are saying dont buy s fe cuz i...,680052_1
942,950,i see someone commenting exynos br br there is...,969564_1
943,951,creaky sound make this video annoying,194318_1
944,952,amazon is overrated,515685_1


In [135]:
vader_df['Comments'] = vader_df['Comments'].apply(lambda x: ' '.join([w for w in x.split() if len(w)>3]))
vader_df['Comments'] = vader_df['Comments'].apply(lambda x:x.lower())

tokenized_tweet = vader_df['Comments'].apply(lambda x: x.split())

wnl = WordNetLemmatizer()

tokenized_tweet.apply(lambda x: [wnl.lemmatize(i) for i in x if i not in set(stopwords.words('english'))]) 
tokenized_tweet.head()

for i in range(len(tokenized_tweet)):
    tokenized_tweet[i] = ' '.join(tokenized_tweet[i])
    
    
vader_df['Comments'] = tokenized_tweet

sia = SentimentIntensityAnalyzer()
vader_df['Sentiment Scores'] = vader_df['Comments'].apply(lambda x:sia.polarity_scores(x)['compound'])
vader_df['Sentiment'] = vader_df['Sentiment Scores'].apply(lambda s : 'Positive' if s > 0 else ('Neutral' if s == 0 else 'Negative'))

In [136]:
vader_df.Sentiment.value_counts()

Neutral     441
Positive    418
Negative     87
Name: Sentiment, dtype: int64

In [137]:
vader_percentages = vader_df['Sentiment'].value_counts(normalize=True) * 100
vader_percentages

Neutral     46.617336
Positive    44.186047
Negative     9.196617
Name: Sentiment, dtype: float64

In [138]:
vader_df

Unnamed: 0.1,Unnamed: 0,Comments,Comment ID,Sentiment Scores,Sentiment
0,0,guys other awesome deal that missed,298107_1,0.4404,Positive
1,1,what best smartphone under best camera vivo pi...,774454_1,0.9274,Positive
2,2,whats best smartphone best camera under need o...,378283_1,0.8555,Positive
3,3,please make best smartphone under touches segm...,674053_1,0.9100,Positive
4,4,there discount asus vivobook,390621_1,0.0000,Neutral
...,...,...,...,...,...
941,949,those saying dont updates anymore guys phone s...,680052_1,-0.5012,Negative
942,950,someone commenting exynos there snapdragon,969564_1,0.0000,Neutral
943,951,creaky sound make this video annoying,194318_1,-0.4019,Negative
944,952,amazon overrated,515685_1,0.1779,Positive


# Text Blob

In [139]:
from textblob import TextBlob
import pandas as pd

textBlob_df = pd.read_csv('3MZ6aTpFrE0_Afiltered.csv')
textBlob_df['Sentiment Scores'] = ''
textBlob_df['Sentiment'] = ''

for index, row in textBlob_df.iterrows():
    comment = row['Comments']
    
    blob = TextBlob(comment)
    polarity = blob.sentiment.polarity
    
    textBlob_df.at[index, 'Sentiment Scores'] = polarity
    
    if polarity > 0:
        textBlob_df.at[index, 'Sentiment'] = 'Positive'
    elif polarity < 0:
        textBlob_df.at[index, 'Sentiment'] = 'Negative'
    else:
        textBlob_df.at[index, 'Sentiment'] = 'Neutral'


In [140]:
textBlob_df

Unnamed: 0.1,Unnamed: 0,Comments,Comment ID,Sentiment Scores,Sentiment
0,0,hey guys any other awesome deal that we missed...,298107_1,0.4375,Positive
1,1,what s best smartphone under k inr for best ca...,774454_1,1.0,Positive
2,2,whats best smartphone for best camera under k ...,378283_1,0.525,Positive
3,3,please make best smartphone under k no one tou...,674053_1,0.4,Positive
4,4,there is no discount on asus vivobook pro,390621_1,0.0,Neutral
...,...,...,...,...,...
941,949,to al those who are saying dont buy s fe cuz i...,680052_1,0.221,Positive
942,950,i see someone commenting exynos br br there is...,969564_1,0.0,Neutral
943,951,creaky sound make this video annoying,194318_1,-0.2,Negative
944,952,amazon is overrated,515685_1,0.0,Neutral


In [141]:
textBlob_df.Sentiment.value_counts()

Neutral     433
Positive    430
Negative     83
Name: Sentiment, dtype: int64

In [142]:
textBlob_percentages = textBlob_df['Sentiment'].value_counts(normalize=True) * 100
textBlob_percentages

Neutral     45.771670
Positive    45.454545
Negative     8.773784
Name: Sentiment, dtype: float64

# AFINN Model

In [143]:
import pandas as pd
from afinn import Afinn

afinn_df = pd.read_csv("3MZ6aTpFrE0_Afiltered.csv")

afinn = Afinn()

def get_sentiment(text):
    sentiment_score = afinn.score(text)
    if sentiment_score > 0:
        return "Positive"
    elif sentiment_score < 0:
        return "Negative"
    else:
        return "Neutral"

afinn_df["Sentiment Scores"] = ""
afinn_df["Sentiment"] = afinn_df["Comments"].apply(get_sentiment)

for index, row in afinn_df.iterrows():
    comment = row["Comments"]
    sentiment_score = afinn.score(comment)
    afinn_df.at[index, "Sentiment Scores"] = sentiment_score



In [144]:
afinn_df

Unnamed: 0.1,Unnamed: 0,Comments,Comment ID,Sentiment Scores,Sentiment
0,0,hey guys any other awesome deal that we missed...,298107_1,2.0,Positive
1,1,what s best smartphone under k inr for best ca...,774454_1,9.0,Positive
2,2,whats best smartphone for best camera under k ...,378283_1,6.0,Positive
3,3,please make best smartphone under k no one tou...,674053_1,8.0,Positive
4,4,there is no discount on asus vivobook pro,390621_1,-1.0,Negative
...,...,...,...,...,...
941,949,to al those who are saying dont buy s fe cuz i...,680052_1,2.0,Positive
942,950,i see someone commenting exynos br br there is...,969564_1,0.0,Neutral
943,951,creaky sound make this video annoying,194318_1,-2.0,Negative
944,952,amazon is overrated,515685_1,0.0,Neutral


In [145]:
afinn_df.Sentiment.value_counts()

Neutral     458
Positive    369
Negative    119
Name: Sentiment, dtype: int64

In [146]:
afinn_percentages = afinn_df['Sentiment'].value_counts(normalize=True) * 100
afinn_percentages

Neutral     48.414376
Positive    39.006342
Negative    12.579281
Name: Sentiment, dtype: float64

In [147]:
afinn_df

Unnamed: 0.1,Unnamed: 0,Comments,Comment ID,Sentiment Scores,Sentiment
0,0,hey guys any other awesome deal that we missed...,298107_1,2.0,Positive
1,1,what s best smartphone under k inr for best ca...,774454_1,9.0,Positive
2,2,whats best smartphone for best camera under k ...,378283_1,6.0,Positive
3,3,please make best smartphone under k no one tou...,674053_1,8.0,Positive
4,4,there is no discount on asus vivobook pro,390621_1,-1.0,Negative
...,...,...,...,...,...
941,949,to al those who are saying dont buy s fe cuz i...,680052_1,2.0,Positive
942,950,i see someone commenting exynos br br there is...,969564_1,0.0,Neutral
943,951,creaky sound make this video annoying,194318_1,-2.0,Negative
944,952,amazon is overrated,515685_1,0.0,Neutral


# Voting and Finalising +ve,-ve and neutral comments

In [148]:
comments = pd.read_csv('3MZ6aTpFrE0_Afiltered.csv', error_bad_lines=False)
comment_ids = list(comments['Comment ID'].to_list())

In [149]:
vote_df = pd.DataFrame()
for comment_id in comment_ids:
    
    pos_count = 0
    neg_count = 0
    neu_count = 0
    
    vader_sentimet_pred = vader_df.loc[vader_df['Comment ID'] == comment_id, 'Sentiment'].values 
    pos_count += sum(vader_sentimet_pred == 'Positive')
    neg_count += sum(vader_sentimet_pred == 'Negative')
    neu_count += sum(vader_sentimet_pred == 'Neutral')
    
    textblob_sentimet_pred = textBlob_df.loc[textBlob_df['Comment ID'] == comment_id, 'Sentiment'].values
    pos_count += sum(textblob_sentimet_pred == 'Positive')
    neg_count += sum(textblob_sentimet_pred == 'Negative')
    neu_count += sum(textblob_sentimet_pred == 'Neutral')
    
    afinn_sentimet_pred = afinn_df.loc[afinn_df['Comment ID'] == comment_id, 'Sentiment'].values
    pos_count += sum(afinn_sentimet_pred == 'Positive')
    neg_count += sum(afinn_sentimet_pred == 'Negative')
    neu_count += sum(afinn_sentimet_pred == 'Neutral')
    
    vote_df = vote_df.append({'Comment ID': comment_id, 'Positive': pos_count, 'Negative': neg_count, 'Neutral': neu_count}, ignore_index=True)

vote_df = vote_df.fillna(0)

vote_df['Positive'] = vote_df['Positive'].astype(int)
vote_df['Negative'] = vote_df['Negative'].astype(int)
vote_df['Neutral'] = vote_df['Neutral'].astype(int)


In [150]:
vote_df

Unnamed: 0,Comment ID,Positive,Negative,Neutral
0,298107_1,3,0,0
1,774454_1,3,0,0
2,378283_1,3,0,0
3,674053_1,3,0,0
4,390621_1,0,1,2
...,...,...,...,...
941,680052_1,2,1,0
942,969564_1,0,0,3
943,194318_1,0,3,0
944,515685_1,1,0,2


In [151]:
max_sentiment = vote_df[['Positive', 'Negative', 'Neutral']].idxmax(axis=1)

# Split the dataframe into separate dataframes based on the maximum sentiment
positive_df = vote_df[max_sentiment == 'Positive'][['Comment ID']]
negative_df = vote_df[max_sentiment == 'Negative'][['Comment ID']]
neutral_df = vote_df[max_sentiment == 'Neutral'][['Comment ID']]

In [152]:
positive_df

Unnamed: 0,Comment ID
0,298107_1
1,774454_1
2,378283_1
3,674053_1
11,408045_1
...,...
928,83589_1
930,237492_1
933,863068_1
936,902645_1


In [153]:
neutral_df

Unnamed: 0,Comment ID
4,390621_1
6,121174_1
7,322520_1
8,947935_1
9,167775_1
...,...
934,524892_1
940,811116_1
942,969564_1
944,515685_1


In [154]:
negative_df

Unnamed: 0,Comment ID
5,745618_1
104,750309_1
112,741370_1
124,871876_1
134,73553_1
...,...
935,694506_1
937,403870_1
938,67872_1
939,341850_1


# Mapping 

In [155]:
master = pd.read_csv("All_3MZ6aTpFrE0_master.csv", encoding='utf-8')

In [156]:
master

Unnamed: 0.1,Unnamed: 0,Comments,Comment ID
0,0,Hey Guys! Any other awesome deal that we misse...,298107_1
1,1,What&#39;s best smartphone under 55000-62k INR...,774454_1
2,2,Whats best smartphone for best camera under 50...,378283_1
3,3,Please make best smartphone under 55k... no on...,674053_1
4,4,There is no discount on Asus vivobook pro,390621_1
...,...,...,...
949,949,To al those who are saying dont buy s20 fe cuz...,680052_1
950,950,+ i see someone commenting Exynos 990. <br><br...,969564_1
951,951,creaky Sound. make this video Annoying,194318_1
952,952,Amazon is overrated,515685_1


In [157]:
postive_comments = pd.merge(master, positive_df, on='Comment ID', how='inner')
postive_comments

Unnamed: 0.1,Unnamed: 0,Comments,Comment ID
0,0,Hey Guys! Any other awesome deal that we misse...,298107_1
1,1,What&#39;s best smartphone under 55000-62k INR...,774454_1
2,2,Whats best smartphone for best camera under 50...,378283_1
3,3,Please make best smartphone under 55k... no on...,674053_1
4,11,What&#39;s best smartphones under 55k for offi...,408045_1
...,...,...,...
404,936,i think before buying online one should always...,83589_1
405,938,galaxy watch 5 pro. available for 31k.. 9k str...,237492_1
406,941,Stop recommending S20 FE in 2023. It is a 2 ye...,863068_1
407,944,someone stop this guy..omg this guy is a joke ...,902645_1


In [158]:
negative_comments = pd.merge(master, negative_df, on='Comment ID', how='inner')
negative_comments

Unnamed: 0.1,Unnamed: 0,Comments,Comment ID
0,5,@Beebom you missed ipad deal. :),745618_1
1,104,Thanks for letting us know about the bad deals...,750309_1
2,112,Don&#39;t buy its a chineese rebranded shit,741370_1
3,124,It&#39;s Rs 1860 for me rn. I need a new trimm...,871876_1
4,134,"Sorry guys its not worth it, bad quality.Nothi...",73553_1
...,...,...,...
80,943,Guys don&#39;t buy realme gt neo 3t........ It...,694506_1
81,945,Is it just me or did anyone else also get anno...,403870_1
82,946,The tirrr tirrr sound is really irritating,67872_1
83,947,Dude what&#39;s that kkrrrrrrr sound.. that&#3...,341850_1


In [159]:
neutral_comments = pd.merge(master, neutral_df, on='Comment ID', how='inner')
neutral_comments

Unnamed: 0.1,Unnamed: 0,Comments,Comment ID
0,4,There is no discount on Asus vivobook pro,390621_1
1,6,Realme GT 2 was at 24k.,121174_1
2,7,Gt Neo 3 t is out of stock 😅😢,322520_1
3,8,Sony srs xe 200,947935_1
4,9,Motorola edge 30 ultra,167775_1
...,...,...,...
447,942,Nothing for a common man. 🙄,524892_1
448,948,All this are overhyped product,811116_1
449,950,+ i see someone commenting Exynos 990. <br><br...,969564_1
450,952,Amazon is overrated,515685_1
