In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from textblob import TextBlob
from wordcloud import WordCloud
import plotly.graph_objects as go
import plotly.express as px

# Load one CSV of tweets per party. The BJP file is read with pandas'
# default encoding; the BSP and SP files are decoded with 'unicode_escape'.
bjp_reviews = pd.read_csv("bjp.csv")
bsp_reviews, sp_reviews = (
    pd.read_csv(csv_name, encoding='unicode_escape')
    for csv_name in ("bsp.csv", "sp.csv")
)

In [None]:
# Show the first five rows of each party's frame as a quick sanity check.
for party_frame in (bjp_reviews, bsp_reviews, sp_reviews):
    print(party_frame.head())

           user                                               text
0   MarkHodder3         @bjp And we’ll find out who won in 2026...
1    K87327961G  @bjp Your Democratic   Party cannot be trusted...
2      OldlaceA                             @bjp So did Lying Barr
3    penblogger  @bjp It's clear you didnt compose this tweet. ...
4  Aquarian0264              @bjp I will vote in person thank you.
              user                                               text
0      manny_rosen   @sanofi please tell us how many shares the Cr...
1        osi_abdul   https://t.co/atM98CpqF7  Like, comment, RT #P...
2          Patsyrw   Your AG Barr is as useless &amp; corrupt as y...
3  seyedebrahimi_m    BSP! Wake Up!  Most of the comments below yo...
4    James09254677   After 4 years you think you would have figure...
              user                                               text
0          mattv68   Thank you for fighting for the American peopl...
1  TheGodd69623245   @TheRISEofROD Gre

In [None]:
# Spot-check TextBlob on one hand-picked tweet per party (row labels 10, 100
# and 200). The three blobs stay bound to textblob1..3 as before.
textblob1, textblob2, textblob3 = (
    TextBlob(party_frame["text"][row_label])
    for party_frame, row_label in (
        (bjp_reviews, 10),
        (bsp_reviews, 100),
        (sp_reviews, 200),
    )
)
for party_tag, blob in (("BJP :", textblob1), ("BSP :", textblob2), ("SP :", textblob3)):
    print(party_tag, blob.sentiment)

BJP : Sentiment(polarity=0.0, subjectivity=0.0)
BSP : Sentiment(polarity=0.125, subjectivity=0.16666666666666666)
SP : Sentiment(polarity=-1.0, subjectivity=1.0)


In [None]:

def find_pol(review):
    """Return the TextBlob sentiment polarity of a single piece of text.

    Parameters
    ----------
    review : str
        The text to score (here: one tweet).

    Returns
    -------
    The ``polarity`` field of ``TextBlob(review).sentiment``.
    """
    sentiment = TextBlob(review).sentiment
    return sentiment.polarity
# Score every tweet of every party and store the result in a new
# "Sentiment Polarity" column, then show the last rows of each frame.
for party_frame in (bjp_reviews, bsp_reviews, sp_reviews):
    party_frame["Sentiment Polarity"] = party_frame["text"].apply(find_pol)
    print(party_frame.tail())



             user  ... Sentiment Polarity
2535    meryn1977  ...               0.15
2536  BSNelson114  ...               0.00
2537     KenCapel  ...               0.00
2538   LeslyeHale  ...               0.10
2539     rerickre  ...               0.20

[5 rows x 3 columns]
                 user  ... Sentiment Polarity
2782          4diva63  ...              0.000
2783         hidge826  ...              0.000
2784     SpencerRossy  ...              0.225
2785  ScoobyMcpherson  ...              0.000
2786          bjklinz  ...             -0.500

[5 rows x 3 columns]
                user  ... Sentiment Polarity
460     MattDillonNC  ...              -0.50
461      MMMendoza11  ...              -0.50
462      Tacos4tacos  ...               0.00
463         PistThug  ...               0.35
464  Bertina56941901  ...               0.70

[5 rows x 3 columns]


In [None]:
def label_expression(polarity):
    """Turn a Series of polarity scores into expression labels.

    Parameters
    ----------
    polarity : pandas.Series
        Sentiment polarity scores.

    Returns
    -------
    numpy.ndarray of str
        "positive" where the score is > 0, "Neutral" where it is exactly 0,
        and "negative" for everything else.
    """
    return np.select(
        [polarity > 0, polarity == 0],
        ["positive", "Neutral"],
        default="negative",
    )


# Label all three parties with the same rule. The labels are built in one
# vectorised step and assigned as a whole column, which avoids the chained
# assignment (df[col][mask] = ...) that triggers SettingWithCopyWarning and
# may silently write to a temporary copy. Previously only BJP got a "Neutral"
# label; BSP and SP zero-polarity tweets were reported as "negative".
for party_frame in (bjp_reviews, bsp_reviews, sp_reviews):
    party_frame["Expression Label"] = label_expression(party_frame["Sentiment Polarity"])
    print(party_frame.tail())

             user  ... Expression Label
2535    meryn1977  ...         positive
2536  BSNelson114  ...          Neutral
2537     KenCapel  ...          Neutral
2538   LeslyeHale  ...         positive
2539     rerickre  ...         positive

[5 rows x 4 columns]
                 user  ... Expression Label
2782          4diva63  ...         negative
2783         hidge826  ...         negative
2784     SpencerRossy  ...         positive
2785  ScoobyMcpherson  ...         negative
2786          bjklinz  ...         negative

[5 rows x 4 columns]
                user  ... Expression Label
460     MattDillonNC  ...         negative
461      MMMendoza11  ...         negative
462      Tacos4tacos  ...         negative
463         PistThug  ...         positive
464  Bertina56941901  ...         positive

[5 rows x 4 columns]




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:

def split_neutral(reviews):
    """Split a reviews frame into neutral rows and non-neutral rows.

    Parameters
    ----------
    reviews : pandas.DataFrame
        Frame with a 'Sentiment Polarity' column.

    Returns
    -------
    (neutral, non_neutral) : tuple of pandas.DataFrame
        ``neutral`` holds the rows whose polarity is exactly 0; ``non_neutral``
        holds every other row as an independent copy, with the original index
        labels preserved.
    """
    is_neutral = reviews['Sentiment Polarity'] == 0
    return reviews[is_neutral], reviews[~is_neutral].copy()


# Each frame is filtered with its OWN zero-polarity mask. The earlier version
# built the BSP and SP masks with isin(...) against BJP's neutral rows, which
# only worked because those values are all 0.0 and would have dropped nothing
# if BJP happened to contain no neutral tweets.
reviews1, bjp_reviews = split_neutral(bjp_reviews)
print(reviews1.shape)
print(bjp_reviews.shape)

reviews2, bsp_reviews = split_neutral(bsp_reviews)
print(reviews2.shape)
print(bsp_reviews.shape)

# NOTE: `reviews2` is deliberately re-bound here so that the names left in the
# notebook namespace match what this cell has always produced.
reviews2, sp_reviews = split_neutral(sp_reviews)
print(reviews2.shape)
print(sp_reviews.shape)

(1509, 4)
(1031, 4)
(1452, 4)
(1335, 4)
(222, 4)
(243, 4)


In [None]:
def _drop_random_rows(frame, how_many, seed=10):
    """Return `frame` without `how_many` randomly chosen rows.

    The global NumPy RNG is re-seeded with `seed` before every draw, so the
    rows that get removed are the same on every run.
    """
    np.random.seed(seed)
    doomed = np.random.choice(frame.index, how_many, replace=False)
    return frame.drop(doomed)


# BJP: remove 324 random tweets
df_subset_bjp = _drop_random_rows(bjp_reviews, 324)
print(df_subset_bjp.shape)
# BSP: remove 31 random tweets
df_subset_bsp = _drop_random_rows(bsp_reviews, 31)
print(df_subset_bsp.shape)
# SP: remove 31 random tweets
df_subset_sp = _drop_random_rows(sp_reviews, 31)
print(df_subset_sp.shape)

(707, 4)
(1304, 4)
(212, 4)


In [None]:
def label_percentages(reviews):
    """Count tweets per expression label and convert the counts to percentages.

    Parameters
    ----------
    reviews : pandas.DataFrame
        Frame with 'Expression Label' and 'Sentiment Polarity' columns.

    Returns
    -------
    (counts, negative_pct, positive_pct)
        ``counts`` is the per-label count table (also printed); the two
        percentages are the share of 'negative' / 'positive' rows relative to
        all labelled rows in ``reviews``. A label that does not occur yields 0.
    """
    counts = reviews.groupby('Expression Label').count()
    print(counts)

    per_label = counts['Sentiment Polarity']
    total = per_label.sum()
    if total == 0:
        # Empty frame: nothing to normalise, avoid dividing by zero.
        return counts, 0.0, 0.0

    # Look labels up by name, not by position: positional [0]/[1] access
    # breaks as soon as another label (e.g. "Neutral") sorts in front.
    negative_pct = per_label.get('negative', 0) / total * 100
    positive_pct = per_label.get('positive', 0) / total * 100
    return counts, negative_pct, positive_pct


# Suffix 1 = BSP, 2 = SP, 3 = BJP (numbering kept from the original cell).
# Percentages are now relative to each party's own number of tweets instead
# of a hard-coded 1000, and the BSP negative share is no longer scaled by 10
# instead of 100.
count_1, negative_per1, positive_per1 = label_percentages(df_subset_bsp)
count_2, negative_per2, positive_per2 = label_percentages(df_subset_sp)
count_3, negative_per3, positive_per3 = label_percentages(df_subset_bjp)

# The value lists must follow the same party order as the x-axis labels;
# previously BSP/SP/BJP values were plotted under BJP/BSP/SP.
Politicians = ['BJP', 'BSP', 'SP']
lis_pos = [positive_per3, positive_per1, positive_per2]
lis_neg = [negative_per3, negative_per1, negative_per2]

fig = go.Figure(data=[
    go.Bar(name='Positive', x=Politicians, y=lis_pos),
    go.Bar(name='Negative', x=Politicians, y=lis_neg)
])
# Grouped (side-by-side) bars, with the y axis labelled so the figure can be
# read on its own.
fig.update_layout(barmode='group', yaxis_title='Share of tweets (%)')
fig.show()

                  user  text  Sentiment Polarity
Expression Label                                
negative           585   585                 585
positive           719   719                 719
                  user  text  Sentiment Polarity
Expression Label                                
negative            89    89                  89
positive           123   123                 123
                  user  text  Sentiment Polarity
Expression Label                                
negative           273   273                 273
positive           434   434                 434
