In [2]:
!pip install textblob

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from textblob import TextBlob

import plotly.graph_objects as go
import plotly.express as px





# Loading the datasets 

In [3]:
biden_review = pd.read_csv('Bidenall2.csv')
trump_review = pd.read_csv('Trumpall2.csv')
print(biden_review.head())
print(trump_review.head())

           user                                               text
0   MarkHodder3    @JoeBiden And we’ll find out who won in 2026...
1    K87327961G  @JoeBiden Your Democratic Nazi Party cannot be...
2      OldlaceA                        @JoeBiden So did Lying Barr
3    penblogger  @JoeBiden It's clear you didnt compose this tw...
4  Aquarian0264         @JoeBiden I will vote in person thank you.
              user                                               text
0      manny_rosen   @sanofi please tell us how many shares the Cr...
1        osi_abdul   https://t.co/atM98CpqF7  Like, comment, RT #P...
2          Patsyrw   Your AG Barr is as useless &amp; corrupt as y...
3  seyedebrahimi_m   Mr. Trump! Wake Up!  Most of the comments bel...
4    James09254677   After 4 years you think you would have figure...


# Sentiment Analysis using Textblob package

In [4]:
txt_blb_biden = TextBlob(biden_review["text"][500])
txt_blb_trump = TextBlob(trump_review["text"][10])
print("Biden:",txt_blb_biden.sentiment)
print("Trump:",txt_blb_trump.sentiment)

Biden: Sentiment(polarity=0.6, subjectivity=0.9)
Trump: Sentiment(polarity=0.15, subjectivity=0.3125)


 # Adding a new column - Sentiment_Polarity 

In [5]:
def getPolarity(review):
    return TextBlob(review).sentiment.polarity

biden_review["Sentiment_Polarity"] = biden_review["text"].apply(getPolarity)
trump_review["Sentiment_Polarity"] = trump_review["text"].apply(getPolarity)
trump_review

Unnamed: 0,user,text,Sentiment_Polarity
0,manny_rosen,@sanofi please tell us how many shares the Cr...,0.050
1,osi_abdul,"https://t.co/atM98CpqF7 Like, comment, RT #P...",0.000
2,Patsyrw,Your AG Barr is as useless &amp; corrupt as y...,-0.500
3,seyedebrahimi_m,Mr. Trump! Wake Up! Most of the comments bel...,0.500
4,James09254677,After 4 years you think you would have figure...,0.000
...,...,...,...
2783,4diva63,"@realDonaldTrump For the 1/100 time, absentee ...",0.000
2784,hidge826,@realDonaldTrump If you’re so scared of losing...,0.000
2785,SpencerRossy,@realDonaldTrump I rarely get involved with fo...,0.225
2786,ScoobyMcpherson,@realDonaldTrump This is the moment when Trump...,0.000


# Adding a new column - Expression Label

In [7]:
biden_review["Expression_Label"] = np.where(biden_review["Sentiment_Polarity"] > 0 , 'Positive','Negative')
biden_review["Expression_Label"][biden_review["Sentiment_Polarity"]==0]="Neutral"

trump_review["Expression_Label"] = np.where(trump_review["Sentiment_Polarity"]>0,'Positive','Negative')
trump_review["Expression_Label"][trump_review["Sentiment_Polarity"] == 0] = "Neutral"


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  biden_review["Expression_Label"][biden_review["Sentiment_Polarity"]==0]="Neutral"
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trump_review["Expression_Label"][trump_review["Sentiment_Polarity"] == 0] = "Neutral"


# Drop all the tweets with neutral polarity from both the datasets to balance the data equally

In [8]:
biden_review.drop(biden_review[(biden_review["Expression_Label"] == 'Neutral')].index,inplace=True)

trump_review.drop(trump_review[(trump_review["Expression_Label"] == 'Neutral')].index,inplace=True)
print(biden_review,trump_review)


                 user                                               text  \
3          penblogger  @JoeBiden It's clear you didnt compose this tw...   
5     FabrizioBenass4  @JoeBiden #ALL LIVES MATTER #HUMAN RIGHTS HELP...   
13    candyisyummy333  @JoeBiden My standard reply to propaganda: htt...   
14            5h0rtie  @JoeBiden @richardmarx Trouble is trump voted ...   
15    shut_up_stewart  @JoeBiden I honestly cannot comprehend how a c...   
...               ...                                                ...   
2532        Gypsy0112  @JoeBiden The only you can do to  calm the wat...   
2534  virgil_merchant  @JoeBiden fire is the last thing our country n...   
2535        meryn1977  @JoeBiden You'll just try to calm those waters...   
2538       LeslyeHale  @JoeBiden Trump wants our children back at sch...   
2539         rerickre  @JoeBiden ... and I know, because it’s much co...   

      Sentiment_Polarity Expession_Label Expression_Label  
3               0.050000   

# Balancing both the datasets

In [10]:
np.random.seed(10)
remove_n =324
drop_indices = np.random.choice(trump_review.index, remove_n, replace=False)
df_subset_trump = trump_review.drop(drop_indices)
print(df_subset_trump.shape)

np.random.seed(10)
remove_n =31
drop_indices = np.random.choice(biden_review.index, remove_n, replace=False)
df_subset_biden = biden_review.drop(drop_indices)
print(df_subset_biden.shape)

(1000, 4)
(1000, 5)


In [12]:
df_subset_biden['Expression_Label'].value_counts(),df_subset_trump['Expression_Label'].value_counts()

(Expression_Label
 Positive    607
 Negative    393
 Name: count, dtype: int64,
 Expression_Label
 Positive    551
 Negative    449
 Name: count, dtype: int64)

# Analyzing the number of positive and negative sentiments in both the accounts

In [19]:

biden_positive = (df_subset_biden['Expression_Label'].value_counts().tolist())[0]
biden_negative = (df_subset_biden['Expression_Label'].value_counts().tolist())[1]

trump_positive = df_subset_trump['Expression_Label'].value_counts().tolist()[0]
trump_negative = df_subset_trump['Expression_Label'].value_counts().tolist()[1]

Politicians = ['Joe Biden', 'Donald Trump']
lis_pos = [biden_positive * 0.1, trump_positive * 0.1]
lis_neg = [biden_negative * 0.1, trump_negative * 0.1]


# Visualizing using grouped barchart

In [49]:
#layout = go.Layout(title='Basic Grouped Bar Chart')
fig = go.Figure(data=[
    go.Bar(name='Positive', x=Politicians, y=lis_pos,marker = dict(color='#0000F0')),
    go.Bar(name='Negative', x=Politicians, y=lis_neg,marker = dict(color='#FF0000' ))
],layout=layout)
fig.show()
