In [1]:
#Subjectivity Analysis


In [2]:
import pandas as pd

In [3]:
# Read csv file
file=('Resources/town_hall.csv')
first_df=pd.read_csv(file)
first_df.head()

Unnamed: 0,tweet,replies_count,retweets_count,likes_count,candidate,tweet_length,Subjectivity,Polarity,Sentiment
0,In 2016 Massachusetts Gov Charlie Baker sat ou...,38,41,144,Biden,25,0.9,0.5,Positive
1,Obama came in after Bush Trump didn inherit t...,1,0,0,Biden,46,0.55,0.0,Neutral
2,People voting against their best interest is ...,0,0,8,Biden,24,0.15,0.4,Positive
3,Joe is senile Sorry But also he a crook But s...,0,0,0,Biden,19,1.0,-0.5,Negative
4,hope the media stops reporting his every utte...,0,0,1,Biden,9,0.0,0.0,Neutral


In [4]:
# Separating Biden and Trump Data
biden_sentiment=first_df[(first_df['candidate']=='Biden')]
trump_sentiment=first_df[(first_df['candidate']=='Trump')]

In [5]:
#Biden Subjectivity Analysis

In [6]:
# Creating Bins for Subjectivity
biden_bin=biden_sentiment.Subjectivity
biden_bin

0         0.900000
1         0.550000
2         0.150000
3         1.000000
4         0.000000
            ...   
122922    0.000000
122923    0.193750
122924    0.212500
122925    0.651515
122926    0.000000
Name: Subjectivity, Length: 122927, dtype: float64

In [7]:
#Creating Bins

In [8]:
# Assumption of Subjectivity Ranges
# 0.0 - 0.3 => Passive (or Nonchalant Tweeter)
# 0.3 - 0.7 => Balanced (or more Objective Tweeter)
# 0.7 - 1.0 => Subjective (or less Objective Tweeter)
bins = bins = [float('-0.5'),0.3,0.7,float('1.5')]
groups = ["Passive", "Balanced", "Subjective"]
biden_bin.groupby(pd.cut(biden_bin, bins)).count()

Subjectivity
(-0.5, 0.3]    56341
(0.3, 0.7]     48844
(0.7, 1.5]     17742
Name: Subjectivity, dtype: int64

In [9]:
# Categorize subjectivity bins
biden_sentiment["Subjectivity_Class"] = pd.cut(biden_bin, bins=bins, labels=groups)
biden_sentiment

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,tweet,replies_count,retweets_count,likes_count,candidate,tweet_length,Subjectivity,Polarity,Sentiment,Subjectivity_Class
0,In 2016 Massachusetts Gov Charlie Baker sat ou...,38,41,144,Biden,25,0.900000,0.500000,Positive,Subjective
1,Obama came in after Bush Trump didn inherit t...,1,0,0,Biden,46,0.550000,0.000000,Neutral,Balanced
2,People voting against their best interest is ...,0,0,8,Biden,24,0.150000,0.400000,Positive,Passive
3,Joe is senile Sorry But also he a crook But s...,0,0,0,Biden,19,1.000000,-0.500000,Negative,Subjective
4,hope the media stops reporting his every utte...,0,0,1,Biden,9,0.000000,0.000000,Neutral,Passive
...,...,...,...,...,...,...,...,...,...,...
122922,The direction Texas is headed,0,0,0,Biden,5,0.000000,0.000000,Neutral,Passive
122923,Trump is the dead past while offers vision of ...,0,0,0,Biden,11,0.193750,-0.087500,Negative,Passive
122924,How much did he pay in payroll taxes thought ...,2,0,0,Biden,45,0.212500,0.087500,Positive,Passive
122925,federal judge finalized the 25 million settle...,5,4,11,Biden,39,0.651515,0.062121,Positive,Balanced


In [10]:
#Feature Engineering (Biden)

In [11]:
# Calculating Totals by Subjectivity Ranges
biden_sub_groups_count = biden_sentiment.groupby(["Subjectivity_Class"]).count()["tweet"]
biden_sub_groups_count

Subjectivity_Class
Passive       56341
Balanced      48844
Subjective    17742
Name: tweet, dtype: int64

In [12]:
# Average Replies
biden_avg_replies = biden_sentiment.groupby(["Subjectivity_Class"]).mean()["replies_count"]
biden_avg_replies

Subjectivity_Class
Passive       1.610994
Balanced      1.756060
Subjective    0.972326
Name: replies_count, dtype: float64

In [13]:
# Average Retweets
biden_avg_retweets = biden_sentiment.groupby(["Subjectivity_Class"]).mean()["retweets_count"]
biden_avg_retweets

Subjectivity_Class
Passive       4.591807
Balanced      5.599992
Subjective    3.642599
Name: retweets_count, dtype: float64

In [14]:
# Average Likes
biden_avg_likes = biden_sentiment.groupby(["Subjectivity_Class"]).mean()["likes_count"]
biden_avg_likes

Subjectivity_Class
Passive       18.412648
Balanced      21.199963
Subjective    16.357570
Name: likes_count, dtype: float64

In [15]:
# Average Tweet Length
biden_avg_tweet_length = biden_sentiment.groupby(["Subjectivity_Class"]).mean()["tweet_length"]
biden_avg_tweet_length

Subjectivity_Class
Passive       14.353135
Balanced      25.472259
Subjective    18.881073
Name: tweet_length, dtype: float64

In [16]:
# Average Sentiment Score
biden_avg_sentiment_score= biden_sentiment.groupby(["Subjectivity_Class"]).mean()["Polarity"]
biden_avg_sentiment_score

Subjectivity_Class
Passive       0.016212
Balanced      0.116926
Subjective    0.057129
Name: Polarity, dtype: float64

In [17]:
# Combine each Series for table
biden_subjectivity_table = pd.DataFrame({
               "Avg Replies": biden_avg_replies,
               "Avg Retweets": biden_avg_retweets,
               "Avg Likes": biden_avg_likes,
               "Avg Tweet Length": biden_avg_tweet_length,
               "Avg Sentiment" : biden_avg_sentiment_score,
               "Total" : biden_sub_groups_count})

biden_subjectivity_table.head()

Unnamed: 0_level_0,Avg Replies,Avg Retweets,Avg Likes,Avg Tweet Length,Avg Sentiment,Total
Subjectivity_Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Passive,1.610994,4.591807,18.412648,14.353135,0.016212,56341
Balanced,1.75606,5.599992,21.199963,25.472259,0.116926,48844
Subjective,0.972326,3.642599,16.35757,18.881073,0.057129,17742


In [18]:
# Format each column
biden_subjectivity_table["Avg Replies"] = biden_subjectivity_table["Avg Replies"].map("{:.1f}".format)

biden_subjectivity_table["Avg Retweets"] = biden_subjectivity_table["Avg Retweets"].map("{:.1f}".format)

biden_subjectivity_table["Avg Likes"] = biden_subjectivity_table["Avg Likes"].map("{:.1f}".format)

biden_subjectivity_table["Avg Tweet Length"] = biden_subjectivity_table["Avg Tweet Length"].map("{:.1f}".format)

biden_subjectivity_table["Avg Sentiment"] = biden_subjectivity_table["Avg Sentiment"].map("{:.3f}".format)


# Display the DataFrame.
biden_subjectivity_table.head()

Unnamed: 0_level_0,Avg Replies,Avg Retweets,Avg Likes,Avg Tweet Length,Avg Sentiment,Total
Subjectivity_Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Passive,1.6,4.6,18.4,14.4,0.016,56341
Balanced,1.8,5.6,21.2,25.5,0.117,48844
Subjective,1.0,3.6,16.4,18.9,0.057,17742


In [19]:
#Writing csv file for Dashboard

In [20]:
biden_subjectivity_table.to_csv(r'C:\Users\kevin\Documents\myrepo\Final-Project\Resources\town_hall_biden_subjectivity.csv', index = True, header=True)

In [21]:
#Trump Subjectivity Analysis

In [22]:
# Creating Bins for Subjectivity
trump_bin=trump_sentiment.Subjectivity
trump_bin


122927    0.750000
122928    0.505000
122929    0.375000
122930    0.466667
122931    0.000000
            ...   
298514    0.000000
298515    0.173737
298516    0.000000
298517    0.000000
298518    0.500000
Name: Subjectivity, Length: 175592, dtype: float64

In [23]:
#Creating Bins

In [24]:
# Assumption of Subjectivity Ranges
# 0.0 - 0.3 => Passive (or Nonchalant Tweeter)
# 0.3 - 0.7 => Balanced (or more Objective Tweeter)
# 0.7 - 1.0 => Subjective (or less Objective Tweeter)
trump_bin.groupby(pd.cut(trump_bin, bins)).count()

Subjectivity
(-0.5, 0.3]    82230
(0.3, 0.7]     65588
(0.7, 1.5]     27774
Name: Subjectivity, dtype: int64

In [25]:
# Categorize subjectivity bins
trump_sentiment["Subjectivity_Class"] = pd.cut(trump_bin, bins=bins, labels=groups)
trump_sentiment

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,tweet,replies_count,retweets_count,likes_count,candidate,tweet_length,Subjectivity,Polarity,Sentiment,Subjectivity_Class
122927,m from Nebraska and also disappointed in Sass...,1,13,51,Trump,29,0.750000,-0.750000,Negative,Subjective
122928,His ratings were higher than yours reminder y...,0,0,0,Trump,51,0.505000,-0.030000,Negative,Balanced
122929,media just ignores and covers for Biden day i...,1,0,1,Trump,35,0.375000,-0.333333,Negative,Balanced
122930,LOL those fans are dummies too Democrat or Re...,0,0,0,Trump,29,0.466667,0.208333,Positive,Balanced
122931,Vote Biden,0,0,0,Trump,2,0.000000,0.000000,Neutral,Passive
...,...,...,...,...,...,...,...,...,...,...
298514,Already voted Biden Harris,0,0,1,Trump,4,0.000000,0.000000,Neutral,Passive
298515,And that is why you will probably die before ...,0,0,0,Trump,49,0.173737,-0.010101,Negative,Passive
298516,Your life like 218 000 Americans have,0,0,0,Trump,7,0.000000,0.000000,Neutral,Passive
298517,Liar Liar pants on fire,0,0,0,Trump,5,0.000000,0.000000,Neutral,Passive


In [26]:
# Calculating Totals by Subjectivity Ranges
trump_sub_groups_count = trump_sentiment.groupby(["Subjectivity_Class"]).count()["tweet"]
trump_sub_groups_count

Subjectivity_Class
Passive       82230
Balanced      65588
Subjective    27774
Name: tweet, dtype: int64

In [27]:
# Average Replies
trump_avg_replies = trump_sentiment.groupby(["Subjectivity_Class"]).mean()["replies_count"]
trump_avg_replies

Subjectivity_Class
Passive       1.005424
Balanced      2.348722
Subjective    0.607367
Name: replies_count, dtype: float64

In [28]:
# Average Retweets
trump_avg_retweets = trump_sentiment.groupby(["Subjectivity_Class"]).mean()["retweets_count"]
trump_avg_retweets

Subjectivity_Class
Passive       2.209765
Balanced      4.974523
Subjective    1.986678
Name: retweets_count, dtype: float64

In [29]:
# Average Likes
trump_avg_likes = trump_sentiment.groupby(["Subjectivity_Class"]).mean()["likes_count"]
trump_avg_likes

Subjectivity_Class
Passive        9.262593
Balanced      22.981277
Subjective     8.690826
Name: likes_count, dtype: float64

In [30]:
# Average Tweet Length
trump_avg_tweet_length = trump_sentiment.groupby(["Subjectivity_Class"]).mean()["tweet_length"]
trump_avg_tweet_length

Subjectivity_Class
Passive       12.009242
Balanced      22.494984
Subjective    16.235832
Name: tweet_length, dtype: float64

In [31]:
# Average Sentiment
trump_avg_sentiment_score= trump_sentiment.groupby(["Subjectivity_Class"]).mean()["Polarity"]
trump_avg_sentiment_score

Subjectivity_Class
Passive       0.015524
Balanced      0.106958
Subjective   -0.024445
Name: Polarity, dtype: float64

In [32]:
# Combine each Series for table
trump_subjectivity_table = pd.DataFrame({
               "Avg Replies": trump_avg_replies,
               "Avg Retweets": trump_avg_retweets,
               "Avg Likes": trump_avg_likes,
               "Avg Tweet Length": trump_avg_tweet_length,
               "Avg Sentiment" : trump_avg_sentiment_score,
               "Total" : trump_sub_groups_count})

trump_subjectivity_table.head()

Unnamed: 0_level_0,Avg Replies,Avg Retweets,Avg Likes,Avg Tweet Length,Avg Sentiment,Total
Subjectivity_Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Passive,1.005424,2.209765,9.262593,12.009242,0.015524,82230
Balanced,2.348722,4.974523,22.981277,22.494984,0.106958,65588
Subjective,0.607367,1.986678,8.690826,16.235832,-0.024445,27774


In [33]:
# Format each column
trump_subjectivity_table["Avg Replies"] = trump_subjectivity_table["Avg Replies"].map("{:.1f}".format)

trump_subjectivity_table["Avg Retweets"] = trump_subjectivity_table["Avg Retweets"].map("{:.1f}".format)

trump_subjectivity_table["Avg Likes"] = trump_subjectivity_table["Avg Likes"].map("{:.1f}".format)

trump_subjectivity_table["Avg Tweet Length"] = trump_subjectivity_table["Avg Tweet Length"].map("{:.1f}".format)

trump_subjectivity_table["Avg Sentiment"] = trump_subjectivity_table["Avg Sentiment"].map("{:.3f}".format)


# Display the DataFrame.
trump_subjectivity_table.head()

Unnamed: 0_level_0,Avg Replies,Avg Retweets,Avg Likes,Avg Tweet Length,Avg Sentiment,Total
Subjectivity_Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Passive,1.0,2.2,9.3,12.0,0.016,82230
Balanced,2.3,5.0,23.0,22.5,0.107,65588
Subjective,0.6,2.0,8.7,16.2,-0.024,27774


In [34]:
trump_subjectivity_table.to_csv(r'C:\Users\kevin\Documents\myrepo\Final-Project\Resources\town_hall_trump_subjectivity.csv', index = True, header=True)