# Subjectivity Analysis

### Import Dependencies

In [17]:
import pandas as pd

In [18]:
# Load the first-debate tweets; the path is relative to the notebook's working directory
file = 'Concat_ML_Data/first_debate.csv'
first_df = pd.read_csv(file)
# Preview the first rows of the loaded frame
first_df.head()

Unnamed: 0,tweet,replies_count,retweets_count,likes_count,candidate,tweet_length,Subjectivity,Polarity,Sentiment
0,Truly wish YOU had been the Moderator Brian Y...,0,0,0,Biden,40,0.1,-0.2,Negative
1,Crazy different RT Joe Biden hit Trump with th...,0,0,0,Biden,13,0.7,-0.2,Negative
2,Brilliant retort Joe The world hopes for your...,0,0,0,Biden,20,0.5,0.45,Positive
3,Anytime or slandering Joe Biden Me,0,2,2,Biden,6,0.0,0.0,Neutral
4,You are just trying to rile us and it working,0,0,1,Biden,10,0.0,0.0,Neutral


In [19]:
# Split the tweets into one DataFrame per candidate.
# .copy() makes each subset an independent frame, so adding columns to it
# afterwards does not raise pandas' SettingWithCopyWarning.
biden_sentiment = first_df[first_df['candidate'] == 'Biden'].copy()
trump_sentiment = first_df[first_df['candidate'] == 'Trump'].copy()

### Biden Subjectivity Analysis

In [20]:
# Subjectivity scores of the Biden tweets; this Series is what gets binned below
biden_bin = biden_sentiment["Subjectivity"]
biden_bin

0         0.100000
1         0.700000
2         0.500000
3         0.000000
4         0.000000
            ...   
147161    0.300000
147162    0.000000
147163    0.666667
147164    0.500000
147165    0.000000
Name: Subjectivity, Length: 147166, dtype: float64

### Creating Bins

In [23]:
# Assumption of Subjectivity Ranges
# 0.0 - 0.3 => Passive (or Nonchalant Tweeter)
# 0.3 - 0.7 => Balanced (or more Objective Tweeter)
# 0.7 - 1.0 => Subjective (or less Objective Tweeter)
#
# pd.cut builds right-closed intervals by default, i.e. (-0.5, 0.3], (0.3, 0.7],
# (0.7, 1.5]: a score exactly equal to an edge lands in the lower class.
# The outer edges are -0.5 and 1.5 rather than 0.0 and 1.0; with a left edge of
# exactly 0.0 the first interval would be open at 0.0 and exclude scores of 0.0.
bins = [-0.5, 0.3, 0.7, 1.5]
groups = ["Passive", "Balanced", "Subjective"]
# Number of Biden tweets falling into each interval
biden_bin.groupby(pd.cut(biden_bin, bins)).count()

Subjectivity
(-0.5, 0.3]    71438
(0.3, 0.7]     55724
(0.7, 1.5]     20004
Name: Subjectivity, dtype: int64

In [24]:
# Label every Biden tweet with its subjectivity class ("Passive"/"Balanced"/"Subjective").
# assign() returns a new DataFrame instead of writing a column into a frame that
# was obtained by filtering first_df, which is what triggered SettingWithCopyWarning.
biden_sentiment = biden_sentiment.assign(
    Subjectivity_Class=pd.cut(biden_bin, bins=bins, labels=groups)
)
biden_sentiment

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,tweet,replies_count,retweets_count,likes_count,candidate,tweet_length,Subjectivity,Polarity,Sentiment,Subjectivity_Class
0,Truly wish YOU had been the Moderator Brian Y...,0,0,0,Biden,40,0.100000,-0.200000,Negative,Passive
1,Crazy different RT Joe Biden hit Trump with th...,0,0,0,Biden,13,0.700000,-0.200000,Negative,Subjective
2,Brilliant retort Joe The world hopes for your...,0,0,0,Biden,20,0.500000,0.450000,Positive,Balanced
3,Anytime or slandering Joe Biden Me,0,2,2,Biden,6,0.000000,0.000000,Neutral,Passive
4,You are just trying to rile us and it working,0,0,1,Biden,10,0.000000,0.000000,Neutral,Passive
...,...,...,...,...,...,...,...,...,...,...
147161,doesn matter you shouldn bring up personal life,0,0,3,Biden,8,0.300000,0.000000,Neutral,Passive
147162,How,0,0,0,Biden,1,0.000000,0.000000,Neutral,Passive
147163,could have done better easily,0,0,1,Biden,5,0.666667,0.466667,Positive,Balanced
147164,unconscious more like,1,0,1,Biden,3,0.500000,0.500000,Positive,Balanced


### Feature Engineering (Biden)

In [27]:
# Number of tweets in each subjectivity class
biden_sub_groups_count = biden_sentiment.groupby("Subjectivity_Class")["tweet"].count()
biden_sub_groups_count

Subjectivity_Class
Passive       71438
Balanced      55724
Subjective    20004
Name: tweet, dtype: int64

In [28]:
# Mean number of replies per subjectivity class.
# The column is selected before aggregating: calling mean() on the whole grouped
# frame also tries to average the text columns (tweet, candidate, Sentiment),
# which raises TypeError on pandas >= 2.0 and does unnecessary work.
biden_avg_replies = biden_sentiment.groupby("Subjectivity_Class")["replies_count"].mean()
biden_avg_replies

Subjectivity_Class
Passive       1.182186
Balanced      1.500574
Subjective    2.073885
Name: replies_count, dtype: float64

In [29]:
# Mean number of retweets per subjectivity class.
# The column is selected before aggregating: calling mean() on the whole grouped
# frame also tries to average the text columns (tweet, candidate, Sentiment),
# which raises TypeError on pandas >= 2.0 and does unnecessary work.
biden_avg_retweets = biden_sentiment.groupby("Subjectivity_Class")["retweets_count"].mean()
biden_avg_retweets

Subjectivity_Class
Passive       4.639730
Balanced      4.098342
Subjective    4.870176
Name: retweets_count, dtype: float64

In [30]:
# Mean number of likes per subjectivity class.
# The column is selected before aggregating: calling mean() on the whole grouped
# frame also tries to average the text columns (tweet, candidate, Sentiment),
# which raises TypeError on pandas >= 2.0 and does unnecessary work.
biden_avg_likes = biden_sentiment.groupby("Subjectivity_Class")["likes_count"].mean()
biden_avg_likes

Subjectivity_Class
Passive       19.837020
Balanced      18.730565
Subjective    24.556239
Name: likes_count, dtype: float64

In [31]:
# Mean tweet length (the tweet_length column) per subjectivity class.
# The column is selected before aggregating: calling mean() on the whole grouped
# frame also tries to average the text columns (tweet, candidate, Sentiment),
# which raises TypeError on pandas >= 2.0 and does unnecessary work.
biden_avg_tweet_length = biden_sentiment.groupby("Subjectivity_Class")["tweet_length"].mean()
biden_avg_tweet_length

Subjectivity_Class
Passive       13.898149
Balanced      25.190295
Subjective    18.645321
Name: tweet_length, dtype: float64

In [32]:
# Mean sentiment score (the Polarity column) per subjectivity class.
# The column is selected before aggregating: calling mean() on the whole grouped
# frame also tries to average the text columns (tweet, candidate, Sentiment),
# which raises TypeError on pandas >= 2.0 and does unnecessary work.
biden_avg_sentiment_score = biden_sentiment.groupby("Subjectivity_Class")["Polarity"].mean()
biden_avg_sentiment_score

Subjectivity_Class
Passive       0.016924
Balanced      0.108526
Subjective    0.013532
Name: Polarity, dtype: float64

In [45]:
# Assemble the per-class Series into a single summary table:
# one row per subjectivity class, one column per metric.
biden_summary_columns = {
    "Avg Replies": biden_avg_replies,
    "Avg Retweets": biden_avg_retweets,
    "Avg Likes": biden_avg_likes,
    "Avg Tweet Length": biden_avg_tweet_length,
    "Avg Sentiment": biden_avg_sentiment_score,
    "Total": biden_sub_groups_count,
}
biden_subjectivity_table = pd.DataFrame(biden_summary_columns)

biden_subjectivity_table.head()

Unnamed: 0_level_0,Avg Replies,Avg Retweets,Avg Likes,Avg Tweet Length,Avg Sentiment,Total
Subjectivity_Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Passive,1.182186,4.63973,19.83702,13.898149,0.016924,71438
Balanced,1.500574,4.098342,18.730565,25.190295,0.108526,55724
Subjective,2.073885,4.870176,24.556239,18.645321,0.013532,20004


In [46]:
# Render the averages as fixed-precision strings; "Total" is left as-is.
# NOTE: after this cell the formatted columns hold str values, not floats, so
# re-running the cell on the already formatted table raises ValueError.
biden_display_formats = {
    "Avg Replies": "{:.1f}",
    "Avg Retweets": "{:.1f}",
    "Avg Likes": "{:.1f}",
    "Avg Tweet Length": "{:.1f}",
    "Avg Sentiment": "{:.3f}",
}
for column_name, pattern in biden_display_formats.items():
    biden_subjectivity_table[column_name] = biden_subjectivity_table[column_name].map(pattern.format)

# Show the formatted table.
biden_subjectivity_table.head()

Unnamed: 0_level_0,Avg Replies,Avg Retweets,Avg Likes,Avg Tweet Length,Avg Sentiment,Total
Subjectivity_Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Passive,1.2,4.6,19.8,13.9,0.017,71438
Balanced,1.5,4.1,18.7,25.2,0.109,55724
Subjective,2.1,4.9,24.6,18.6,0.014,20004


### Writing csv file for Dashboard

In [47]:
# Export the Biden summary table for the dashboard.
# The path is relative to the notebook's working directory and targets the same
# Concat_ML_Data folder the input CSV is read from, instead of a user-specific
# absolute Windows path that only exists on one machine.
biden_subjectivity_table.to_csv('Concat_ML_Data/first_debate_biden_subjectivity.csv', index=True, header=True)

### Trump Subjectivity Analysis

In [33]:
# Subjectivity scores of the Trump tweets; this Series is what gets binned below
trump_bin = trump_sentiment["Subjectivity"]
trump_bin

147166    0.40625
147167    0.00000
147168    0.00000
147169    0.32500
147170    0.70000
           ...   
421550    0.00000
421551    0.00000
421552    0.00000
421553    0.00000
421554    0.00000
Name: Subjectivity, Length: 274389, dtype: float64

### Creating Bins

In [34]:
# Subjectivity ranges assumed for the classes
#   0.0 - 0.3 => Passive (or Nonchalant Tweeter)
#   0.3 - 0.7 => Balanced (or more Objective Tweeter)
#   0.7 - 1.0 => Subjective (or less Objective Tweeter)
# Count the Trump tweets that fall into each interval defined by `bins`
trump_intervals = pd.cut(trump_bin, bins)
trump_bin.groupby(trump_intervals).count()

Subjectivity
(-0.5, 0.3]    133953
(0.3, 0.7]     100199
(0.7, 1.5]      40237
Name: Subjectivity, dtype: int64

In [35]:
# Label every Trump tweet with its subjectivity class ("Passive"/"Balanced"/"Subjective").
# assign() returns a new DataFrame instead of writing a column into a frame that
# was obtained by filtering first_df, which is what triggered SettingWithCopyWarning.
trump_sentiment = trump_sentiment.assign(
    Subjectivity_Class=pd.cut(trump_bin, bins=bins, labels=groups)
)
trump_sentiment

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,tweet,replies_count,retweets_count,likes_count,candidate,tweet_length,Subjectivity,Polarity,Sentiment,Subjectivity_Class
147166,mean Modi ji and XI are synonymous Donald tru...,0,0,0,Trump,29,0.40625,-0.156250,Negative,Balanced
147167,Toasted trump,0,0,0,Trump,2,0.00000,0.000000,Neutral,Passive
147168,And integrity,0,0,0,Trump,2,0.00000,0.000000,Neutral,Passive
147169,Tell Chris amp Biden to stop lying CNN Jake T...,0,0,0,Trump,39,0.32500,0.270833,Positive,Balanced
147170,The anchor is your abrasive conceit ship anch...,0,0,0,Trump,21,0.70000,0.500000,Positive,Balanced
...,...,...,...,...,...,...,...,...,...,...
421550,Organization,0,0,0,Trump,1,0.00000,0.000000,Neutral,Passive
421551,Are you kidding He too sleepy to listen to us...,0,0,0,Trump,24,0.00000,0.000000,Neutral,Passive
421552,Comedy president in the world,0,0,0,Trump,5,0.00000,0.000000,Neutral,Passive
421553,JAJAJAJA mco risa la gente que apoya Trump yq ...,1,0,1,Trump,22,0.00000,0.000000,Neutral,Passive


In [36]:
# Number of tweets in each subjectivity class
trump_sub_groups_count = trump_sentiment.groupby("Subjectivity_Class")["tweet"].count()
trump_sub_groups_count

Subjectivity_Class
Passive       133953
Balanced      100199
Subjective     40237
Name: tweet, dtype: int64

In [37]:
# Mean number of replies per subjectivity class.
# The column is selected before aggregating: calling mean() on the whole grouped
# frame also tries to average the text columns (tweet, candidate, Sentiment),
# which raises TypeError on pandas >= 2.0 and does unnecessary work.
trump_avg_replies = trump_sentiment.groupby("Subjectivity_Class")["replies_count"].mean()
trump_avg_replies

Subjectivity_Class
Passive       1.216083
Balanced      1.220551
Subjective    0.775679
Name: replies_count, dtype: float64

In [38]:
# Mean number of retweets per subjectivity class.
# The column is selected before aggregating: calling mean() on the whole grouped
# frame also tries to average the text columns (tweet, candidate, Sentiment),
# which raises TypeError on pandas >= 2.0 and does unnecessary work.
trump_avg_retweets = trump_sentiment.groupby("Subjectivity_Class")["retweets_count"].mean()
trump_avg_retweets

Subjectivity_Class
Passive       2.152591
Balanced      2.329544
Subjective    0.998186
Name: retweets_count, dtype: float64

In [39]:
# Mean number of likes per subjectivity class.
# The column is selected before aggregating: calling mean() on the whole grouped
# frame also tries to average the text columns (tweet, candidate, Sentiment),
# which raises TypeError on pandas >= 2.0 and does unnecessary work.
trump_avg_likes = trump_sentiment.groupby("Subjectivity_Class")["likes_count"].mean()
trump_avg_likes

Subjectivity_Class
Passive        9.726822
Balanced      10.354624
Subjective     5.554539
Name: likes_count, dtype: float64

In [40]:
# Mean tweet length (the tweet_length column) per subjectivity class.
# The column is selected before aggregating: calling mean() on the whole grouped
# frame also tries to average the text columns (tweet, candidate, Sentiment),
# which raises TypeError on pandas >= 2.0 and does unnecessary work.
trump_avg_tweet_length = trump_sentiment.groupby("Subjectivity_Class")["tweet_length"].mean()
trump_avg_tweet_length

Subjectivity_Class
Passive       12.583526
Balanced      23.578928
Subjective    16.523449
Name: tweet_length, dtype: float64

In [41]:
# Mean sentiment score (the Polarity column) per subjectivity class.
# The column is selected before aggregating: calling mean() on the whole grouped
# frame also tries to average the text columns (tweet, candidate, Sentiment),
# which raises TypeError on pandas >= 2.0 and does unnecessary work.
trump_avg_sentiment_score = trump_sentiment.groupby("Subjectivity_Class")["Polarity"].mean()
trump_avg_sentiment_score

Subjectivity_Class
Passive       0.012286
Balanced      0.093594
Subjective   -0.015504
Name: Polarity, dtype: float64

In [48]:
# Assemble the per-class Series into a single summary table:
# one row per subjectivity class, one column per metric.
trump_summary_columns = {
    "Avg Replies": trump_avg_replies,
    "Avg Retweets": trump_avg_retweets,
    "Avg Likes": trump_avg_likes,
    "Avg Tweet Length": trump_avg_tweet_length,
    "Avg Sentiment": trump_avg_sentiment_score,
    "Total": trump_sub_groups_count,
}
trump_subjectivity_table = pd.DataFrame(trump_summary_columns)

trump_subjectivity_table.head()

Unnamed: 0_level_0,Avg Replies,Avg Retweets,Avg Likes,Avg Tweet Length,Avg Sentiment,Total
Subjectivity_Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Passive,1.216083,2.152591,9.726822,12.583526,0.012286,133953
Balanced,1.220551,2.329544,10.354624,23.578928,0.093594,100199
Subjective,0.775679,0.998186,5.554539,16.523449,-0.015504,40237


In [49]:
# Render the averages as fixed-precision strings; "Total" is left as-is.
# NOTE: after this cell the formatted columns hold str values, not floats, so
# re-running the cell on the already formatted table raises ValueError.
trump_display_formats = {
    "Avg Replies": "{:.1f}",
    "Avg Retweets": "{:.1f}",
    "Avg Likes": "{:.1f}",
    "Avg Tweet Length": "{:.1f}",
    "Avg Sentiment": "{:.3f}",
}
for column_name, pattern in trump_display_formats.items():
    trump_subjectivity_table[column_name] = trump_subjectivity_table[column_name].map(pattern.format)

# Show the formatted table.
trump_subjectivity_table.head()

Unnamed: 0_level_0,Avg Replies,Avg Retweets,Avg Likes,Avg Tweet Length,Avg Sentiment,Total
Subjectivity_Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Passive,1.2,2.2,9.7,12.6,0.012,133953
Balanced,1.2,2.3,10.4,23.6,0.094,100199
Subjective,0.8,1.0,5.6,16.5,-0.016,40237


### Writing csv file for Dashboard

In [50]:
# Export the Trump summary table for the dashboard.
# The path is relative to the notebook's working directory and targets the same
# Concat_ML_Data folder the input CSV is read from, instead of a user-specific
# absolute Windows path that only exists on one machine.
trump_subjectivity_table.to_csv('Concat_ML_Data/first_debate_trump_subjectivity.csv', index=True, header=True)