In [1]:
#Subjectivity Analysis


In [2]:
import pandas as pd

In [3]:
# Read csv file
file=('Resources/Second_debate.csv')
first_df=pd.read_csv(file)
first_df.head()

Unnamed: 0,tweet,replies_count,retweets_count,likes_count,candidate,tweet_length,Subjectivity,Polarity,Sentiment
0,So why was trump elected,0,0,0,Biden,5,0.0,0.0,Neutral
1,Oklahoma early voting is October 29 30 and 31...,0,0,0,Biden,15,0.3,0.1,Positive
2,Open the state,0,0,0,Biden,3,0.5,0.0,Neutral
3,if you haven noticed more than half the count...,0,1,0,Biden,20,0.222222,0.111111,Positive
4,It came down to muting the president That what...,0,1,1,Biden,40,0.489815,0.028241,Positive


In [4]:
# Separating Biden and Trump Data
biden_sentiment=first_df[(first_df['candidate']=='Biden')]
trump_sentiment=first_df[(first_df['candidate']=='Trump')]

In [None]:
#Biden Subjectivity Analysis

In [5]:
# Creating Bins for Subjectivity
biden_bin=biden_sentiment.Subjectivity
biden_bin

0        0.000000
1        0.300000
2        0.500000
3        0.222222
4        0.489815
           ...   
94680    0.595000
94681    0.100000
94682    1.000000
94683    0.937500
94684    0.000000
Name: Subjectivity, Length: 94685, dtype: float64

In [None]:
#Creating Bins

In [6]:
# Assumption of Subjectivity Ranges
# 0.0 - 0.3 => Passive (or Nonchalant Tweeter)
# 0.3 - 0.7 => Balanced (or more Objective Tweeter)
# 0.7 - 1.0 => Subjective (or less Objective Tweeter)
bins = bins = [float('-0.5'),0.3,0.7,float('1.5')]
groups = ["Passive", "Balanced", "Subjective"]
biden_bin.groupby(pd.cut(biden_bin, bins)).count()

Subjectivity
(-0.5, 0.3]    46757
(0.3, 0.7]     35591
(0.7, 1.5]     12337
Name: Subjectivity, dtype: int64

In [7]:
# Categorize subjectivity bins
biden_sentiment["Subjectivity_Class"] = pd.cut(biden_bin, bins=bins, labels=groups)
biden_sentiment

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,tweet,replies_count,retweets_count,likes_count,candidate,tweet_length,Subjectivity,Polarity,Sentiment,Subjectivity_Class
0,So why was trump elected,0,0,0,Biden,5,0.000000,0.000000,Neutral,Passive
1,Oklahoma early voting is October 29 30 and 31...,0,0,0,Biden,15,0.300000,0.100000,Positive,Passive
2,Open the state,0,0,0,Biden,3,0.500000,0.000000,Neutral,Balanced
3,if you haven noticed more than half the count...,0,1,0,Biden,20,0.222222,0.111111,Positive,Passive
4,It came down to muting the president That what...,0,1,1,Biden,40,0.489815,0.028241,Positive,Balanced
...,...,...,...,...,...,...,...,...,...,...
94680,Joe Biden had the line of the night after Don...,0,0,0,Biden,36,0.595000,-0.048333,Negative,Balanced
94681,What do you expect Joe Biden coronavirus resp...,3,0,0,Biden,11,0.100000,0.000000,Neutral,Passive
94682,Proud to have voted for you,0,0,3,Biden,6,1.000000,0.800000,Positive,Subjective
94683,She cut off our President 111 times And she o...,0,0,0,Biden,34,0.937500,0.116667,Positive,Subjective


In [8]:
#Feature Engineering (Biden)

In [9]:
# Calculating Totals by Subjectivity Ranges
biden_sub_groups_count = biden_sentiment.groupby(["Subjectivity_Class"]).count()["tweet"]
biden_sub_groups_count

Subjectivity_Class
Passive       46757
Balanced      35591
Subjective    12337
Name: tweet, dtype: int64

In [10]:
# Average Replies
biden_avg_replies = biden_sentiment.groupby(["Subjectivity_Class"]).mean()["replies_count"]
biden_avg_replies

Subjectivity_Class
Passive       1.572620
Balanced      1.306679
Subjective    1.765583
Name: replies_count, dtype: float64

In [11]:
# Average Retweets
biden_avg_retweets = biden_sentiment.groupby(["Subjectivity_Class"]).mean()["retweets_count"]
biden_avg_retweets

Subjectivity_Class
Passive       4.058259
Balanced      3.505381
Subjective    3.653076
Name: retweets_count, dtype: float64

In [12]:
# Average Likes
biden_avg_likes = biden_sentiment.groupby(["Subjectivity_Class"]).mean()["likes_count"]
biden_avg_likes

Subjectivity_Class
Passive       19.226105
Balanced      14.639516
Subjective    19.595120
Name: likes_count, dtype: float64

In [13]:
# Average Tweet Length
biden_avg_tweet_length = biden_sentiment.groupby(["Subjectivity_Class"]).mean()["tweet_length"]
biden_avg_tweet_length

Subjectivity_Class
Passive       14.413200
Balanced      25.457531
Subjective    18.265218
Name: tweet_length, dtype: float64

In [14]:
# Average Sentiment Score
biden_avg_sentiment_score= biden_sentiment.groupby(["Subjectivity_Class"]).mean()["Polarity"]
biden_avg_sentiment_score

Subjectivity_Class
Passive       0.014744
Balanced      0.113267
Subjective    0.066271
Name: Polarity, dtype: float64

In [15]:
# Combine each Series for table
biden_subjectivity_table = pd.DataFrame({
               "Avg Replies": biden_avg_replies,
               "Avg Retweets": biden_avg_retweets,
               "Avg Likes": biden_avg_likes,
               "Avg Tweet Length": biden_avg_tweet_length,
               "Avg Sentiment" : biden_avg_sentiment_score,
               "Total" : biden_sub_groups_count})

biden_subjectivity_table.head()

Unnamed: 0_level_0,Avg Replies,Avg Retweets,Avg Likes,Avg Tweet Length,Avg Sentiment,Total
Subjectivity_Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Passive,1.57262,4.058259,19.226105,14.4132,0.014744,46757
Balanced,1.306679,3.505381,14.639516,25.457531,0.113267,35591
Subjective,1.765583,3.653076,19.59512,18.265218,0.066271,12337


In [16]:
# Format each column
biden_subjectivity_table["Avg Replies"] = biden_subjectivity_table["Avg Replies"].map("{:.1f}".format)

biden_subjectivity_table["Avg Retweets"] = biden_subjectivity_table["Avg Retweets"].map("{:.1f}".format)

biden_subjectivity_table["Avg Likes"] = biden_subjectivity_table["Avg Likes"].map("{:.1f}".format)

biden_subjectivity_table["Avg Tweet Length"] = biden_subjectivity_table["Avg Tweet Length"].map("{:.1f}".format)

biden_subjectivity_table["Avg Sentiment"] = biden_subjectivity_table["Avg Sentiment"].map("{:.3f}".format)


# Display the DataFrame.
biden_subjectivity_table.head()

Unnamed: 0_level_0,Avg Replies,Avg Retweets,Avg Likes,Avg Tweet Length,Avg Sentiment,Total
Subjectivity_Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Passive,1.6,4.1,19.2,14.4,0.015,46757
Balanced,1.3,3.5,14.6,25.5,0.113,35591
Subjective,1.8,3.7,19.6,18.3,0.066,12337


In [17]:
#Writing csv file for Dashboard

In [18]:
biden_subjectivity_table.to_csv(r'C:\Users\kevin\Documents\myrepo\Final-Project\Resources\Second_debate_biden_subjectivity.csv', index = True, header=True)

In [19]:
#Trump Subjectivity Analysis

In [20]:
# Creating Bins for Subjectivity
trump_bin=trump_sentiment.Subjectivity
trump_bin


94685     0.500000
94686     0.800000
94687     0.308333
94688     0.600000
94689     1.000000
            ...   
255596    0.000000
255597    0.000000
255598    0.547222
255599    0.100000
255600    1.000000
Name: Subjectivity, Length: 160916, dtype: float64

In [21]:
#Creating Bins

In [22]:
# Assumption of Subjectivity Ranges
# 0.0 - 0.3 => Passive (or Nonchalant Tweeter)
# 0.3 - 0.7 => Balanced (or more Objective Tweeter)
# 0.7 - 1.0 => Subjective (or less Objective Tweeter)
trump_bin.groupby(pd.cut(trump_bin, bins)).count()

Subjectivity
(-0.5, 0.3]    77259
(0.3, 0.7]     59357
(0.7, 1.5]     24300
Name: Subjectivity, dtype: int64

In [23]:
# Categorize subjectivity bins
trump_sentiment["Subjectivity_Class"] = pd.cut(trump_bin, bins=bins, labels=groups)
trump_sentiment

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,tweet,replies_count,retweets_count,likes_count,candidate,tweet_length,Subjectivity,Polarity,Sentiment,Subjectivity_Class
94685,SMH such ignorance,0,0,0,Trump,3,0.500000,0.000000,Neutral,Balanced
94686,Precise Hahahahahahahaha,0,0,0,Trump,2,0.800000,0.400000,Positive,Subjective
94687,shame on mr president 4 yrs ago when voted fo...,0,1,0,Trump,25,0.308333,0.066667,Positive,Balanced
94688,YEP And PTL you did Love you,0,0,0,Trump,7,0.600000,0.500000,Positive,Balanced
94689,not even your wife loves you Sad,0,0,0,Trump,7,1.000000,-0.500000,Negative,Subjective
...,...,...,...,...,...,...,...,...,...,...
255596,Keep on dreaming Trump,0,0,0,Trump,4,0.000000,0.000000,Neutral,Passive
255597,He lied People died,0,0,0,Trump,4,0.000000,0.000000,Neutral,Passive
255598,Thanks for your hard work to re elect Preside...,0,0,2,Trump,10,0.547222,0.236111,Positive,Balanced
255599,My Big President,0,0,0,Trump,3,0.100000,0.000000,Neutral,Passive


In [24]:
# Calculating Totals by Subjectivity Ranges
trump_sub_groups_count = trump_sentiment.groupby(["Subjectivity_Class"]).count()["tweet"]
trump_sub_groups_count

Subjectivity_Class
Passive       77259
Balanced      59357
Subjective    24300
Name: tweet, dtype: int64

In [25]:
# Average Replies
trump_avg_replies = trump_sentiment.groupby(["Subjectivity_Class"]).mean()["replies_count"]
trump_avg_replies

Subjectivity_Class
Passive       0.771496
Balanced      1.009215
Subjective    0.554733
Name: replies_count, dtype: float64

In [26]:
# Average Retweets
trump_avg_retweets = trump_sentiment.groupby(["Subjectivity_Class"]).mean()["retweets_count"]
trump_avg_retweets

Subjectivity_Class
Passive       1.397714
Balanced      2.131627
Subjective    0.898395
Name: retweets_count, dtype: float64

In [27]:
# Average Likes
trump_avg_likes = trump_sentiment.groupby(["Subjectivity_Class"]).mean()["likes_count"]
trump_avg_likes

Subjectivity_Class
Passive        8.497793
Balanced      10.751790
Subjective     6.537037
Name: likes_count, dtype: float64

In [28]:
# Average Tweet Length
trump_avg_tweet_length = trump_sentiment.groupby(["Subjectivity_Class"]).mean()["tweet_length"]
trump_avg_tweet_length

Subjectivity_Class
Passive       12.532106
Balanced      23.292737
Subjective    16.267737
Name: tweet_length, dtype: float64

In [29]:
# Average Sentiment
trump_avg_sentiment_score= trump_sentiment.groupby(["Subjectivity_Class"]).mean()["Polarity"]
trump_avg_sentiment_score

Subjectivity_Class
Passive       0.015592
Balanced      0.114384
Subjective    0.001986
Name: Polarity, dtype: float64

In [30]:
# Combine each Series for table
trump_subjectivity_table = pd.DataFrame({
               "Avg Replies": trump_avg_replies,
               "Avg Retweets": trump_avg_retweets,
               "Avg Likes": trump_avg_likes,
               "Avg Tweet Length": trump_avg_tweet_length,
               "Avg Sentiment" : trump_avg_sentiment_score,
               "Total" : trump_sub_groups_count})

trump_subjectivity_table.head()

Unnamed: 0_level_0,Avg Replies,Avg Retweets,Avg Likes,Avg Tweet Length,Avg Sentiment,Total
Subjectivity_Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Passive,0.771496,1.397714,8.497793,12.532106,0.015592,77259
Balanced,1.009215,2.131627,10.75179,23.292737,0.114384,59357
Subjective,0.554733,0.898395,6.537037,16.267737,0.001986,24300


In [31]:
# Format each column
trump_subjectivity_table["Avg Replies"] = trump_subjectivity_table["Avg Replies"].map("{:.1f}".format)

trump_subjectivity_table["Avg Retweets"] = trump_subjectivity_table["Avg Retweets"].map("{:.1f}".format)

trump_subjectivity_table["Avg Likes"] = trump_subjectivity_table["Avg Likes"].map("{:.1f}".format)

trump_subjectivity_table["Avg Tweet Length"] = trump_subjectivity_table["Avg Tweet Length"].map("{:.1f}".format)

trump_subjectivity_table["Avg Sentiment"] = trump_subjectivity_table["Avg Sentiment"].map("{:.3f}".format)


# Display the DataFrame.
trump_subjectivity_table.head()

Unnamed: 0_level_0,Avg Replies,Avg Retweets,Avg Likes,Avg Tweet Length,Avg Sentiment,Total
Subjectivity_Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Passive,0.8,1.4,8.5,12.5,0.016,77259
Balanced,1.0,2.1,10.8,23.3,0.114,59357
Subjective,0.6,0.9,6.5,16.3,0.002,24300


In [32]:
trump_subjectivity_table.to_csv(r'C:\Users\kevin\Documents\myrepo\Final-Project\Resources\Second_debate_trump_subjectivity.csv', index = True, header=True)