## Import Modules, Download Language Library & Read Data

In [27]:
# Module Imports
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA

# Fetch the VADER lexicon used by the sentiment analyzer
nltk.download('vader_lexicon')

# Load the cleaned tweets from CSV into a pandas data frame
df = pd.read_csv('tweets_list_cleaned.csv')

# Distinct candidate names, in order of first appearance in the data
candidate_names = df['Candidate Name'].drop_duplicates().tolist()

# Preview the first 5 rows
df.head()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/philippeble/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Unnamed: 0,District,Candidate Name,Name on Twitter,UserName,Post_Date,Post Day,Post Month,Post Year,Post_Content,Replies,Retweets,Likes
0,16,Jim Costa,Rep. Jim Costa,@RepJimCosta,03-11-2020,3,11,2020,Make your voice heard - VOTE! #Vote,0,1,5
1,16,Jim Costa,Rep. Jim Costa,@RepJimCosta,02-11-2020,2,11,2020,Fresno County: https://bit.ly/3kQvWFe\nMadera ...,1,0,1
2,16,Jim Costa,Rep. Jim Costa,@RepJimCosta,02-11-2020,2,11,2020,Tomorrow is Election Day! If you haven't alrea...,1,1,4
3,16,Jim Costa,Rep. Jim Costa,@RepJimCosta,02-11-2020,2,11,2020,This #VeteransSmallBusinessWeek we are recogni...,0,1,0
4,16,Jim Costa,Rep. Jim Costa,@RepJimCosta,30-10-2020,30,10,2020,4 more days! #Vote2020,0,0,5


## Derive a Compound Polarity Score from the VADER Lexicon and Assign It to Each Tweet

In [29]:
# Initialise the VADER sentiment analyzer
sia = SIA()


def compound_score(text):
    """Return the VADER compound polarity score of a tweet.

    Parameters
    ----------
    text : str
        Raw tweet text.

    Returns
    -------
    float
        The 'compound' entry of ``sia.polarity_scores(text)``, rounded to
        3 decimal places.
    """
    return round(float(sia.polarity_scores(text)['compound']), 3)


# Score every tweet in one pass. Series.map replaces the former
# Series.iteritems() loop: iteritems() was removed in pandas 2.0, and writing
# cell-by-cell through df.loc is slow. na_action='ignore' leaves missing
# tweets as NaN instead of handing a non-string value to the analyzer.
df['Content_Sentiment'] = df['Post_Content'].map(compound_score, na_action='ignore')

# Show first 5 data rows
df.head()

Unnamed: 0,District,Candidate Name,Name on Twitter,UserName,Post_Date,Post Day,Post Month,Post Year,Post_Content,Replies,Retweets,Likes,Content_Sentiment
0,16,Jim Costa,Rep. Jim Costa,@RepJimCosta,03-11-2020,3,11,2020,Make your voice heard - VOTE! #Vote,0,1,5,0.0
1,16,Jim Costa,Rep. Jim Costa,@RepJimCosta,02-11-2020,2,11,2020,Fresno County: https://bit.ly/3kQvWFe\nMadera ...,1,0,1,0.0
2,16,Jim Costa,Rep. Jim Costa,@RepJimCosta,02-11-2020,2,11,2020,Tomorrow is Election Day! If you haven't alrea...,1,1,4,-0.455
3,16,Jim Costa,Rep. Jim Costa,@RepJimCosta,02-11-2020,2,11,2020,This #VeteransSmallBusinessWeek we are recogni...,0,1,0,0.382
4,16,Jim Costa,Rep. Jim Costa,@RepJimCosta,30-10-2020,30,10,2020,4 more days! #Vote2020,0,0,5,0.0


## Group & Aggregate Average Social Media Engagement and Sentiment by Candidate

In [24]:
# Columns carried into the per-candidate summary
columns_oi = [
    'District',
    'Candidate Name',
    'Replies',
    'Retweets',
    'Likes',
    'Content_Sentiment',
]

# Mean of each selected column per candidate, rounded to 3 decimals.
# NOTE(review): District is averaged along with the metrics; it looks constant
# per candidate, so the mean just echoes the district number — confirm.
df_output = (
    df.loc[:, columns_oi]
    .groupby(by=["Candidate Name"])
    .agg("mean")
    .round(3)
)

# Display the aggregated table
df_output

Unnamed: 0_level_0,District,Replies,Retweets,Likes,Content_Sentiment
Candidate Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ammar Campa-Najjar,50.0,5.657,64.348,169.961,0.237
Anna Eshoo,18.0,5.419,12.343,26.419,0.063
Darrell Issa,50.0,96.277,107.239,350.077,0.213
David Valadao,21.0,4.676,12.73,30.189,0.355
Georgette Gómez,53.0,0.979,6.69,25.009,0.293
Jim Costa,16.0,2.729,8.26,16.031,0.254
John Cox,21.0,3.037,20.403,72.366,0.1
Kevin Cookingham,16.0,13.175,207.381,586.351,0.205
Rishi Kumar,18.0,0.242,0.828,2.923,0.203
Ritesh Tandom,17.0,4.94,41.786,129.167,0.24


In [25]:
# Write the per-candidate summary (index = candidate name) to CSV for analysis
df_output.to_csv(path_or_buf='sentiment_analysis.csv')