## Profiling mentions of and comments about candidates

In [5]:
import pandas as pd
import numpy as np
import glob
import re

import emoji
import googletrans
from googletrans import Translator
from watson_developer_cloud import PersonalityInsightsV3 as PersonalityInsights
import json
import nltk
import string
from nltk.stem.wordnet import WordNetLemmatizer

## Profiling Facebook comments

In [None]:
fb_df = pd.read_csv('fb_cleaned_070120.csv')

In [None]:
fb_df.columns

Index(['Unnamed: 0', 'Post_url', 'Post_text', 'Date', 'Shares', 'Reactions',
       'User', 'Comment', 'ID', 'Candidate', 'Shares_Count', 'Reactions_Count',
       'Reaction1', 'Reaction2', 'Reaction3'],
      dtype='object')

In [None]:
fb_df = fb_df[['Candidate', 'Comment']]

In [None]:
fb_df.head()

Unnamed: 0,Candidate,Comment,Platform
0,Bong Go,Salamat Mr. President you are the best and APO...,Facebook
1,Bong Go,Thank you Tatay Digong Love you ♥️,Facebook
2,Bong Go,Salamat tatay digong!You are the best ...god b...,Facebook
3,Bong Go,Thank you tatay digong sa greetings,Facebook
4,Bong Go,Thank you po tatay Digong! 😍 God bless you alw...,Facebook


In [None]:
fb_df.shape

(113621, 3)

In [None]:
fb_df.isnull().sum()

Candidate    0
Comment      0
Platform     0
dtype: int64

In [None]:
fb_df=fb_df[(fb_df['Comment']!='None') & (fb_df['Comment']!='[]')]

In [None]:
fb_df.shape

(107806, 3)

In [None]:
fb_df['Comment'] = fb_df['Comment'].apply(emoji.demojize)

In [None]:
fb_df.head()

Unnamed: 0,Candidate,Comment,Platform
0,Bong Go,Salamat Mr. President you are the best and APO...,Facebook
1,Bong Go,Thank you Tatay Digong Love you :heart_suit:,Facebook
2,Bong Go,Salamat tatay digong!You are the best ...god b...,Facebook
3,Bong Go,Thank you tatay digong sa greetings,Facebook
4,Bong Go,Thank you po tatay Digong! :smiling_face_with_...,Facebook


In [None]:
cndlist=[]
cmtlist=[]
cmtcount=[]

for i in fb_df['Candidate'].unique():
    cndlist.append(i)
    cmtlist.append(' '.join((fb_df[fb_df['Candidate']==i]['Comment']).to_list()))
    cmtcount.append(len(fb_df[fb_df['Candidate']==i]))

fb_df_summary=pd.DataFrame({'Candidate': cndlist, 'Comment': cmtlist, 'Comment Count': cmtcount})

In [None]:
fb_df_summary['Platform']='Facebook'

In [None]:
fb_df_summary.shape

(52, 4)

In [None]:
fb_df_summary['Comment'] = fb_df_summary['Comment'].apply(str).apply(lambda x: x.replace('[','').replace(']','').strip())

In [None]:
translator = Translator()
# fb_df_summary['Translated_text'] = fb_df_summary['Comment'].apply(str).apply(translator.translate).apply(lambda x: x.text)

In [None]:
trans=[]

for i in range(len(fb_df_summary)):
    try:
        trans.append(translator.translate(fb_df_summary['Comment'][i]).text)
        print(str(i) + " ok")
    except:
        trans.append(fb_df_summary['Comment'][i])
        print(str(i) + " can't translate")

0 ok
1 ok
2 ok
3 ok
4 ok
5 ok
6 can't translate
7 ok
8 ok
9 ok
10 ok
11 ok
12 ok
13 can't translate
14 ok
15 can't translate
16 ok
17 ok
18 ok
19 ok
20 can't translate
21 ok
22 can't translate
23 ok
24 ok
25 ok
26 ok
27 ok
28 ok
29 ok
30 ok
31 ok
32 ok
33 ok
34 ok
35 ok
36 ok
37 ok
38 ok
39 can't translate
40 ok
41 ok
42 ok
43 ok
44 ok
45 ok
46 ok
47 ok
48 ok
49 ok
50 ok
51 ok


In [None]:
fb_df_summary['Translated_text'] = trans

In [None]:
fb_df_summary.head()

Unnamed: 0,Candidate,Comment,Comment Count,Platform,Translated_text
0,Bong Go,Salamat Mr. President you are the best and APO...,1929,Facebook,Salamat Mr. President you are the best and APO...
1,Charlie Gaddi,'Pagpapalain nawA po kayo!...' tanong ko lang ...,3,Facebook,'Blessed be you! ...' I only asked if you were...
2,Bernard Austria,Good luck Kuya Berns... :smiling_face: Godbles...,8,Facebook,Good luck Kuya Berns ...: smiling_face: Godble...
3,Abner Afuang,I vote for you po True :thumbs_up::thumbs_up::...,404,Facebook,I vote for you po True :thumbs_up::thumbs_up::...
4,Raffy Alunan,Vote ko PO kau Vote alunan I voted for you mr ...,4607,Facebook,Vote ko PO kau Vote alunan I voted for you mr ...


In [None]:
stopwords = set(nltk.corpus.stopwords.words('english'))
punctuation = string.punctuation
lemmatizer = WordNetLemmatizer()


def text_cleaner(row):
    words=[]
    for sent in nltk.sent_tokenize(row):
        for word in nltk.wordpunct_tokenize(sent):
            word = word.lower()
            word = lemmatizer.lemmatize(word)
            if word not in stopwords and word not in punctuation:
                words.append(word)
    word_count=len(words)
    clean_text=' '.join(words)
    return word_count, clean_text

In [None]:
fb_df_summary['word_count'] = fb_df_summary['Translated_text'].apply(text_cleaner).apply(lambda x: x[0])
fb_df_summary['clean_text'] = fb_df_summary['Translated_text'].apply(text_cleaner).apply(lambda x: x[1])

In [None]:
fb_df_summary.head()

Unnamed: 0,Candidate,Comment,Comment Count,Platform,Translated_text,word_count,clean_text
0,Bong Go,Salamat Mr. President you are the best and APO...,1929,Facebook,Salamat Mr. President you are the best and APO...,23829,salamat mr president best apo marcos !!!!!! go...
1,Charlie Gaddi,'Pagpapalain nawA po kayo!...' tanong ko lang ...,3,Facebook,'Blessed be you! ...' I only asked if you were...,15,blessed ...' asked graduate manuel dela fuente...
2,Bernard Austria,Good luck Kuya Berns... :smiling_face: Godbles...,8,Facebook,Good luck Kuya Berns ...: smiling_face: Godble...,75,good luck kuya bern ...: smiling_face godbless...
3,Abner Afuang,I vote for you po True :thumbs_up::thumbs_up::...,404,Facebook,I vote for you po True :thumbs_up::thumbs_up::...,4786,vote po true thumbs_up :: thumbs_up :: thumbs_...
4,Raffy Alunan,Vote ko PO kau Vote alunan I voted for you mr ...,4607,Facebook,Vote ko PO kau Vote alunan I voted for you mr ...,44964,vote ko po kau vote alunan voted mr senator re...


In [None]:
fb_profile = fb_df_summary.copy()

In [None]:
fb_profile = fb_profile[fb_profile['word_count']>=100]

In [None]:
#copy Personality Insights API credentials here
# url=''
# apikey = ''

personality_insights = PersonalityInsights(url=url, iam_apikey=apikey, version='2017-10-13') 

  This is separate from the ipykernel package so we can avoid doing imports until


In [None]:
fb_profile['pia']= fb_profile['clean_text'].apply(lambda x: personality_insights.profile(x, content_type='text/plain',raw_scores=True).get_result())

In [None]:
fb_profiles_df = pd.DataFrame([])

for i in fb_profile.index:
    row=fb_profile['pia'][i]
    cand = fb_profile['Candidate'][i]
    needs = pd.DataFrame([{need['name'] : need['percentile'] for need in row['needs']}], index = [cand])
    val = pd.DataFrame([{val['name'] : val['percentile'] for val in row['values']}], index = [cand])
    ocean = pd.DataFrame([{big5['name'] : big5['percentile'] for big5 in row['personality']}], index = [cand])

    traits={}

    for big5 in row['personality']:
        for child in big5['children']:
            traits[str(child['name'])] = child['percentile']

    traits = pd.DataFrame([traits], index = [cand])
    temp = needs.join([val,ocean,traits])
    fb_profiles_df=pd.concat([fb_profiles_df,temp])

In [None]:
fb_profiles_df

Unnamed: 0,Challenge,Closeness,Curiosity,Excitement,Harmony,Ideal,Liberty,Love,Practicality,Self-expression,...,Modesty,Uncompromising,Sympathy,Trust,Fiery,Prone to worry,Melancholy,Immoderation,Self-consciousness,Susceptible to stress
Bong Go,0.277451,0.588992,0.954011,0.444973,0.595876,0.36136,0.777409,0.526197,0.687256,0.958058,...,0.553728,0.310322,0.564614,0.505812,0.585779,0.739281,0.799756,0.057815,0.81845,0.768938
Abner Afuang,0.294631,0.416068,0.891295,0.395108,0.374589,0.312939,0.576307,0.438414,0.580854,0.862449,...,0.563389,0.333343,0.601964,0.446972,0.602583,0.754094,0.81438,0.237014,0.784075,0.767739
Raffy Alunan,0.274095,0.586411,0.942743,0.415045,0.603707,0.351845,0.627322,0.482268,0.682361,0.931878,...,0.53318,0.304513,0.568428,0.450578,0.622159,0.76207,0.803698,0.039118,0.848068,0.783182
Pilo Hilbay,0.296092,0.47593,0.958855,0.41032,0.470092,0.32717,0.672215,0.46542,0.732118,0.927585,...,0.537539,0.315089,0.590272,0.38747,0.624231,0.780787,0.808733,0.124088,0.817082,0.780377
Samira Gutoc,0.285378,0.635685,0.972334,0.450044,0.684467,0.393544,0.728918,0.528295,0.73508,0.980039,...,0.521523,0.290326,0.558908,0.419569,0.680196,0.832126,0.809266,0.026986,0.882307,0.835397
Emily Mallillin,0.051203,0.270341,0.245855,0.033637,0.424954,0.050053,0.052246,0.275428,0.057899,0.164593,...,0.255147,0.884407,0.987398,0.34777,0.13265,0.180373,0.142592,0.000488,0.06648,0.073596
Koko Pimentel,0.213714,0.819907,0.92432,0.44027,0.860908,0.435964,0.697677,0.65601,0.327741,0.908808,...,0.511907,0.289142,0.531217,0.609558,0.609567,0.748269,0.766518,0.005182,0.823522,0.781976
Bato Dela Rosa,0.301358,0.506347,0.946465,0.419177,0.493914,0.336597,0.685414,0.477358,0.734943,0.957264,...,0.550682,0.314235,0.58092,0.39744,0.598897,0.738031,0.80631,0.132318,0.823439,0.779657
Bam Aquino,0.30471,0.654385,0.953409,0.458886,0.691576,0.395503,0.735388,0.54762,0.803512,0.975308,...,0.503183,0.290804,0.548273,0.42092,0.621974,0.762617,0.792134,0.024002,0.848455,0.767452
Bong Revilla,0.262162,0.666673,0.904813,0.455176,0.654855,0.400731,0.786061,0.578522,0.526328,0.987403,...,0.595576,0.324176,0.563203,0.545364,0.596747,0.745851,0.811243,0.067086,0.817881,0.790598


In [None]:
fb_pia_scores = fb_profiles_df.reset_index()
fb_pia_data = fb_profile.merge(fb_pia_scores, left_on='Candidate', right_on='index').drop(columns='index')
fb_pia_data.to_csv('FB_Audience_PersonalityScores_Percentiles.csv')

In [None]:
fb_profiles_raw=pd.DataFrame([])

for i in fb_profile.index:
    row=fb_profile['pia'][i]
    cand = fb_profile['Candidate'][i]
    needs = pd.DataFrame([{need['name'] : need['raw_score'] for need in row['needs']}], index = [cand])
    val = pd.DataFrame([{val['name'] : val['raw_score'] for val in row['values']}], index = [cand])
    ocean = pd.DataFrame([{big5['name'] : big5['raw_score'] for big5 in row['personality']}], index = [cand])

    traits={}

    for big5 in row['personality']:
        for child in big5['children']:
            traits[str(child['name'])] = child['raw_score']

    traits = pd.DataFrame([traits], index = [cand])
    temp = needs.join([val,ocean,traits])
    fb_profiles_raw=pd.concat([fb_profiles_raw,temp])

In [None]:
fb_pia_rawscores = fb_profiles_raw.reset_index()
fb_pia_rawdata = fb_profile.merge(fb_pia_rawscores, left_on='Candidate', right_on='index').drop(columns='index')
fb_pia_rawdata.to_csv('FB_Audience_PersonalityScores_Raw.csv')

## Profiling Twitter mentions

In [13]:
twit_df = pd.read_csv('twitter_cleaned.csv')

In [14]:
twit_df.shape

(259029, 8)

In [15]:
twit_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 259029 entries, 0 to 259028
Data columns (total 8 columns):
candidate_num    259029 non-null int64
text             259029 non-null object
date             259029 non-null object
favorites        259029 non-null int64
retweets         259029 non-null int64
replies          259029 non-null int64
hashtags         53444 non-null object
mentions         50610 non-null object
dtypes: int64(4), object(4)
memory usage: 15.8+ MB


In [16]:
twit_df.head()

Unnamed: 0,candidate_num,text,date,favorites,retweets,replies,hashtags,mentions
0,1,Here is the certified list of senatorial candi...,2019-02-12 12:45:05+08:00,0,0,0,,
1,1,Who is Vangie Abejo 😭😭😭,2019-03-06 17:35:05+08:00,0,0,0,,
2,1,VANGIE ABEJO Independent Vangie Abejo served a...,2019-03-16 23:11:42+08:00,2,0,1,,
3,1,"Abejo, Vangie",2019-03-27 18:54:08+08:00,0,0,0,,
4,1,"Inclined not to vote for: ARIAS, MARCELINO CAC...",2019-03-30 23:54:06+08:00,0,0,1,,


In [17]:
twit_df.isnull().sum()

candidate_num         0
text                  0
date                  0
favorites             0
retweets              0
replies               0
hashtags         205585
mentions         208419
dtype: int64

In [None]:
twit_df = twit_df[['candidate_num', 'text']]

In [None]:
display(twit_df[twit_df['text']=='None'])
display(twit_df[twit_df['text']==''])
display(twit_df[twit_df['text']=='[]'])

Unnamed: 0,candidate_num,text


Unnamed: 0,candidate_num,text


Unnamed: 0,candidate_num,text


In [None]:
twit_df['text'] = twit_df['text'].apply(emoji.demojize)

In [None]:
cndnlist=[]
twtlist=[]
twtcount=[]

for i in twit_df['candidate_num'].unique():
    cndnlist.append(i)
    twtlist.append(' '.join((twit_df[twit_df['candidate_num']==i]['text']).to_list()))
    twtcount.append(len(twit_df[twit_df['candidate_num']==i]))

twit_df_summary=pd.DataFrame({'Candidate_num': cndnlist, 'Text': twtlist, 'Post Count': twtcount})
twit_df_summary['Platform']='Twitter'

In [None]:
twit_df_summary.shape

(62, 4)

In [None]:
twit_df_summary['Text'] = twit_df_summary['Text'].apply(str).apply(lambda x: x.replace('[','').replace(']','').strip())
# twit_df_summary['Translated_text'] = twit_df_summary['Text'].apply(str).apply(translator.translate).apply(lambda x: x.text)

In [None]:
trans=[]

for i in range(len(twit_df_summary)):
    try:
        trans.append(translator.translate(twit_df_summary['Text'][i]).text)
        print(str(i) + " ok")
    except:
        trans.append(twit_df_summary['Text'][i])
        print(str(i) + " can't translate")

0 ok
1 ok
2 ok
3 ok
4 can't translate
5 ok
6 ok
7 can't translate
8 can't translate
9 ok
10 ok
11 ok
12 ok
13 ok
14 ok
15 can't translate
16 ok
17 ok
18 ok
19 ok
20 ok
21 can't translate
22 ok
23 can't translate
24 can't translate
25 can't translate
26 ok
27 ok
28 ok
29 ok
30 ok
31 can't translate
32 ok
33 can't translate
34 ok
35 can't translate
36 ok
37 ok
38 ok
39 can't translate
40 ok
41 ok
42 ok
43 ok
44 can't translate
45 can't translate
46 ok
47 ok
48 ok
49 ok
50 ok
51 ok
52 ok
53 can't translate
54 can't translate
55 ok
56 can't translate
57 ok
58 can't translate
59 can't translate
60 ok
61 can't translate


In [None]:
twit_df_summary['Translated_text']=trans

In [None]:
twit_df_summary['word_count'] = twit_df_summary['Translated_text'].apply(text_cleaner).apply(lambda x: x[0])
twit_df_summary['clean_text'] = twit_df_summary['Translated_text'].apply(text_cleaner).apply(lambda x: x[1])

twit_profile = twit_df_summary.copy()
twit_profile = twit_profile[twit_profile['word_count']>=100]

In [None]:
twit_profile['pia']= twit_profile['clean_text'].apply(lambda x: personality_insights.profile(x, content_type='text/plain',raw_scores=True).get_result())

In [None]:
twit_profiles_df = pd.DataFrame([])

for i in twit_profile.index:
    row=twit_profile['pia'][i]
    cand = twit_profile['Candidate_num'][i]
    needs = pd.DataFrame([{need['name'] : need['percentile'] for need in row['needs']}], index = [cand])
    val = pd.DataFrame([{val['name'] : val['percentile'] for val in row['values']}], index = [cand])
    ocean = pd.DataFrame([{big5['name'] : big5['percentile'] for big5 in row['personality']}], index = [cand])

    traits={}

    for big5 in row['personality']:
        for child in big5['children']:
            traits[str(child['name'])] = child['percentile']

    traits = pd.DataFrame([traits], index = [cand])
    temp = needs.join([val,ocean,traits])
    twit_profiles_df=pd.concat([twit_profiles_df,temp])

twit_pia_scores = twit_profiles_df.reset_index()
twit_pia_data = twit_profile.merge(twit_pia_scores, left_on='Candidate_num', right_on='index').drop(columns='index')
twit_pia_data.to_csv('Twitter_Audience_PersonalityScores_Percentiles.csv')

In [None]:
twit_profiles_raw=pd.DataFrame([])

for i in twit_profile.index:
    row = twit_profile['pia'][i]
    cand = twit_profile['Candidate_num'][i]
    needs = pd.DataFrame([{need['name'] : need['raw_score'] for need in row['needs']}], index = [cand])
    val = pd.DataFrame([{val['name'] : val['raw_score'] for val in row['values']}], index = [cand])
    ocean = pd.DataFrame([{big5['name'] : big5['raw_score'] for big5 in row['personality']}], index = [cand])

    traits={}

    for big5 in row['personality']:
        for child in big5['children']:
            traits[str(child['name'])] = child['raw_score']

    traits = pd.DataFrame([traits], index = [cand])
    temp = needs.join([val,ocean,traits])
    twit_profiles_raw=pd.concat([twit_profiles_raw,temp])

twit_pia_rawscores = twit_profiles_raw.reset_index()
twit_pia_rawdata = twit_profile.merge(twit_pia_rawscores, left_on='Candidate_num', right_on='index').drop(columns='index')
twit_pia_rawdata.to_csv('Twitter_Audience_PersonalityScores_Raw.csv')

## Combining audience scores from Facebook and Twitter

If using percentiles, opt for scores from Twitter audience. Has data for audience of all personalities

In [None]:
candidates=pd.read_csv('agg_candidate.csv')[['#', 'candidate']]

In [None]:
candidates.head()

Unnamed: 0,#,candidate
0,1,Vangie Abejo
1,2,Abner Afuang
2,3,Freddie Aguilar
3,4,Shariff Albani
4,5,Gary Alejano


In [None]:
twit_aud = twit_pia_data.merge(candidates, left_on='Candidate_num', right_on='#')

In [None]:
twit_aud.head()

Unnamed: 0,Candidate_num,Text,Post Count,Platform,Translated_text,word_count,clean_text,pia,Challenge,Closeness,...,Sympathy,Trust,Fiery,Prone to worry,Melancholy,Immoderation,Self-consciousness,Susceptible to stress,#,candidate
0,1,Here is the certified list of senatorial candi...,13,Twitter,Here is the certified list of senatorial candi...,332,certified list senatorial candidate may 2019 p...,"{'word_count': 298, 'word_count_message': 'The...",0.040886,0.502701,...,0.811167,0.209482,0.977056,0.991719,0.949507,0.019149,0.978811,0.945928,1,Vangie Abejo
1,2,Here is the certified list of senatorial candi...,199,Twitter,Here is the certified list of senatorial candi...,3290,certified list senatorial candidate may 2019 p...,"{'word_count': 3268, 'processed_language': 'en...",0.2252,0.753283,...,0.461449,0.238956,0.873692,0.946612,0.834778,0.001465,0.974206,0.846837,2,Abner Afuang
2,3,"may kulang, sina Freddie Aguilar at Lito Lapid...",1701,Twitter,"may kulang, sina Freddie Aguilar at Lito Lapid...",28520,may kulang sina freddie aguilar lito lapid hug...,"{'word_count': 28661, 'processed_language': 'e...",0.182894,0.673239,...,0.524033,0.216106,0.787384,0.877754,0.846714,0.041326,0.934077,0.860202,3,Freddie Aguilar
3,4,Here is the certified list of senatorial candi...,119,Twitter,Here is the certified list of senatorial candi...,2929,certified list senatorial candidate may 2019 p...,"{'word_count': 2759, 'processed_language': 'en...",0.275165,0.740147,...,0.489494,0.426106,0.711497,0.9069,0.800035,0.001778,0.958155,0.822721,4,Shariff Albani
4,5,"If you think that’s bad, you should see Ilocos...",10837,Twitter,"If you think that’s bad, you should see Ilocos...",184785,think ’ bad see ilocos dito sa sipalay pinatan...,"{'word_count': 38406, 'processed_language': 'e...",0.287582,0.632948,...,0.545506,0.35675,0.672221,0.826341,0.80116,0.023004,0.883764,0.805235,5,Gary Alejano


In [None]:
twit_aud.columns

Index(['Candidate_num', 'Text', 'Post Count', 'Platform', 'Translated_text',
       'word_count', 'clean_text', 'pia', 'Challenge', 'Closeness',
       'Curiosity', 'Excitement', 'Harmony', 'Ideal', 'Liberty', 'Love',
       'Practicality', 'Self-expression', 'Stability', 'Structure',
       'Conservation', 'Openness to change', 'Hedonism', 'Self-enhancement',
       'Self-transcendence', 'Openness', 'Conscientiousness', 'Extraversion',
       'Agreeableness', 'Emotional range', 'Adventurousness',
       'Artistic interests', 'Emotionality', 'Imagination', 'Intellect',
       'Authority-challenging', 'Achievement striving', 'Cautiousness',
       'Dutifulness', 'Orderliness', 'Self-discipline', 'Self-efficacy',
       'Activity level', 'Assertiveness', 'Cheerfulness', 'Excitement-seeking',
       'Outgoing', 'Gregariousness', 'Altruism', 'Cooperation', 'Modesty',
       'Uncompromising', 'Sympathy', 'Trust', 'Fiery', 'Prone to worry',
       'Melancholy', 'Immoderation', 'Self-consciou

In [None]:
twit_aud = twit_aud[['candidate','Post Count', 'Platform', 'Challenge', 'Closeness',
       'Curiosity', 'Excitement', 'Harmony', 'Ideal', 'Liberty', 'Love',
       'Practicality', 'Self-expression', 'Stability', 'Structure',
       'Conservation', 'Openness to change', 'Hedonism', 'Self-enhancement',
       'Self-transcendence', 'Openness', 'Conscientiousness', 'Extraversion',
       'Agreeableness', 'Emotional range', 'Adventurousness',
       'Artistic interests', 'Emotionality', 'Imagination', 'Intellect',
       'Authority-challenging', 'Achievement striving', 'Cautiousness',
       'Dutifulness', 'Orderliness', 'Self-discipline', 'Self-efficacy',
       'Activity level', 'Assertiveness', 'Cheerfulness', 'Excitement-seeking',
       'Outgoing', 'Gregariousness', 'Altruism', 'Cooperation', 'Modesty',
       'Uncompromising', 'Sympathy', 'Trust', 'Fiery', 'Prone to worry',
       'Melancholy', 'Immoderation', 'Self-consciousness',
       'Susceptible to stress']].rename(columns={'candidate':'Candidate','Post Count':'Count'})

In [None]:
fb_aud = fb_pia_data[['Candidate', 'Comment Count', 'Platform', 'Challenge', 'Closeness',
       'Curiosity', 'Excitement', 'Harmony', 'Ideal', 'Liberty', 'Love',
       'Practicality', 'Self-expression', 'Stability', 'Structure',
       'Conservation', 'Openness to change', 'Hedonism', 'Self-enhancement',
       'Self-transcendence', 'Openness', 'Conscientiousness', 'Extraversion',
       'Agreeableness', 'Emotional range', 'Adventurousness',
       'Artistic interests', 'Emotionality', 'Imagination', 'Intellect',
       'Authority-challenging', 'Achievement striving', 'Cautiousness',
       'Dutifulness', 'Orderliness', 'Self-discipline', 'Self-efficacy',
       'Activity level', 'Assertiveness', 'Cheerfulness', 'Excitement-seeking',
       'Outgoing', 'Gregariousness', 'Altruism', 'Cooperation', 'Modesty',
       'Uncompromising', 'Sympathy', 'Trust', 'Fiery', 'Prone to worry',
       'Melancholy', 'Immoderation', 'Self-consciousness',
       'Susceptible to stress']].rename(columns={'Comment Count':'Count'})

In [None]:
aud = twit_aud.merge(fb_aud, how='left', on='Candidate')
aud.head()

Unnamed: 0,Candidate,Count_x,Platform_x,Challenge_x,Closeness_x,Curiosity_x,Excitement_x,Harmony_x,Ideal_x,Liberty_x,...,Modesty_y,Uncompromising_y,Sympathy_y,Trust_y,Fiery_y,Prone to worry_y,Melancholy_y,Immoderation_y,Self-consciousness_y,Susceptible to stress_y
0,Vangie Abejo,13,Twitter,0.040886,0.502701,0.444015,0.284566,0.796434,0.47786,0.183793,...,,,,,,,,,,
1,Abner Afuang,199,Twitter,0.2252,0.753283,0.90773,0.403599,0.920531,0.49291,0.472906,...,0.563389,0.333343,0.601964,0.446972,0.602583,0.754094,0.81438,0.237014,0.784075,0.767739
2,Freddie Aguilar,1701,Twitter,0.182894,0.673239,0.850364,0.420639,0.734997,0.376927,0.584469,...,,,,,,,,,,
3,Shariff Albani,119,Twitter,0.275165,0.740147,0.956393,0.438261,0.853061,0.518954,0.531703,...,0.185674,0.579268,0.87725,0.701384,0.491036,0.488637,0.406366,0.000155,0.369092,0.372654
4,Gary Alejano,10837,Twitter,0.287582,0.632948,0.966161,0.440037,0.694518,0.36545,0.680693,...,0.538558,0.314489,0.584003,0.434789,0.614552,0.770177,0.806279,0.081753,0.821161,0.773933


In [None]:
aud.shape

(62, 109)

In [None]:
scores = ['Challenge', 'Closeness',
       'Curiosity', 'Excitement', 'Harmony', 'Ideal', 'Liberty', 'Love',
       'Practicality', 'Self-expression', 'Stability', 'Structure',
       'Conservation', 'Openness to change', 'Hedonism', 'Self-enhancement',
       'Self-transcendence', 'Openness', 'Conscientiousness', 'Extraversion',
       'Agreeableness', 'Emotional range', 'Adventurousness',
       'Artistic interests', 'Emotionality', 'Imagination', 'Intellect',
       'Authority-challenging', 'Achievement striving', 'Cautiousness',
       'Dutifulness', 'Orderliness', 'Self-discipline', 'Self-efficacy',
       'Activity level', 'Assertiveness', 'Cheerfulness', 'Excitement-seeking',
       'Outgoing', 'Gregariousness', 'Altruism', 'Cooperation', 'Modesty',
       'Uncompromising', 'Sympathy', 'Trust', 'Fiery', 'Prone to worry',
       'Melancholy', 'Immoderation', 'Self-consciousness',
       'Susceptible to stress']

In [None]:
aud = aud.fillna(0).replace(np.nan, 0)
for col in scores:
    aud[col] = ((aud[col+'_x']*aud['Count_x']) + (aud[col+'_y']*aud['Count_y']))/(aud['Count_x']+aud['Count_y'])

In [None]:
scores.extend(['Candidate', 'Count_x', 'Count_y'])
aud_data = aud[scores]

In [None]:
aud_data.columns

Index(['Challenge', 'Closeness', 'Curiosity', 'Excitement', 'Harmony', 'Ideal',
       'Liberty', 'Love', 'Practicality', 'Self-expression', 'Stability',
       'Structure', 'Conservation', 'Openness to change', 'Hedonism',
       'Self-enhancement', 'Self-transcendence', 'Openness',
       'Conscientiousness', 'Extraversion', 'Agreeableness', 'Emotional range',
       'Adventurousness', 'Artistic interests', 'Emotionality', 'Imagination',
       'Intellect', 'Authority-challenging', 'Achievement striving',
       'Cautiousness', 'Dutifulness', 'Orderliness', 'Self-discipline',
       'Self-efficacy', 'Activity level', 'Assertiveness', 'Cheerfulness',
       'Excitement-seeking', 'Outgoing', 'Gregariousness', 'Altruism',
       'Cooperation', 'Modesty', 'Uncompromising', 'Sympathy', 'Trust',
       'Fiery', 'Prone to worry', 'Melancholy', 'Immoderation',
       'Self-consciousness', 'Susceptible to stress', 'Candidate', 'Count_x',
       'Count_y'],
      dtype='object')

In [None]:
aud_data=aud_data[['Candidate', 'Count_x','Count_y', 'Challenge', 'Closeness', 
        'Curiosity', 'Excitement', 'Harmony', 'Ideal',
       'Liberty', 'Love', 'Practicality', 'Self-expression', 'Stability',
       'Structure', 'Conservation', 'Openness to change', 'Hedonism',
       'Self-enhancement', 'Self-transcendence', 'Openness',
       'Conscientiousness', 'Extraversion', 'Agreeableness', 'Emotional range',
       'Adventurousness', 'Artistic interests', 'Emotionality', 'Imagination',
       'Intellect', 'Authority-challenging', 'Achievement striving',
       'Cautiousness', 'Dutifulness', 'Orderliness', 'Self-discipline',
       'Self-efficacy', 'Activity level', 'Assertiveness', 'Cheerfulness',
       'Excitement-seeking', 'Outgoing', 'Gregariousness', 'Altruism',
       'Cooperation', 'Modesty', 'Uncompromising', 'Sympathy', 'Trust',
       'Fiery', 'Prone to worry', 'Melancholy', 'Immoderation',
       'Self-consciousness', 'Susceptible to stress']].rename(columns={'Count_x': 'Twitter_count', 'Count_y': 'Facebook_count'})

In [None]:
aud_data.to_csv('AudienceScores_Percentiles.csv')

If using raw scores, weigh average of FB and Twitter raw scores. Twitter has data for audience of all personalities, FB misses some personalities

In [None]:
twit_aud_raw = twit_pia_rawdata.merge(candidates, left_on='Candidate_num', right_on='#')

In [None]:
twit_aud_raw = twit_aud_raw[['candidate','Post Count', 'Platform', 'Challenge', 'Closeness',
       'Curiosity', 'Excitement', 'Harmony', 'Ideal', 'Liberty', 'Love',
       'Practicality', 'Self-expression', 'Stability', 'Structure',
       'Conservation', 'Openness to change', 'Hedonism', 'Self-enhancement',
       'Self-transcendence', 'Openness', 'Conscientiousness', 'Extraversion',
       'Agreeableness', 'Emotional range', 'Adventurousness',
       'Artistic interests', 'Emotionality', 'Imagination', 'Intellect',
       'Authority-challenging', 'Achievement striving', 'Cautiousness',
       'Dutifulness', 'Orderliness', 'Self-discipline', 'Self-efficacy',
       'Activity level', 'Assertiveness', 'Cheerfulness', 'Excitement-seeking',
       'Outgoing', 'Gregariousness', 'Altruism', 'Cooperation', 'Modesty',
       'Uncompromising', 'Sympathy', 'Trust', 'Fiery', 'Prone to worry',
       'Melancholy', 'Immoderation', 'Self-consciousness',
       'Susceptible to stress']].rename(columns={'candidate':'Candidate','Post Count':'Count'})

In [None]:
fb_pia_rawdata.columns

Index(['Candidate', 'Comment', 'Comment Count', 'Platform', 'Translated_text',
       'word_count', 'clean_text', 'pia', 'Challenge', 'Closeness',
       'Curiosity', 'Excitement', 'Harmony', 'Ideal', 'Liberty', 'Love',
       'Practicality', 'Self-expression', 'Stability', 'Structure',
       'Conservation', 'Openness to change', 'Hedonism', 'Self-enhancement',
       'Self-transcendence', 'Openness', 'Conscientiousness', 'Extraversion',
       'Agreeableness', 'Emotional range', 'Adventurousness',
       'Artistic interests', 'Emotionality', 'Imagination', 'Intellect',
       'Authority-challenging', 'Achievement striving', 'Cautiousness',
       'Dutifulness', 'Orderliness', 'Self-discipline', 'Self-efficacy',
       'Activity level', 'Assertiveness', 'Cheerfulness', 'Excitement-seeking',
       'Outgoing', 'Gregariousness', 'Altruism', 'Cooperation', 'Modesty',
       'Uncompromising', 'Sympathy', 'Trust', 'Fiery', 'Prone to worry',
       'Melancholy', 'Immoderation', 'Self-consci

In [None]:
fb_aud_raw = fb_pia_rawdata[['Candidate', 'Comment Count', 'Platform', 'Challenge', 'Closeness',
       'Curiosity', 'Excitement', 'Harmony', 'Ideal', 'Liberty', 'Love',
       'Practicality', 'Self-expression', 'Stability', 'Structure',
       'Conservation', 'Openness to change', 'Hedonism', 'Self-enhancement',
       'Self-transcendence', 'Openness', 'Conscientiousness', 'Extraversion',
       'Agreeableness', 'Emotional range', 'Adventurousness',
       'Artistic interests', 'Emotionality', 'Imagination', 'Intellect',
       'Authority-challenging', 'Achievement striving', 'Cautiousness',
       'Dutifulness', 'Orderliness', 'Self-discipline', 'Self-efficacy',
       'Activity level', 'Assertiveness', 'Cheerfulness', 'Excitement-seeking',
       'Outgoing', 'Gregariousness', 'Altruism', 'Cooperation', 'Modesty',
       'Uncompromising', 'Sympathy', 'Trust', 'Fiery', 'Prone to worry',
       'Melancholy', 'Immoderation', 'Self-consciousness',
       'Susceptible to stress']].rename(columns={'Comment Count':'Count'})

In [None]:
fb_aud_raw.columns==twit_aud_raw.columns

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [None]:
scores = ['Challenge', 'Closeness',
       'Curiosity', 'Excitement', 'Harmony', 'Ideal', 'Liberty', 'Love',
       'Practicality', 'Self-expression', 'Stability', 'Structure',
       'Conservation', 'Openness to change', 'Hedonism', 'Self-enhancement',
       'Self-transcendence', 'Openness', 'Conscientiousness', 'Extraversion',
       'Agreeableness', 'Emotional range', 'Adventurousness',
       'Artistic interests', 'Emotionality', 'Imagination', 'Intellect',
       'Authority-challenging', 'Achievement striving', 'Cautiousness',
       'Dutifulness', 'Orderliness', 'Self-discipline', 'Self-efficacy',
       'Activity level', 'Assertiveness', 'Cheerfulness', 'Excitement-seeking',
       'Outgoing', 'Gregariousness', 'Altruism', 'Cooperation', 'Modesty',
       'Uncompromising', 'Sympathy', 'Trust', 'Fiery', 'Prone to worry',
       'Melancholy', 'Immoderation', 'Self-consciousness',
       'Susceptible to stress']

In [None]:
aud_raw = twit_aud_raw.merge(fb_aud_raw, how='left', on='Candidate')
aud_raw.head()

Unnamed: 0,Candidate,Count_x,Platform_x,Challenge_x,Closeness_x,Curiosity_x,Excitement_x,Harmony_x,Ideal_x,Liberty_x,...,Modesty_y,Uncompromising_y,Sympathy_y,Trust_y,Fiery_y,Prone to worry_y,Melancholy_y,Immoderation_y,Self-consciousness_y,Susceptible to stress_y
0,Vangie Abejo,13,Twitter,0.653725,0.793661,0.812945,0.63998,0.837881,0.688416,0.702758,...,,,,,,,,,,
1,Abner Afuang,199,Twitter,0.697935,0.823759,0.851573,0.665819,0.857899,0.69021,0.733965,...,0.456795,0.605489,0.673346,0.58031,0.549936,0.640993,0.505073,0.473075,0.588075,0.514222
2,Freddie Aguilar,1701,Twitter,0.691224,0.813285,0.843969,0.669297,0.830942,0.676153,0.744504,...,,,,,,,,,,
3,Shariff Albani,119,Twitter,0.704998,0.821937,0.861663,0.672862,0.845502,0.693313,0.739492,...,0.401205,0.635249,0.707824,0.60757,0.534487,0.592169,0.433434,0.361965,0.529126,0.446926
4,Gary Alejano,10837,Twitter,0.706648,0.808438,0.864739,0.67322,0.826823,0.67471,0.754105,...,0.453484,0.603013,0.671579,0.579037,0.55164,0.644539,0.50318,0.447038,0.595114,0.515524


In [None]:
aud_raw.columns 

Index(['Candidate', 'Count_x', 'Platform_x', 'Challenge_x', 'Closeness_x',
       'Curiosity_x', 'Excitement_x', 'Harmony_x', 'Ideal_x', 'Liberty_x',
       ...
       'Modesty_y', 'Uncompromising_y', 'Sympathy_y', 'Trust_y', 'Fiery_y',
       'Prone to worry_y', 'Melancholy_y', 'Immoderation_y',
       'Self-consciousness_y', 'Susceptible to stress_y'],
      dtype='object', length=109)

In [None]:
aud_raw = aud_raw.fillna(0).replace(np.nan, 0)

In [None]:
aud_raw.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 62 entries, 0 to 61
Columns: 161 entries, Candidate to Susceptible to stress
dtypes: float64(157), int64(1), object(3)
memory usage: 78.5+ KB


In [None]:
for col in scores:
    aud_raw[col] = ((aud_raw[col+'_x']*aud_raw['Count_x']) + (aud_raw[col+'_y']*aud_raw['Count_y']))/(aud_raw['Count_x']+aud_raw['Count_y'])

In [None]:
aud_raw.shape

(62, 161)

In [None]:
scores.extend(['Candidate', 'Count_x', 'Count_y'])
scores

['Challenge',
 'Closeness',
 'Curiosity',
 'Excitement',
 'Harmony',
 'Ideal',
 'Liberty',
 'Love',
 'Practicality',
 'Self-expression',
 'Stability',
 'Structure',
 'Conservation',
 'Openness to change',
 'Hedonism',
 'Self-enhancement',
 'Self-transcendence',
 'Openness',
 'Conscientiousness',
 'Extraversion',
 'Agreeableness',
 'Emotional range',
 'Adventurousness',
 'Artistic interests',
 'Emotionality',
 'Imagination',
 'Intellect',
 'Authority-challenging',
 'Achievement striving',
 'Cautiousness',
 'Dutifulness',
 'Orderliness',
 'Self-discipline',
 'Self-efficacy',
 'Activity level',
 'Assertiveness',
 'Cheerfulness',
 'Excitement-seeking',
 'Outgoing',
 'Gregariousness',
 'Altruism',
 'Cooperation',
 'Modesty',
 'Uncompromising',
 'Sympathy',
 'Trust',
 'Fiery',
 'Prone to worry',
 'Melancholy',
 'Immoderation',
 'Self-consciousness',
 'Susceptible to stress',
 'Candidate',
 'Count_x',
 'Count_y']

In [None]:
aud_raw_data = aud_raw[scores]

In [None]:
aud_raw_data.columns

Index(['Challenge', 'Closeness', 'Curiosity', 'Excitement', 'Harmony', 'Ideal',
       'Liberty', 'Love', 'Practicality', 'Self-expression', 'Stability',
       'Structure', 'Conservation', 'Openness to change', 'Hedonism',
       'Self-enhancement', 'Self-transcendence', 'Openness',
       'Conscientiousness', 'Extraversion', 'Agreeableness', 'Emotional range',
       'Adventurousness', 'Artistic interests', 'Emotionality', 'Imagination',
       'Intellect', 'Authority-challenging', 'Achievement striving',
       'Cautiousness', 'Dutifulness', 'Orderliness', 'Self-discipline',
       'Self-efficacy', 'Activity level', 'Assertiveness', 'Cheerfulness',
       'Excitement-seeking', 'Outgoing', 'Gregariousness', 'Altruism',
       'Cooperation', 'Modesty', 'Uncompromising', 'Sympathy', 'Trust',
       'Fiery', 'Prone to worry', 'Melancholy', 'Immoderation',
       'Self-consciousness', 'Susceptible to stress', 'Candidate', 'Count_x',
       'Count_y'],
      dtype='object')

In [None]:
aud_raw_data = aud_raw_data[['Candidate', 'Count_x','Count_y', 'Challenge', 'Closeness', 
        'Curiosity', 'Excitement', 'Harmony', 'Ideal',
       'Liberty', 'Love', 'Practicality', 'Self-expression', 'Stability',
       'Structure', 'Conservation', 'Openness to change', 'Hedonism',
       'Self-enhancement', 'Self-transcendence', 'Openness',
       'Conscientiousness', 'Extraversion', 'Agreeableness', 'Emotional range',
       'Adventurousness', 'Artistic interests', 'Emotionality', 'Imagination',
       'Intellect', 'Authority-challenging', 'Achievement striving',
       'Cautiousness', 'Dutifulness', 'Orderliness', 'Self-discipline',
       'Self-efficacy', 'Activity level', 'Assertiveness', 'Cheerfulness',
       'Excitement-seeking', 'Outgoing', 'Gregariousness', 'Altruism',
       'Cooperation', 'Modesty', 'Uncompromising', 'Sympathy', 'Trust',
       'Fiery', 'Prone to worry', 'Melancholy', 'Immoderation',
       'Self-consciousness', 'Susceptible to stress']].rename(columns={'Count_x': 'Twitter_count', 'Count_y': 'Facebook_count'})

In [None]:
aud_raw_data.to_csv('AudienceScores_Raw.csv')