## Profiling Candidates based on their Facebook posts

In [None]:
import glob
import json
import os
import re
import string

import nltk
import numpy as np
import pandas as pd
from nltk.stem.wordnet import WordNetLemmatizer
from watson_developer_cloud import PersonalityInsightsV3 as PersonalityInsights

In [None]:
# Load the per-candidate translated Facebook text that the rest of the
# notebook profiles.
SOURCE_CSV = 'ibm_df.csv'
df = pd.read_csv(SOURCE_CSV)
df

Unnamed: 0.1,Unnamed: 0,Candidate,Translated_text
0,0,Bong Go,Happy Mother's Day From Tatay Digong! Thank yo...
1,1,Charlie Gaddi,"My countrymen, the sovereign will to change th..."
2,2,Bernard Austria,"#13 AUSTRIA, Bernard Fernandez @ Butuan City ..."
3,3,Abner Afuang,Thank you very much for voting and supporting ...
4,4,Raffy Alunan,"""I will continue serving you until my last dyi..."
5,5,Richard Alfajora,ALFAJORA POLICY FOCUS: 1. Political Party Law ...
6,6,Pilo Hilbay,Many enjoyed the long weekend. There are plans...
7,7,Samira Gutoc,LIVE at Balay for the Peace Jam ni Sam. LIVE a...
8,8,Emily Mallillin,PEOPLE APPLICABLE ... SHOULD YOU VOTE THE PATI...
9,9,Elmer Francisco,These kinds of problems give serious unnecessa...


In [None]:
!pip install watson_developer_cloud



In [None]:
!pip install --upgrade watson-developer-cloud

Requirement already up-to-date: watson-developer-cloud in /opt/venv/lib/python3.7/site-packages (2.10.1)


In [None]:
# Personality Insights credentials are taken from the environment so that the
# notebook runs on a fresh kernel without editing this cell and no secret is
# ever saved in the notebook itself. Export both variables before starting
# Jupyter:
#   PERSONALITY_INSIGHTS_URL     - service endpoint URL
#   PERSONALITY_INSIGHTS_APIKEY  - IAM API key
# A missing variable raises a KeyError that names it.
url = os.environ['PERSONALITY_INSIGHTS_URL']
apikey = os.environ['PERSONALITY_INSIGHTS_APIKEY']

personality_insights = PersonalityInsights(url=url, iam_apikey=apikey, version='2017-10-13')

In [None]:
# Show which columns the loaded frame has before deriving new ones.
df.columns

Index(['Unnamed: 0', 'Candidate', 'Translated_text'], dtype='object')

In [None]:
# Shared resources for text_cleaner: a lemmatizer instance, the ASCII
# punctuation characters, and the English stopword list as a set.
lemmatizer = WordNetLemmatizer()
punctuation = string.punctuation
stopwords = {*nltk.corpus.stopwords.words('english')}


def text_cleaner(row):
    """Normalise one document and count the words that survive.

    The text is split into sentences and then into word/punctuation tokens.
    Each token is lower-cased; punctuation tokens and English stopwords are
    dropped; the remaining tokens are lemmatised.

    Parameters
    ----------
    row : str
        Raw text of one document. Any non-string value (for example NaN for
        a missing post) is treated as an empty document.

    Returns
    -------
    tuple of (int, str)
        Number of kept words, and those words joined by single spaces.
    """
    # Guard against missing values so .apply() does not crash the tokenizer.
    if not isinstance(row, str):
        return 0, ''

    words = []
    for sent in nltk.sent_tokenize(row):
        for token in nltk.wordpunct_tokenize(sent):
            token = token.lower()
            # `token in punctuation` would be a substring test against the
            # punctuation string, which lets multi-character runs such as
            # '..' or '...' through; check every character instead.
            if all(char in punctuation for char in token):
                continue
            # Filter stopwords on the surface form first: the lemmatizer can
            # rewrite a stopword into something the list no longer contains
            # (for example 'was' may come back as 'wa').
            if token in stopwords:
                continue
            lemma = lemmatizer.lemmatize(token)
            # Still drop lemmas that collapse onto a stopword.
            if lemma not in stopwords:
                words.append(lemma)
    return len(words), ' '.join(words)

In [None]:
# Run the cleaner over the corpus once and unpack its (word_count, clean_text)
# pairs, instead of tokenising and lemmatising every post twice.
cleaned = df['Translated_text'].apply(text_cleaner)
df['word_count'] = cleaned.apply(lambda pair: pair[0])
df['clean_text'] = cleaned.apply(lambda pair: pair[1])

In [None]:
# Number of candidates whose cleaned text is too short to be profiled.
# Strictly below 100 so this is the exact complement of the `>= 100` filter
# used to build `profile` (a 100-word text is kept, not excluded).
int((df['word_count'] < 100).sum())

3

In [None]:
# Keep only candidates with at least 100 cleaned words. Take an explicit copy
# so that adding columns to `profile` later writes to an independent frame
# rather than to a slice of `df` (which triggers SettingWithCopyWarning).
profile = df[df['word_count'] >= 100].copy()

In [None]:
def _request_profile(text):
    """Send one cleaned text to Personality Insights and return the parsed result."""
    response = personality_insights.profile(text, content_type='text/plain', raw_scores=True)
    return response.get_result()


# One service call per candidate; the full response is kept in the 'pia' column.
profile['pia'] = profile['clean_text'].apply(_request_profile)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [None]:
# Inspect the Personality Insights responses stored for each candidate.
profile['pia']

0     {'word_count': 716, 'processed_language': 'en'...
1     {'word_count': 151, 'word_count_message': 'The...
2     {'word_count': 649, 'processed_language': 'en'...
3     {'word_count': 1008, 'processed_language': 'en...
4     {'word_count': 6199, 'processed_language': 'en...
5     {'word_count': 1111, 'processed_language': 'en...
6     {'word_count': 2339, 'processed_language': 'en...
7     {'word_count': 4090, 'processed_language': 'en...
8     {'word_count': 1422, 'processed_language': 'en...
9     {'word_count': 430, 'word_count_message': 'The...
10    {'word_count': 348, 'word_count_message': 'The...
11    {'word_count': 177, 'word_count_message': 'The...
12    {'word_count': 2584, 'processed_language': 'en...
13    {'word_count': 289, 'word_count_message': 'The...
14    {'word_count': 6737, 'processed_language': 'en...
15    {'word_count': 976, 'processed_language': 'en'...
17    {'word_count': 845, 'processed_language': 'en'...
18    {'word_count': 1927, 'processed_language':

In [None]:
# Flatten every candidate's Personality Insights response into one row of
# percentile scores. Columns appear in the order needs, values, Big Five
# traits, then the children of each Big Five trait.
# The rows are collected as plain dicts and the frame is built once at the
# end: concatenating inside the loop re-copies the accumulated frame on every
# iteration and needs four temporary single-row frames per candidate.
score_rows = []
candidate_names = []

for cand, row in zip(profile['Candidate'], profile['pia']):
    scores = {}

    # Top-level entries of the three sections.
    for section in ('needs', 'values', 'personality'):
        for item in row[section]:
            scores[item['name']] = item['percentile']

    # Children nested under each Big Five trait.
    for big5 in row['personality']:
        for child in big5['children']:
            scores[str(child['name'])] = child['percentile']

    candidate_names.append(cand)
    score_rows.append(scores)

# A plain list keeps the index unnamed, so reset_index() later yields a
# column called 'index'.
profiles = pd.DataFrame(score_rows, index=candidate_names)

In [None]:
# Turn the candidate-name index into an ordinary column (it comes out named
# 'index') so it can serve as a merge key.
pia_scores = profiles.reset_index(drop=False)

In [None]:
# Remove the leftover CSV index column. errors='ignore' keeps the cell
# re-runnable: without it a second execution raises KeyError because the
# column is already gone.
profile = profile.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# List the flattened score columns (plus the 'index' column holding the candidate name).
pia_scores.columns

Index(['index', 'Challenge', 'Closeness', 'Curiosity', 'Excitement', 'Harmony',
       'Ideal', 'Liberty', 'Love', 'Practicality', 'Self-expression',
       'Stability', 'Structure', 'Conservation', 'Openness to change',
       'Hedonism', 'Self-enhancement', 'Self-transcendence', 'Openness',
       'Conscientiousness', 'Extraversion', 'Agreeableness', 'Emotional range',
       'Adventurousness', 'Artistic interests', 'Emotionality', 'Imagination',
       'Intellect', 'Authority-challenging', 'Achievement striving',
       'Cautiousness', 'Dutifulness', 'Orderliness', 'Self-discipline',
       'Self-efficacy', 'Activity level', 'Assertiveness', 'Cheerfulness',
       'Excitement-seeking', 'Outgoing', 'Gregariousness', 'Altruism',
       'Cooperation', 'Modesty', 'Uncompromising', 'Sympathy', 'Trust',
       'Fiery', 'Prone to worry', 'Melancholy', 'Immoderation',
       'Self-consciousness', 'Susceptible to stress'],
      dtype='object')

In [None]:
# Attach the flattened scores to each candidate's text and metadata by
# matching the candidate name against the former index column.
pia_data = pd.merge(profile, pia_scores, left_on='Candidate', right_on='index')
pia_data

Unnamed: 0,Candidate,Translated_text,word_count,clean_text,pia,index,Challenge,Closeness,Curiosity,Excitement,...,Modesty,Uncompromising,Sympathy,Trust,Fiery,Prone to worry,Melancholy,Immoderation,Self-consciousness,Susceptible to stress
0,Bong Go,Happy Mother's Day From Tatay Digong! Thank yo...,722,happy mother day tatay digong thank much kuya ...,"{'word_count': 716, 'processed_language': 'en'...",Bong Go,0.141421,0.832906,0.659935,0.23601,...,0.022878,0.530885,0.85459,0.90505,0.124568,0.233059,0.245693,0.001494,0.087825,0.073782
1,Charlie Gaddi,"My countrymen, the sovereign will to change th...",152,countryman sovereign change system government ...,"{'word_count': 151, 'word_count_message': 'The...",Charlie Gaddi,0.040551,0.387213,0.329422,0.142953,...,0.431028,0.908798,0.872395,0.532179,0.026158,0.169301,0.177068,0.000211,0.101355,0.112196
2,Bernard Austria,"#13 AUSTRIA, Bernard Fernandez @ Butuan City ...",676,13 austria bernard fernandez butuan city senad...,"{'word_count': 649, 'processed_language': 'en'...",Bernard Austria,0.065542,0.304705,0.526191,0.086654,...,0.058117,0.585367,0.938764,0.860808,0.061911,0.234604,0.182737,0.000115,0.095604,0.064762
3,Abner Afuang,Thank you very much for voting and supporting ...,1125,thank much voting supporting .. real change op...,"{'word_count': 1008, 'processed_language': 'en...",Abner Afuang,0.087518,0.44079,0.514522,0.052225,...,0.044948,0.577684,0.899082,0.535818,0.395281,0.59861,0.278543,0.000314,0.28027,0.199321
4,Raffy Alunan,"""I will continue serving you until my last dyi...",6585,continue serving last dying breath raffy aluna...,"{'word_count': 6199, 'processed_language': 'en...",Raffy Alunan,0.217337,0.618739,0.628562,0.166955,...,0.030476,0.638782,0.947581,0.862411,0.278354,0.40654,0.163718,4.4e-05,0.158059,0.110354
5,Richard Alfajora,ALFAJORA POLICY FOCUS: 1. Political Party Law ...,1191,alfajora policy focus 1 political party law go...,"{'word_count': 1111, 'processed_language': 'en...",Richard Alfajora,0.171468,0.464065,0.700166,0.119284,...,0.027702,0.381649,0.753928,0.766508,0.268852,0.459916,0.238122,3.8e-05,0.318016,0.214511
6,Pilo Hilbay,Many enjoyed the long weekend. There are plans...,2418,many enjoyed long weekend plan go vacation reg...,"{'word_count': 2339, 'processed_language': 'en...",Pilo Hilbay,0.303153,0.466774,0.946416,0.402202,...,0.523353,0.315066,0.590036,0.474502,0.614693,0.772058,0.804767,0.126119,0.806287,0.763096
7,Samira Gutoc,LIVE at Balay for the Peace Jam ni Sam. LIVE a...,4185,live balay peace jam ni sam live balay peace j...,"{'word_count': 4090, 'processed_language': 'en...",Samira Gutoc,0.324163,0.904438,0.965348,0.45363,...,0.287972,0.311009,0.560863,0.79354,0.684608,0.865727,0.710661,0.001383,0.847364,0.793946
8,Emily Mallillin,PEOPLE APPLICABLE ... SHOULD YOU VOTE THE PATI...,1549,people applicable ... vote patient leave surve...,"{'word_count': 1422, 'processed_language': 'en...",Emily Mallillin,0.302857,0.699438,0.985664,0.435673,...,0.298238,0.302531,0.558365,0.45846,0.56844,0.756484,0.673494,0.001209,0.81619,0.693078
9,Elmer Francisco,These kinds of problems give serious unnecessa...,477,kind problem give serious unnecessary doubt re...,"{'word_count': 430, 'word_count_message': 'The...",Elmer Francisco,0.333684,0.520931,0.783696,0.316411,...,0.009234,0.354085,0.359114,0.961735,0.157122,0.247268,0.177306,4.6e-05,0.353675,0.06565


In [None]:
# The 'index' merge key duplicates 'Candidate'. errors='ignore' makes the cell
# safe to run again after the column has already been removed.
pia_data = pia_data.drop(columns='index', errors='ignore')

In [None]:
# Persist the complete percentile table (texts, raw responses and scores);
# the row index is written as the first column.
pia_data.to_csv(path_or_buf='Personality_Scores_Percentiles.csv', index=True)

In [None]:
# Flatten every candidate's Personality Insights response into one row of raw
# scores. Columns appear in the order needs, values, Big Five traits, then the
# children of each Big Five trait.
# The rows are collected as plain dicts and the frame is built once at the
# end: concatenating inside the loop re-copies the accumulated frame on every
# iteration and needs four temporary single-row frames per candidate.
raw_score_rows = []
raw_candidate_names = []

for cand, row in zip(profile['Candidate'], profile['pia']):
    raw_scores = {}

    # Top-level entries of the three sections.
    for section in ('needs', 'values', 'personality'):
        for item in row[section]:
            raw_scores[item['name']] = item['raw_score']

    # Children nested under each Big Five trait.
    for big5 in row['personality']:
        for child in big5['children']:
            raw_scores[str(child['name'])] = child['raw_score']

    raw_candidate_names.append(cand)
    raw_score_rows.append(raw_scores)

# A plain list keeps the index unnamed, so reset_index() later yields a
# column called 'index'.
profiles_raw = pd.DataFrame(raw_score_rows, index=raw_candidate_names)

In [None]:
# Raw-score counterpart of the percentile table: expose the candidate names
# as a column, merge onto the candidate frame, discard the helper key, export.
pia_scores_raw = profiles_raw.reset_index()
pia_data_raw = pd.merge(profile, pia_scores_raw, left_on='Candidate', right_on='index')
pia_data_raw = pia_data_raw.drop(columns='index')
pia_data_raw.to_csv('Personality_Scores_Raw.csv')

In [None]:
# Reduce the raw-score table to the candidate name, the word count and the
# score columns, leaving out the texts and the stored API response.
raw_id_columns = ['Candidate', 'word_count']
raw_score_columns = [
    'Challenge', 'Closeness', 'Curiosity', 'Excitement', 'Harmony', 'Ideal',
    'Liberty', 'Love', 'Practicality', 'Self-expression', 'Stability',
    'Structure',
    'Conservation', 'Openness to change', 'Hedonism', 'Self-enhancement',
    'Self-transcendence',
    'Openness', 'Conscientiousness', 'Extraversion', 'Agreeableness',
    'Emotional range',
    'Adventurousness', 'Artistic interests', 'Emotionality', 'Imagination',
    'Intellect', 'Authority-challenging',
    'Achievement striving', 'Cautiousness', 'Dutifulness', 'Orderliness',
    'Self-discipline', 'Self-efficacy',
    'Activity level', 'Assertiveness', 'Cheerfulness', 'Excitement-seeking',
    'Outgoing', 'Gregariousness',
    'Altruism', 'Cooperation', 'Modesty', 'Uncompromising', 'Sympathy',
    'Trust',
    'Fiery', 'Prone to worry', 'Melancholy', 'Immoderation',
    'Self-consciousness', 'Susceptible to stress',
]
personality_raw = pia_data_raw[raw_id_columns + raw_score_columns]

In [None]:
# Reduce the percentile table to the candidate name, the word count and the
# score columns, leaving out the texts and the stored API response.
percentile_id_columns = ['Candidate', 'word_count']
percentile_score_columns = [
    'Challenge', 'Closeness', 'Curiosity', 'Excitement', 'Harmony', 'Ideal',
    'Liberty', 'Love', 'Practicality', 'Self-expression', 'Stability',
    'Structure',
    'Conservation', 'Openness to change', 'Hedonism', 'Self-enhancement',
    'Self-transcendence',
    'Openness', 'Conscientiousness', 'Extraversion', 'Agreeableness',
    'Emotional range',
    'Adventurousness', 'Artistic interests', 'Emotionality', 'Imagination',
    'Intellect', 'Authority-challenging',
    'Achievement striving', 'Cautiousness', 'Dutifulness', 'Orderliness',
    'Self-discipline', 'Self-efficacy',
    'Activity level', 'Assertiveness', 'Cheerfulness', 'Excitement-seeking',
    'Outgoing', 'Gregariousness',
    'Altruism', 'Cooperation', 'Modesty', 'Uncompromising', 'Sympathy',
    'Trust',
    'Fiery', 'Prone to worry', 'Melancholy', 'Immoderation',
    'Self-consciousness', 'Susceptible to stress',
]
personality_q = pia_data[percentile_id_columns + percentile_score_columns]

In [None]:
# Write the two slimmed-down score tables, raw scores first, then percentiles.
for score_table, csv_name in (
    (personality_raw, 'PersonalityScores_Raw.csv'),
    (personality_q, 'PersonalityScores_Percentiles.csv'),
):
    score_table.to_csv(csv_name)