# IBM Watson Personality Insights API
## Kenneth R Miller
### This is code written to gather personality insights for a dataset of Russian Troll Facebook and Instagram ads. It iterates through the ads and requests personality insights for each one.

In [1]:
from __future__ import print_function
# For the Personality Insights module of Watson
from watson_developer_cloud import PersonalityInsightsV3
# To ignore error warnings so that the code does not break
from watson_developer_cloud import WatsonApiException
import pandas as pd 
import time

#### Code to ignore API exceptions. This is unnecessary if every piece of text is at least 100 words long and contains 70 or more recognizable words.

In [2]:
# This should allow us to ignore the Watson API exception warnings.
# WatsonApiException is usually thrown when text is fewer than 100 words
    # or contains fewer than 70 recognizable words.
class WatsonException(Exception):
    """Base class for the Watson exceptions defined in this notebook."""


# NOTE: this definition rebinds the name ``WatsonApiException`` that is
# imported from ``watson_developer_cloud`` at the top of the file. Errors
# raised by the SDK are instances of the SDK's own class, not of this one.
class WatsonApiException(WatsonException):
    """
    Custom exception class for errors returned from Watson APIs.

    :param int code: The HTTP status code returned.
    :param str message: A message describing the error.
    :param dict info: A dictionary of additional information about the error.
    """
    def __init__(self, code, message, info=None):
        # Hand the message to Exception so that ``args`` is populated.
        super(WatsonApiException, self).__init__(message)
        self.message = message
        self.code = code
        self.info = info

    def __str__(self):
        """Return ``'Error: <message>, Code: <code>'``.

        Both parts are formatted rather than concatenated, so a message that
        is not a string (for example ``None``) no longer raises ``TypeError``
        while the error is being printed.
        """
        return 'Error: {0}, Code: {1}'.format(self.message, self.code)


#### Read in the data and clean it up a bit.

In [3]:
# Load the ad dataset from CSV.
data = pd.read_csv('russiadata.minusunknowns.csv')
# Drop every row that has a missing value in any column. The surviving rows
# keep their original index labels, so the index may contain gaps afterwards.
data = data.dropna(axis=0, how='any')
# Remove every run of characters that is neither a word character nor
# whitespace. regex=True is passed explicitly: since pandas 2.0 the default for
# Series.str.replace is a literal match, which would leave the text unchanged.
data['Ad Text '] = data['Ad Text '].str.replace(r'[^\w\s]+', '', regex=True)

#### Authentication information schema for API

In [None]:
# Client for IBM Watson Personality Insights: API version plus credentials.
# url is optional and defaults to the address in the commented-out line;
# use the correct URL for your region.
service = PersonalityInsightsV3(
    version='2017-10-13',
    # url='https://gateway.watsonplatform.net/personality-insights/api',
    username='USERNAME',
    password='PASSWORD',
)

# Test call: have Watson profile one string (it must be 100 words in length)
# and return the result as CSV with a header row.
response = service.profile(
    'YOUR TEXT HERE',
    content_type='text/plain',
    accept="text/csv",
    charset='utf-8',
    csv_headers=True,
).get_result()

# Keep the raw payload, show it, then split it into its lines
# (header line first, values line second).
profile = response.content
print(profile)
cr = profile.splitlines()

#### Creating a dataframe of return labels, later to be merged with text data

In [None]:
# cr[0] is the header line of the test response, as bytes. Every field that is
# followed by a comma becomes one label; whatever comes after the final comma
# is not collected. The values line is parsed the same way further down, so
# the two stay the same length.
header_fields = cr[0].split(b',')[:-1]

labelslst = []
for raw_label in header_fields:
    # Map each byte to the character with the same code point
    label = ''.join(map(chr, raw_label))
    labelslst.append(label)
    print(label)

# Single-column dataframe holding the labels
personalitydf = pd.DataFrame(labelslst)

#### Calling Watson API for personality insights for each segment of text in the dataframe. It would be easy to speed this code up by moving the byte-to-integer calculation outside of the call, but I like this self-contained model. Additionally, it must be slightly slow so that the API can keep up (hence the sleep function).

In [None]:
# The SDK's exception is imported under an alias because this notebook defines
# its own WatsonApiException class, which rebinds that name. Errors raised by
# service.profile() are instances of the SDK's class, so that is the one that
# has to be caught below.
from watson_developer_cloud import WatsonApiException as SdkWatsonApiException


def _parse_profile_values(line):
    """Convert the CSV values line of a Watson response into a list of numbers.

    :param bytes line: One comma-separated line of the CSV response.
    :return: One entry per field that is followed by a comma. The field after
        the final comma is left out, exactly as the header line is handled when
        the label column is built, so both have the same length. A field that
        cannot be read as a float is stored as 1.
    :rtype: list
    """
    values = []
    for field in line.split(b',')[:-1]:
        # latin-1 maps every byte to the character with the same code point
        text = field.decode('latin-1')
        try:
            values.append(float(text))
        except ValueError:
            # One column returns a language code. It is recorded as 1 here,
            # but this is easily edited.
            values.append(1)
    return values


# All of the cleaned ad text
textlst = data['Ad Text ']

# enumerate() walks the texts by position. Looking them up with textlst[ad]
# would go by index label, and the labels have gaps once rows were dropped.
for ad, adtext in enumerate(textlst):
    # Column 0 of personalitydf holds the labels, so ad number `ad` goes
    # into column ad + 1.
    column = ad + 1

    if len(adtext.split()) < 100:
        # Too short to be profiled: fill the column with 0
        personalitydf[column] = 0
    else:
        try:
            # API call to Watson
            response = service.profile(
                adtext,
                content_type='text/plain',
                accept="text/csv",
                charset='utf-8',
                csv_headers=True).get_result()
        except SdkWatsonApiException:
            # Text that is too short or incomprehensible to Watson gets NaN
            personalitydf[column] = float('nan')
            continue

        profile = response.content
        cr = profile.splitlines()
        # cr[1] is the values line, not the label line
        personalitydf[column] = _parse_profile_values(cr[1])
        # -time.time() % 1 is the time left until the next whole second, so
        # at most about one request is sent per second.
        time.sleep(-time.time() % 1)

    # Allows us to keep track of where we are in the dataframe
    print(str(ad) + ' Done!')

#### Transpose the data and merge it with the original dataset. Now we have the personality insights merged with the original ads, in order.

In [None]:
# personalitydf has one row per label and one column per ad (plus the label
# column 0). Transpose it so that every ad becomes a row.
personalitydfT = personalitydf.T
# The first transposed row holds the labels: use it as the header...
personalitydfT.columns = personalitydfT.iloc[0]
# ...and drop it from the data rows.
personalitydfT = personalitydfT.iloc[1:].copy()
# The remaining rows are labelled 1..n in ad order, while `data` still carries
# the index labels that survived dropna(). pd.concat(axis=1) pairs rows by
# index label, so give the insights the index of `data` to pair them by
# position instead. This raises ValueError if the two row counts differ.
personalitydfT.index = data.index
# Merge the IBM Watson Personality Insights with the original data
result = pd.concat([data, personalitydfT], axis=1)