In [1]:
import json
import os
import time

import pandas as pd
from googleapiclient import discovery
from googleapiclient.errors import HttpError

# Toxicity Classification

In [2]:
# Location of the JSON credentials file. Set the PERSPECTIVE_CREDENTIALS_PATH
# environment variable to run this notebook on another machine; the original
# hard-coded location is kept as the fallback so existing setups still work.
CREDENTIALS_PATH = os.environ.get(
    'PERSPECTIVE_CREDENTIALS_PATH',
    '/Users/lorenapiedras/Documents/credentials/api_creds.json',
)

# The file must contain a JSON object with a 'perspective_api' entry.
with open(CREDENTIALS_PATH, 'r') as file:
    api_keys = json.load(file)
API_KEY = api_keys['perspective_api']

In [3]:
# Input corpus: every sentence becomes one row of the `text` column.
sentences = ['Testing example for toxicity']
data = pd.DataFrame({'text': sentences})

In [4]:
# Output column name -> attribute name requested from the API,
# covering every attribute this notebook knows how to score.
score_type_map_full = dict(
    toxicity_score="TOXICITY",
    identity_score="IDENTITY_ATTACK",
    insult_score="INSULT",
    threat_score="THREAT",
    sex_score="SEXUALLY_EXPLICIT",
    flirtation_score="FLIRTATION",
)

# Same mapping restricted to the overall toxicity attribute.
score_type_map_toxicity = dict(toxicity_score="TOXICITY")

In [5]:
# API client used by every scoring call below. static_discovery=False asks the
# library to fetch the discovery document from discoveryServiceUrl instead of
# using a copy bundled with the package.
client = discovery.build(
    serviceName="commentanalyzer",
    version="v1alpha1",
    discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
    developerKey=API_KEY,
    static_discovery=False,
)

def score_row(row, score_type_list, add_delay=False, max_retries=1, retry_delay=10):
    """Score one row of text against the requested attributes.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) whose ``'text'`` entry
            is the comment to analyze.
        score_type_list: Attribute names to request (e.g. ``'TOXICITY'``).
        add_delay: When true, sleep one second before sending the request.
        max_retries: How many times to retry after an HTTP 429 response.
            The default of 1 matches the previous hard-coded single retry.
        retry_delay: Seconds to wait before each retry.

    Returns:
        A tuple with the ``summaryScore`` value of each requested attribute,
        in the same order as ``score_type_list``.

    Raises:
        HttpError: For any non-429 error, or for a 429 once ``max_retries``
            retries have been used up.
    """
    if add_delay:
        time.sleep(1)

    # Build the request once, outside the retry loop, so each attempt
    # sends the same body.
    analyze_request = {
        'comment': {'text': row['text']},
        'requestedAttributes': {score_type: {} for score_type in score_type_list},
    }

    retries_used = 0
    while True:
        try:
            response = client.comments().analyze(body=analyze_request).execute()
            break
        except HttpError as err:
            # Only quota errors are retried; everything else propagates.
            if err.resp.status != 429 or retries_used >= max_retries:
                raise
            retries_used += 1
            print('Quota limit exceeded')
            time.sleep(retry_delay)

    attribute_scores = response['attributeScores']
    return tuple(
        attribute_scores[score_type]['summaryScore']['value']
        for score_type in score_type_list
    )

In [6]:
# Output column names and the attribute names they map to, in matching order.
score_col_names = list(score_type_map_toxicity)
score_type_list = list(score_type_map_toxicity.values())

# One request per row; extra keyword arguments are forwarded to score_row.
# add_delay=True makes score_row sleep one second before each request.
results = data.apply(
    score_row,
    axis=1,
    result_type='expand',
    score_type_list=score_type_list,
    add_delay=True,
)
results.columns = score_col_names

# Drop score columns left over from a previous run of this cell so that
# re-executing it does not fail on overlapping column names in the join.
data = data.drop(columns=score_col_names, errors='ignore').join(results)

# Sentiment Analysis

In [1]:
from helper import predict_sentiment

In [2]:
# Sample sentence to run through the sentiment helper. The call stays as the
# cell's last expression so the notebook shows whatever it produces.
text = 'I hate potatoes'
predict_sentiment(text)

Text: I hate potatoes
Sentiment: -0.800000011920929, 0.800000011920929


magnitude: 0.8
score: -0.8