In [1]:
from googleapiclient import discovery
from googleapiclient.errors import HttpError
import json
import pandas as pd
import time

# Read the API key from a file next to the project rather than hard-coding it;
# trailing whitespace (e.g. the final newline) is stripped.
with open('../api_key.txt', 'r') as key_file:
    raw_key = key_file.read()
    API_KEY = raw_key.rstrip()

### Import Benchmark TSV as DataFrame

In [2]:
# Load the tab-separated benchmark, discard rows whose `text` is missing,
# and renumber the remaining rows from zero.
data = (
    pd.read_csv('../data/raw/full_toxicity_benchmark.tsv', sep='\t')
    .dropna(subset='text')
    .reset_index(drop=True)
)

### Score Mapping
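Each mapping pairs an output column name with the Comment Analyzer attribute it is scored on; together they cover the different scores that we may want to examine.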

In [3]:
# Output column name -> attribute name requested from the Comment Analyzer API.
# Order matters: it fixes the order of the score columns produced later.
score_type_map_full = dict([
    ("toxicity_score", "TOXICITY"),
    ("identity_score", "IDENTITY_ATTACK"),
    ("insult_score", "INSULT"),
    ("threat_score", "THREAT"),
    ("sex_score", "SEXUALLY_EXPLICIT"),
    ("flirtation_score", "FLIRTATION"),
])

# Toxicity-only variant of the mapping, for runs that need a single score.
score_type_map_toxicity = dict([
    ("toxicity_score", "TOXICITY"),
])

### Runner
Idea for future me: pool several API keys to make the script run faster.

In [4]:
# Comment Analyzer (v1alpha1) client, authenticated with the developer key.
# static_discovery=False makes the library fetch the discovery document from
# the URL given here instead of relying on one bundled with the package.
client = discovery.build(
    serviceName="commentanalyzer",
    version="v1alpha1",
    discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
    developerKey=API_KEY,
    static_discovery=False,
)

def score_row(row, score_type_list, add_delay=False, max_retries=5, retry_wait=10):
    """Score one row's text with the Comment Analyzer API.

    Uses the module-level ``client`` to send a single ``comments().analyze``
    request asking for every attribute in ``score_type_list``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a ``'text'`` entry.
        score_type_list: Attribute names to request, e.g. ``["TOXICITY"]``.
            The order of this list is the order of the returned scores.
        add_delay: When true, sleep one second before the request to stay
            under the request-rate quota.
        max_retries: How many times a request rejected with HTTP 429 is
            retried before the error is re-raised.
        retry_wait: Seconds to wait before each retry after an HTTP 429.

    Returns:
        Tuple with one summary score per entry of ``score_type_list``.

    Raises:
        HttpError: For any non-429 API error, or for a 429 that persists
            after ``max_retries`` retries.
    """
    if add_delay:
        time.sleep(1)

    # Built outside the retry loop: it never changes between attempts and
    # cannot itself raise an HttpError.
    analyze_request = {
        'comment': {'text': row['text']},
        'requestedAttributes': {score_type: {} for score_type in score_type_list},
    }

    retries_used = 0
    while True:
        try:
            response = client.comments().analyze(body=analyze_request).execute()
            break
        except HttpError as err:
            # Only quota errors are worth waiting out; everything else, and a
            # quota error that outlives the retry budget, goes to the caller.
            if err.resp.status != 429 or retries_used >= max_retries:
                raise
            retries_used += 1
            print('Quota limit exceeded')
            time.sleep(retry_wait)

    attribute_scores = response['attributeScores']
    return tuple(
        attribute_scores[score_type]['summaryScore']['value']
        for score_type in score_type_list
    )

In [5]:
score_col_names = list(score_type_map_full.keys())
score_type_list = list(score_type_map_full.values())

# One API request per row. `apply` forwards the extra keyword arguments to
# `score_row`, and result_type='expand' spreads each returned tuple over columns.
results = data.apply(
    score_row,
    axis=1,
    result_type='expand',
    score_type_list=score_type_list,
    add_delay=True,
)
results.columns = score_col_names

# Drop score columns left behind by an earlier run of this cell first, so that
# re-executing the cell does not fail on overlapping column names in the join.
data = data.drop(columns=score_col_names, errors='ignore').join(results)

In [6]:
# Save the scored benchmark next to the notebook (row index included, as by default).
data.to_csv(path_or_buf='toxicity_benchmark_scores.csv')

### Stats Calculation
Now that everything is in a single DataFrame, we can compute summary statistics for each category.

In [54]:
# Mean score per category. Only the score columns are aggregated: averaging the
# free-text `text` column raises a TypeError on pandas >= 2.0.
data.groupby(['category'])[score_col_names].mean()

Unnamed: 0_level_0,toxicity_score
category,Unnamed: 1_level_1
Dark Humor,0.291444
Stereotyping,0.264163


In [55]:
# Standard deviation of each score per category. Only the score columns are
# aggregated: the free-text `text` column raises a TypeError on pandas >= 2.0.
data.groupby(['category'])[score_col_names].std()

Unnamed: 0_level_0,toxicity_score
category,Unnamed: 1_level_1
Dark Humor,0.255194
Stereotyping,0.105798
