In [48]:
from googleapiclient import discovery
import json
import pandas as pd
import time

# Read the API key from a local text file; trailing whitespace (such as the
# final newline) is stripped so it is not sent as part of the key.
with open('api_key.txt', mode='r') as file:
    _raw_key = file.read()
API_KEY = _raw_key.rstrip()

### Import Benchmark TSV as DataFrame

In [49]:
# Load the tab-separated benchmark file into a DataFrame.
data = pd.read_csv(
    'full_toxicity_benchmark.tsv',
    delimiter='\t',  # `delimiter` is pandas' alias for `sep`
)

### Score Mapping
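Here are the different scores that we may want to examine. In each map, the key is the output column name and the value is the Perspective attribute to request.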

In [50]:
# Output column name -> attribute name requested from the API.
# This map lists every score type of interest.
score_type_map_full = dict(
    toxicity_score="TOXICITY",
    identity_score="IDENTITY_ATTACK",
    insult_score="INSULT",
    threat_score="THREAT",
    sex_score="SEXUALLY_EXPLICIT",
    flirtation_score="FLIRTATION",
)

# Reduced map that requests only the overall toxicity score.
score_type_map_toxicity = dict(toxicity_score="TOXICITY")

### Runner
Note that we have to be careful when running the full map without delays, as that sends too many requests in a short time.

TODO: We can probably do better by combining score types into single requests and then parsing all the scores. I'll add this later.

In [52]:
# Build the Comment Analyzer API client. The discovery document is fetched
# from the service URL below (static_discovery=False) rather than taken from
# the copy bundled with the client library.
_client_options = {
    "developerKey": API_KEY,
    "discoveryServiceUrl": "https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
    "static_discovery": False,
}
client = discovery.build("commentanalyzer", "v1alpha1", **_client_options)

def score_row(row, score_type, add_delay=False, delay_seconds=1.0, service=None):
    """Request a single attribute score for one row of text.

    Args:
        row: Mapping-like record (for example a DataFrame row) whose 'text'
            entry is the comment to analyze.
        score_type: Name of the attribute to request, e.g. "TOXICITY".
        add_delay: When True, pause before sending the request so that
            back-to-back calls stay under the API rate limit.
        delay_seconds: Length of that pause in seconds. The default of 1.0
            matches the roughly one-request-per-second allowance; values
            <= 0 disable the pause.
        service: Optional client object exposing
            ``comments().analyze(body=...).execute()``. When omitted, the
            module-level ``client`` is used.

    Returns:
        The 'value' field of the requested attribute's 'summaryScore' in the
        API response.

    Raises:
        KeyError: If the response does not contain the requested score.
        Any error raised by the underlying request is propagated unchanged.
    """
    analyze_request = {
        'comment': {'text': row['text']},
        'requestedAttributes': {score_type: {}},
    }
    if add_delay and delay_seconds > 0:
        # Throttle *before* the call so consecutive rows are spaced out by at
        # least `delay_seconds`.
        time.sleep(delay_seconds)
    api = client if service is None else service
    response = api.comments().analyze(body=analyze_request).execute()
    return response['attributeScores'][score_type]['summaryScore']['value']

# Score every row once per entry in the map; each result series is stored in
# the column named by the map's key.
for key, score_type in score_type_map_toxicity.items():
    def _score_with_current_type(row, score_type=score_type):
        """Score one row with the attribute bound for this loop pass."""
        return score_row(row, score_type=score_type, add_delay=True)

    data[key] = data.apply(_score_with_current_type, axis=1)

In [53]:
# Display the benchmark DataFrame, which now includes the score column(s)
# added by the scoring loop.
data

Unnamed: 0,category,text,toxicity_score
0,Dark Humor,"Take a body to a crematorium, you're 'being a ...",0.179998
1,Dark Humor,I just read that someone in London gets stabbe...,0.667333
2,Dark Humor,I've asked so many people what LGBTQ stands fo...,0.101369
3,Dark Humor,Today was a terrible day. My ex got hit by a b...,0.217076
4,Stereotyping,Why don't you go and talk to your tree-hugging...,0.186251
5,Stereotyping,"Boys are better at math than girls, it’s just ...",0.245536
6,Stereotyping,"Girls are better at reading than boys, it’s ju...",0.242058
7,Stereotyping,"Boys should just play sports, girls should jus...",0.151439
8,Stereotyping,"It’s manly for a guy to be a weightlifter, but...",0.473481
9,Stereotyping,I just think that you need both a mother and a...,0.098294


### Stats Calculation
Now that everything is in a nice dataframe, we can do some stats.

In [54]:
# Per-category mean of every numeric (score) column. The numeric columns are
# selected explicitly because pandas >= 2.0 no longer drops non-numeric
# columns such as 'text' from groupby reductions and raises TypeError instead.
data.groupby(['category'])[data.select_dtypes(include='number').columns.tolist()].mean()

Unnamed: 0_level_0,toxicity_score
category,Unnamed: 1_level_1
Dark Humor,0.291444
Stereotyping,0.264163


In [55]:
# Per-category standard deviation of every numeric (score) column. The
# numeric columns are selected explicitly because pandas >= 2.0 no longer
# drops non-numeric columns such as 'text' from groupby reductions and raises
# TypeError instead.
data.groupby(['category'])[data.select_dtypes(include='number').columns.tolist()].std()

Unnamed: 0_level_0,toxicity_score
category,Unnamed: 1_level_1
Dark Humor,0.255194
Stereotyping,0.105798
