In [1]:
import os
import io
import time
import requests
import json
import pandas as pd
from sklearn import metrics

from credentials import api_keys

In [2]:
# Load the dev_seen split; `lines=True` reads the file as JSON Lines (one record per line).
df_text = pd.read_json('./data/dev_seen.jsonl', lines=True)

# The `img` column holds paths relative to the data directory, so prefix each
# one with './data/' to make it resolvable from the notebook's working directory.
df_text = df_text.assign(
    img=df_text['img'].apply(lambda rel_path: './data/' + rel_path)
)
df_text

Unnamed: 0,id,img,label,text
0,8291,./data/img/08291.png,1,white people is this a shooting range
1,46971,./data/img/46971.png,1,bravery at its finest
2,3745,./data/img/03745.png,1,your order comes to $37.50 and your white priv...
3,83745,./data/img/83745.png,1,it is time.. to send these parasites back to t...
4,80243,./data/img/80243.png,1,mississippi wind chime
...,...,...,...,...
495,83675,./data/img/83675.png,0,i'm gonna be like phelps one day
496,37198,./data/img/37198.png,0,when you're so relaxed you can feel yourself g...
497,48670,./data/img/48670.png,0,look at this sandwich maker club i found on wi...
498,9863,./data/img/09863.png,0,diverse group of women


In [3]:
from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_pb2, status_code_pb2

# Open a gRPC channel to Clarifai and wrap it in the V2 service stub that the
# scoring function below uses for every request.
clarifai_channel = ClarifaiChannel.get_grpc_channel()
stub = service_pb2_grpc.V2Stub(clarifai_channel)

# Per-call gRPC metadata carrying the API key.
# NOTE(review): Clarifai's published examples send this value as 'Key <api key>';
# presumably api_keys.CLARIFAI_API_KEY already contains that prefix — confirm.
authorization_entry = ('authorization', api_keys.CLARIFAI_API_KEY)
metadata = (authorization_entry,)

In [4]:
def clarifai_api(text, model_id='c1a2ac2adba0204d859fb89fd44d6ac9', delay=0.2, safe_threshold=0.1):
    """Score one piece of text with a Clarifai model and return the top concept value.

    Sends `text` to the model via the module-level `stub` / `metadata`, picks
    the concept with the largest value, prints a one-line summary and returns
    that value.

    Args:
        text: Raw text to classify.
        model_id: Clarifai model to query. The default is the ID this notebook
            has always used; the recorded run output shows it emitting a
            `toxic` concept for text input, i.e. it is used here as a text
            moderation model.
        delay: Seconds to sleep before the request (simple client-side throttle).
        safe_threshold: If the top value is below this, the printed label is
            replaced by 'SAFE'. Only the printed label changes; the returned
            value is unaffected.

    Returns:
        The highest concept value in the response, or 0 when no concept has a
        value greater than 0.

    Raises:
        Exception: If the API reports a non-success status, or answers
            successfully but without any output to read concepts from.
    """
    time.sleep(delay)

    request = service_pb2.PostModelOutputsRequest(
        model_id=model_id,
        inputs=[
            resources_pb2.Input(
                data=resources_pb2.Data(
                    text=resources_pb2.Text(raw=text)
                )
            )
        ],
    )
    response = stub.PostModelOutputs(request, metadata=metadata)

    if response.status.code != status_code_pb2.SUCCESS:
        raise Exception("Post model outputs failed, status: " + response.status.description)

    # Guard the `outputs[0]` access below: without it an empty response fails
    # with an opaque IndexError that does not say which text caused it.
    if len(response.outputs) == 0:
        raise Exception("Post model outputs returned no outputs for text: " + repr(text))

    # Pick the first concept with the largest value. Concepts whose value is
    # not above 0 are ignored, which leaves the ('', 0) fallback in place.
    top_concept = max(
        response.outputs[0].data.concepts,
        key=lambda concept: concept.value,
        default=None,
    )
    if top_concept is not None and top_concept.value > 0:
        highest_concept = top_concept.name
        highest_value = top_concept.value
    else:
        highest_concept = ''
        highest_value = 0

    if highest_value < safe_threshold:
        highest_concept = 'SAFE'

    print('{:>13s}: {:>.5f} - {}'.format(highest_concept, highest_value, text))

    return highest_value

In [5]:
# Create the output directory *before* the slow part: every row below costs one
# throttled API call, and a missing './results' folder would otherwise only
# surface as an OSError in `to_csv` after all of them have been made.
os.makedirs('./results', exist_ok=True)

# Work on a copy so the shared `df_text` frame stays free of service-specific columns.
df_text_clarifai = df_text.copy()
df_text_clarifai['pred_cf_value'] = df_text_clarifai['text'].apply(clarifai_api)

# Persist the raw scores; the report cell reloads them from this CSV.
df_text_clarifai.to_csv('./results/df_text_clarifai.csv')
df_text_clarifai.head()

         SAFE: 0.00610 - white people is this a shooting range
         SAFE: 0.00010 - bravery at its finest
         SAFE: 0.00006 - your order comes to $37.50 and your white privilege discount brings the total to $37.50
        toxic: 0.77444 - it is time.. to send these parasites back to the desert
         SAFE: 0.00062 - mississippi wind chime
         SAFE: 0.00144 - knowing white people , that's probably the baby father
        toxic: 0.17505 - life hack #23 how to get stoned with no weed
         SAFE: 0.00006 - you've heard of elf on a shelf, now get ready for
         SAFE: 0.05513 - cooooooooooooon!!!!
        toxic: 0.42571 - when you get to choose your own mental illness
        toxic: 0.99877 - dat ass ahmarbkrich*d
        toxic: 0.99651 - look! it says it right here! we can fuck goats!
        toxic: 0.13271 - enough is enough children are more important than freaks
        toxic: 0.98771 - removes polish with chemicals nobody bats an eye removes polish with chemicals 

In [3]:
print('Classification Report for Clarifai Text Moderation API:')

# Reload the saved scores so this cell does not require re-querying the API.
df_text_clarifai = pd.read_csv('./results/df_text_clarifai.csv')

# Binarise: a score of at least 0.5 counts as a "hateful" prediction (1), else 0.
df_text_clarifai['pred_cf'] = (df_text_clarifai['pred_cf_value'] >= 0.5).astype('int64')

target_names = ['not-hateful', 'hateful']
print(
    metrics.classification_report(
        y_true=df_text_clarifai['label'],
        y_pred=df_text_clarifai['pred_cf'],
        target_names=target_names,
        digits=4,
    )
)

Classification Report for Clarifai Text Moderation API:
              precision    recall  f1-score   support

 not-hateful     0.5301    0.8696    0.6587       253
     hateful     0.6118    0.2105    0.3133       247

    accuracy                         0.5440       500
   macro avg     0.5709    0.5400    0.4860       500
weighted avg     0.5705    0.5440    0.4880       500



In [4]:
from googleapiclient import discovery

# Build the Perspective ("commentanalyzer") client from the service's online
# discovery document rather than a bundled static one.
perspective_discovery_url = "https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1"

client = discovery.build(
    "commentanalyzer",
    "v1alpha1",
    discoveryServiceUrl=perspective_discovery_url,
    developerKey=api_keys.PERSPECTIVE_API_KEY,
    static_discovery=False,
)

In [23]:
def perspective_api(text, delay=1, api_client=None):
    """Score one piece of text with the Perspective API and return the top attribute score.

    Requests the TOXICITY, IDENTITY_ATTACK, INSULT, PROFANITY and THREAT
    attributes for English text, picks the attribute with the largest summary
    score, prints a one-line summary and returns that score.

    Args:
        text: Comment text to analyse.
        delay: Seconds to sleep before the request (simple client-side throttle).
        api_client: Perspective client to use. When omitted, the module-level
            `client` is looked up at call time. A later cell in this notebook
            rebinds `client` to the Azure client, so pass the Perspective
            client explicitly if this function is called after that cell ran.

    Returns:
        The highest summary score among the requested attributes, or 0 when no
        attribute scores above 0.
    """
    perspective_client = client if api_client is None else api_client

    time.sleep(delay)
    analyze_request = {
        'comment': { 'text': text },
        'requestedAttributes': {
            'TOXICITY': {},
            'IDENTITY_ATTACK': {},
            'INSULT': {},
            'PROFANITY': {},
            'THREAT': {}
        },
        'languages': ['en']
    }

    response = perspective_client.comments().analyze(body=analyze_request).execute()

    highest_key = ''
    highest_value = 0
    # Use the mapped value directly instead of re-indexing the response by key.
    for key, attribute in response['attributeScores'].items():
        score = attribute['summaryScore']['value']
        if score > highest_value:
            highest_value = score
            highest_key = key

    print('{:>15s}: {:>.5f} - {}'.format(highest_key, highest_value, text))

    return highest_value

In [24]:
# Create the output directory *before* the slow part: every row below costs one
# throttled API call, and a missing './results' folder would otherwise only
# surface as an OSError in `to_csv` after all of them have been made.
os.makedirs('./results', exist_ok=True)

# Work on a copy so the shared `df_text` frame stays free of service-specific columns.
df_text_perspective = df_text.copy()
df_text_perspective['pred_ps_value'] = df_text_perspective['text'].apply(perspective_api)

# Persist the raw scores; the report cell reloads them from this CSV.
df_text_perspective.to_csv('./results/df_text_perspective.csv')
df_text_perspective.head()

         THREAT: 0.79822 - white people is this a shooting range
         THREAT: 0.17776 - bravery at its finest
IDENTITY_ATTACK: 0.48783 - your order comes to $37.50 and your white privilege discount brings the total to $37.50
         INSULT: 0.83871 - it is time.. to send these parasites back to the desert
         THREAT: 0.15819 - mississippi wind chime
IDENTITY_ATTACK: 0.60363 - knowing white people , that's probably the baby father
         THREAT: 0.75889 - life hack #23 how to get stoned with no weed
         THREAT: 0.48572 - you've heard of elf on a shelf, now get ready for
         THREAT: 0.27701 - cooooooooooooon!!!!
         INSULT: 0.58466 - when you get to choose your own mental illness
      PROFANITY: 0.86079 - dat ass ahmarbkrich*d
      PROFANITY: 0.96012 - look! it says it right here! we can fuck goats!
         INSULT: 0.72652 - enough is enough children are more important than freaks
      PROFANITY: 0.95792 - removes polish with chemicals nobody bats an eye re

In [4]:
print('Classification Report for Perspective API:')

# Reload the saved scores so this cell does not require re-querying the API.
df_text_perspective = pd.read_csv('./results/df_text_perspective.csv')

# Binarise: a score of at least 0.5 counts as a "hateful" prediction (1), else 0.
df_text_perspective['pred_ps'] = (df_text_perspective['pred_ps_value'] >= 0.5).astype('int64')

target_names = ['not-hateful', 'hateful']
print(
    metrics.classification_report(
        y_true=df_text_perspective['label'],
        y_pred=df_text_perspective['pred_ps'],
        target_names=target_names,
        digits=4,
    )
)

Classification Report for Perspective API:
              precision    recall  f1-score   support

 not-hateful     0.5749    0.6522    0.6111       253
     hateful     0.5869    0.5061    0.5435       247

    accuracy                         0.5800       500
   macro avg     0.5809    0.5791    0.5773       500
weighted avg     0.5808    0.5800    0.5777       500



In [3]:
from azure.cognitiveservices.vision.contentmoderator import ContentModeratorClient
import azure.cognitiveservices.vision.contentmoderator.models
from msrest.authentication import CognitiveServicesCredentials

# Authenticate against Azure Content Moderator with the subscription key.
azure_credentials = CognitiveServicesCredentials(api_keys.AZURE_SUBSCRIPTION_KEY)

# NOTE: this rebinds the module-level name `client`, which an earlier cell
# assigns to the Perspective API client; after this cell runs, `client`
# refers to the Azure client only.
client = ContentModeratorClient(
    endpoint=api_keys.AZURE_CONTENT_MODERATOR_ENDPOINT,
    credentials=azure_credentials,
)

In [4]:
def azure_api(text, delay=0.1, api_client=None):
    """Screen one piece of text with Azure Content Moderator and return the top category score.

    Submits `text` as UTF-8 plain text with classification enabled, picks the
    classification category with the largest score, prints a one-line summary
    and returns that score.

    Args:
        text: Text to screen.
        delay: Seconds to sleep before the request (simple client-side throttle).
        api_client: Content Moderator client to use. When omitted, the
            module-level `client` is looked up at call time; pass a client
            explicitly if that name may have been rebound by another cell.

    Returns:
        The highest category score in the response, or 0 when the response has
        no classification section or no category scores above 0.
    """
    moderator_client = client if api_client is None else api_client

    # Human-readable labels for the numbered categories (used for printing only).
    category_labels = {
        'category1': 'sexually_explicit',
        'category2': 'sexually_suggestive',
        'category3': 'offensive',
    }

    time.sleep(delay)
    text_stream = io.BytesIO(text.encode('utf-8'))
    screen = moderator_client.text_moderation.screen_text(
        text_content_type="text/plain",
        text_content=text_stream,
        language="eng",
        classify=True
    )
    res = screen.as_dict()

    # A response without a classification section is scored as 0 instead of
    # raising KeyError, so one such row cannot abort a whole DataFrame.apply run.
    classification = res.get('classification') or {}

    highest_key = ''
    highest_value = 0
    for key, entry in classification.items():
        # 'review_recommended' sits next to the categories but is not one of them.
        if key == 'review_recommended':
            continue
        score = entry['score']
        if score > highest_value:
            highest_value = score
            highest_key = key

    highest_key = category_labels.get(highest_key, highest_key)

    print('{:>19s}: {:>.5f} - {}'.format(highest_key, highest_value, text))

    return highest_value

In [5]:
# Create the output directory *before* the slow part: every row below costs one
# throttled API call, and a missing './results' folder would otherwise only
# surface as an OSError in `to_csv` after all of them have been made.
os.makedirs('./results', exist_ok=True)

# Work on a copy so the shared `df_text` frame stays free of service-specific columns.
df_text_azure = df_text.copy()
df_text_azure['pred_az_value'] = df_text_azure['text'].apply(azure_api)

# Persist the raw scores; the report cell reloads them from this CSV.
df_text_azure.to_csv('./results/df_text_azure.csv')
df_text_azure.head()

          offensive: 0.45265 - white people is this a shooting range
sexually_suggestive: 0.21738 - bravery at its finest
          offensive: 0.32186 - your order comes to $37.50 and your white privilege discount brings the total to $37.50
sexually_suggestive: 0.09574 - it is time.. to send these parasites back to the desert
sexually_suggestive: 0.17581 - mississippi wind chime
          offensive: 0.48324 - knowing white people , that's probably the baby father
          offensive: 0.98800 - life hack #23 how to get stoned with no weed
sexually_suggestive: 0.14441 - you've heard of elf on a shelf, now get ready for
          offensive: 0.98800 - cooooooooooooon!!!!
          offensive: 0.38406 - when you get to choose your own mental illness
          offensive: 0.98800 - dat ass ahmarbkrich*d
          offensive: 0.98800 - look! it says it right here! we can fuck goats!
          offensive: 0.32474 - enough is enough children are more important than freaks
          offensive: 0.737

In [5]:
print('Classification Report for Microsoft Azure API:')

# Reload the saved scores so this cell does not require re-querying the API.
df_text_azure = pd.read_csv('./results/df_text_azure.csv')

# Binarise: a score of at least 0.5 counts as a "hateful" prediction (1), else 0.
df_text_azure['pred_az'] = (df_text_azure['pred_az_value'] >= 0.5).astype('int64')

target_names = ['not-hateful', 'hateful']
print(
    metrics.classification_report(
        y_true=df_text_azure['label'],
        y_pred=df_text_azure['pred_az'],
        target_names=target_names,
        digits=4,
    )
)

Classification Report for Microsoft Azure API:
              precision    recall  f1-score   support

 not-hateful     0.5291    0.7549    0.6221       253
     hateful     0.5540    0.3117    0.3990       247

    accuracy                         0.5360       500
   macro avg     0.5415    0.5333    0.5106       500
weighted avg     0.5414    0.5360    0.5119       500



## Data Combination

In [6]:
# Attach each service's binary prediction to the shared frame. The columns are
# matched on the row index, so all four frames must share the same index.
df_text = df_text.assign(
    pred_cf=df_text_clarifai['pred_cf'],
    pred_ps=df_text_perspective['pred_ps'],
    pred_az=df_text_azure['pred_az'],
)

In [7]:
# Number of services (0 to 3) that flagged each caption as hateful.
df_text['pred_total'] = (
    df_text['pred_cf']
    .add(df_text['pred_ps'])
    .add(df_text['pred_az'])
)
df_text

Unnamed: 0,id,img,label,text,pred_cf,pred_ps,pred_az,pred_total
0,8291,./data/img/08291.png,1,white people is this a shooting range,0,1,0,1
1,46971,./data/img/46971.png,1,bravery at its finest,0,0,0,0
2,3745,./data/img/03745.png,1,your order comes to $37.50 and your white priv...,0,0,0,0
3,83745,./data/img/83745.png,1,it is time.. to send these parasites back to t...,1,1,0,2
4,80243,./data/img/80243.png,1,mississippi wind chime,0,0,0,0
...,...,...,...,...,...,...,...,...
495,83675,./data/img/83675.png,0,i'm gonna be like phelps one day,0,0,0,0
496,37198,./data/img/37198.png,0,when you're so relaxed you can feel yourself g...,0,0,0,0
497,48670,./data/img/48670.png,0,look at this sandwich maker club i found on wi...,0,0,0,0
498,9863,./data/img/09863.png,0,diverse group of women,0,0,0,0


In [8]:
# Captions whose ground-truth label is 1 (hateful).
df_text_pos = df_text.loc[df_text['label'] == 1]
df_text_pos_rows = len(df_text_pos)
print(df_text_pos_rows)

# Of those, the captions that none of the three services flagged (vote total of 0).
df_text_false_neg = df_text_pos.loc[df_text_pos['pred_total'] == 0]
df_text_false_neg_rows = len(df_text_false_neg)
print(df_text_false_neg_rows)

# Share of hateful captions that slipped past every service.
pass_rate = df_text_false_neg_rows / df_text_pos_rows
print(pass_rate)

247
105
0.4251012145748988
