In [1]:
import os
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = './credentials/google_application_credentials.json'

In [2]:
from credentials import api_keys

In [3]:
import io
import time
import requests
import numpy as np
import pandas as pd
from sklearn import metrics
from google.cloud import vision
from tqdm.notebook import tqdm
from tqdm.notebook import trange, tqdm
import warnings
warnings.filterwarnings('ignore')

## Data Preprocessing

In [4]:
df1 = pd.read_csv('csv_3000_hate/sample_3000_labeled.csv')
df2 = pd.read_csv('csv_2000_no_politics/sample_2000_labeled.csv')

In [5]:
df1 = df1[['id', 'sample_path', 'label']]
df2 = df2[['id', 'sample_path', 'label']]

In [6]:
# combine 50,000 images
df = df1.append(df2)
# shuffle the images
df = df.sample(frac=1).reset_index(drop=True)

In [7]:
df['label'] = df['label'].apply(int)

In [8]:
# change column path to 'img'
df = df.rename(columns={'sample_path': 'img'})

In [9]:
print(df.shape)

(5000, 3)


## Google Cloud Vision API

In [11]:
def google_vision_api_without_spoof(path):
    """Google Cloud Vision API"""
    client = vision.ImageAnnotatorClient()

    with io.open(path, 'rb') as image_file:
        content = image_file.read()

    image = vision.Image(content=content)

    response = client.safe_search_detection(image=image)
    # print(response)
    safe = response.safe_search_annotation

    if response.error.message:
        raise Exception(response.error.message)

    # if safe.adult > 3 or safe.medical > 3 or safe.violence > 3 or safe.racy > 3:
    # #     print('Image: {}, UNSAFE'.format(path))
    #     return 1
    # else:
    # #     print('Image: {}, SAFE'.format(path))
    #     return 0

    # list all safe search labels:

    # print(safe.adult)
    # print(safe.medical)
    # print(safe.violence)
    # print(safe.racy)
    return(int(safe.adult), int(safe.medical), int(safe.violence), int(safe.racy))

In [12]:
df_google = df.copy()

tqdm.pandas()
df_google['label_pred'] = df_google['img'].progress_apply(
    google_vision_api_without_spoof)

In [13]:
df_google['label_pred'] = df_google['label_pred'].astype('str')
df_google[['pred_adult', 'pred_medical', 'pred_viol', 'pred_racy']
          ] = df_google['label_pred'].str.split(',', expand=True)

In [14]:
df_google['pred_adult'] = df_google['pred_adult'].replace(
    to_replace='\(', value="", regex=True)
df_google['pred_racy'] = df_google['pred_racy'].replace(
    to_replace='\)', value="", regex=True)
df_google['pred_adult'] = df_google['pred_adult'].apply(int)
df_google['pred_medical'] = df_google['pred_medical'].apply(int)
df_google['pred_viol'] = df_google['pred_viol'].apply(int)
df_google['pred_racy'] = df_google['pred_racy'].apply(int)

In [15]:
# df_google = df_google.drop(columns=['label_pred'])
# # print(df_google)

In [16]:
# save the result to './results/df_google.csv'
df_google.to_csv('./results/df_google.csv')
# print(df_google)

In [17]:
df_test = pd.read_csv('./results/df_google.csv')
print(df_test)

      Unnamed: 0    id                                           img  label  \
0              0  1438  ./annot_2000_no_politics/EQw2J_mX0AAxvoq.jpg      1   
1              1   172  ./annot_2000_no_politics/EWAuxVaWkAIv7Nv.jpg      0   
2              2  2156         ./annot_3000_hate/EeoBkhCXoAAc5IQ.jpg      0   
3              3  2749         ./annot_3000_hate/EgbTs6AWsAAHAj9.jpg      1   
4              4   738  ./annot_2000_no_politics/E0KvNiSVIAQ4vZR.jpg      1   
...          ...   ...                                           ...    ...   
4995        4995   853         ./annot_3000_hate/EfQTejhX0AIt7Hp.jpg      1   
4996        4996  1291  ./annot_2000_no_politics/EoOmgneWEAAkSy3.jpg      0   
4997        4997  2136         ./annot_3000_hate/Et4HU8vUcAEY7y5.jpg      0   
4998        4998   516         ./annot_3000_hate/EUt652pUYAEkpdI.jpg      0   
4999        4999  1540  ./annot_2000_no_politics/EeqFVFTXYAA3Bs1.jpg      0   

      pred_adult  pred_medical  pred_viol  pred_rac

### Define the safety criteria

In [18]:
# if safe.adult > 3 or safe.medical > 3 or safe.violence > 3 or safe.racy > 3, we record it as 1 which means unsafe
df_test['pre_label'] = df_test[['pred_adult',
                                'pred_medical', 'pred_viol', 'pred_racy']].max(axis=1)
df_test['pre_label'] = df_test['pre_label'].apply(lambda x: 1 if x > 3 else 0)
# check the number of unsafe labels from google
df_test['pre_label'].value_counts()

0    4850
1     150
Name: pre_label, dtype: int64

### Get the report

In [19]:
print('Classification Report for Google Cloud Vision API with Threshold (\'possible\')')
target_names = ['safe', 'not-safe']
metricsr_google = metrics.classification_report(
    df_test['label'], df_test['pre_label'], target_names=target_names, digits=4)
print(metricsr_google)

Classification Report for Google Cloud Vision API with Threshold ('possible')
              precision    recall  f1-score   support

        safe     0.7332    0.9719    0.8358      3659
    not-safe     0.3133    0.0350    0.0630      1341

    accuracy                         0.7206      5000
   macro avg     0.5233    0.5034    0.4494      5000
weighted avg     0.6206    0.7206    0.6286      5000



## Yahoo Open NSFW

In [None]:
print('Classification Report for Yahoo Open NSFW:')
df_yahoo = pd.read_csv('./results/df_yahoo.csv')
df_yahoo['pred_yh'] = df_yahoo['pred_yh_value'].apply(
    lambda x: 1 if x >= 0.5 else 0)
target_names = ['safe', 'not-safe']
print(metrics.classification_report(
    df_yahoo['label'], df_yahoo['pred_yh'], target_names=target_names, digits=4))

Classification Report for Yahoo Open NSFW:
              precision    recall  f1-score   support

        safe     0.5051    0.9802    0.6667       253
    not-safe     0.4444    0.0162    0.0312       247

    accuracy                         0.5040       500
   macro avg     0.4748    0.4982    0.3490       500
weighted avg     0.4751    0.5040    0.3528       500



## Clarifai NSFW API

In [20]:
%env GRPC_DEFAULT_SSL_ROOTS_FILE_PATH = /home/keyan/jupter/hateful_memes-main/roots.pem

In [21]:
from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_pb2, status_code_pb2

channel = ClarifaiChannel.get_grpc_channel()
stub = service_pb2_grpc.V2Stub(channel)
metadata = (('authorization', api_keys.CLARIFAI_API_KEY),)

In [22]:
def clarifai_api(path):
    time.sleep(0.2)
    with open(path, "rb") as f:
        file_bytes = f.read()

    request = service_pb2.PostModelOutputsRequest(
        # This is the model ID of a publicly available General model.
        # You may use any other public or custom model ID.
        model_id='e9576d86d2004ed1a38ba0cf39ecb4b1',
        inputs=[
            resources_pb2.Input(
                data=resources_pb2.Data(
                    image=resources_pb2.Image(
                        base64=file_bytes
                    )
                )
            )
        ])
    response = stub.PostModelOutputs(request, metadata=metadata)

    if response.status.code != status_code_pb2.SUCCESS:
        raise Exception("Post model outputs failed, status: " +
                        response.status.description)
    # for concept in response.outputs[0].data.concepts:
    #     print('%12s: %.2f' % (concept.name, concept.value))

    # if response.outputs[0].data.concepts[1].value >= 0.4:
    #     print('Image: {}, Offensive: {:.2f}'.format(path, response.outputs[0].data.concepts[1].value))
    # else:
    #     print('Image: {}, not-Offensive: {:.2f}'.format(path, response.outputs[0].data.concepts[1].value))

    return response.outputs[0].data.concepts[1].name, response.outputs[0].data.concepts[1].value

In [23]:
df_clarifai = df.copy()
header = ["id", "img", "label"]
df_clarifai.to_csv('./df_clarifai.csv', columns=header)
# print(df_clarifai)

In [25]:
df_c = pd.read_csv('./df_clarifai.csv')
df_c = df_c.loc[:, ~df_c.columns.str.contains('^Unnamed')]
# print(df_c)

In [26]:
df_c1 = df_c[:500]
df_c2 = df_c[500:1000]
df_c3 = df_c[1000:1500]
df_c4 = df_c[1500:2000]
df_c5 = df_c[2000:2500]
df_c6 = df_c[2500:3000]
df_c7 = df_c[3000:3500]
df_c8 = df_c[3500:4000]
df_c9 = df_c[4000:4500]
df_c10 = df_c[4500:5000]

# df_c10

In [27]:
tqdm.pandas()
df_c1['pred_cf'] = df_c1['img'].progress_apply(clarifai_api)
df_c1.to_csv('./results/df_clarifai.csv')
df_c2['pred_cf'] = df_c2['img'].progress_apply(clarifai_api)
df_c2.to_csv('./results/df_clarifai.csv', mode='a', header=False)
df_c3['pred_cf'] = df_c3['img'].progress_apply(clarifai_api)
df_c3.to_csv('./results/df_clarifai.csv', mode='a', header=False)
df_c4['pred_cf'] = df_c4['img'].progress_apply(clarifai_api)
df_c4.to_csv('./results/df_clarifai.csv', mode='a', header=False)
df_c5['pred_cf'] = df_c5['img'].progress_apply(clarifai_api)
df_c5.to_csv('./results/df_clarifai.csv', mode='a', header=False)
df_c6['pred_cf'] = df_c6['img'].progress_apply(clarifai_api)
df_c6.to_csv('./results/df_clarifai.csv', mode='a', header=False)
df_c7['pred_cf'] = df_c7['img'].progress_apply(clarifai_api)
df_c7.to_csv('./results/df_clarifai.csv', mode='a', header=False)
df_c8['pred_cf'] = df_c8['img'].progress_apply(clarifai_api)
df_c8.to_csv('./results/df_clarifai.csv', mode='a', header=False)
df_c9['pred_cf'] = df_c9['img'].progress_apply(clarifai_api)
df_c9.to_csv('./results/df_clarifai.csv', mode='a', header=False)

df_c10['pred_cf'] = df_c10['img'].progress_apply(clarifai_api)
df_c10.to_csv('./results/df_clarifai.csv', mode='a', header=False)

In [28]:
df_clarifai = pd.read_csv('./results/df_clarifai.csv')
df_clarifai = df_clarifai.loc[:, ~df_clarifai.columns.str.contains('^Unnamed')]

In [29]:
df_clarifai[['nsfw', 'pred_cf']
            ] = df_clarifai['pred_cf'].str.split(',', expand=True)

In [30]:
df_clarifai = df_clarifai.drop(columns='nsfw')
df_clarifai['pred_cf'] = df_clarifai['pred_cf'].replace(
    to_replace='\)', value="", regex=True)

In [31]:
df_clarifai = df_clarifai.sort_values('pred_cf', ascending=False)
df_clarifai['pred_cf'] = df_clarifai['pred_cf'].apply(float)
pd.set_option('display.float_format', lambda x: '%0.5f' % x)
df_clarifai['pred_cf']

504    0.00010
2839   0.00010
4348   0.00010
3134   0.00010
2339   0.00000
         ...  
1071   0.00010
3871   0.00010
1183   0.00010
454    0.00010
4221   0.00010
Name: pred_cf, Length: 5000, dtype: float64

In [32]:
# If we set the threashold as 0.5
df_clarifai['pred_clar'] = df_clarifai['pred_cf'].apply(
    lambda x: 1 if x >= 0.5 else 0)
print('Classification Report for Clarifai NSFW API with Treshhold (0.5):')
target_names = ['safe', 'not-safe']
metrics_clarifai_05 = metrics.classification_report(
    df_clarifai['label'], df_clarifai['pred_clar'], target_names=target_names, digits=4)
print(metrics_clarifai_05)

Classification Report for Clarifai NSFW API with Treshhold (0.5):
              precision    recall  f1-score   support

        safe     0.7318    1.0000    0.8451      3659
    not-safe     0.0000    0.0000    0.0000      1341

    accuracy                         0.7318      5000
   macro avg     0.3659    0.5000    0.4226      5000
weighted avg     0.5355    0.7318    0.6185      5000



In [33]:
# If we set the threashold as 0.4
df_clarifai['pred_clar'] = df_clarifai['pred_cf'].apply(
    lambda x: 1 if x >= 0.4 else 0)
print('Classification Report for Clarifai NSFW API with Treshhold (0.4):')
target_names = ['safe', 'not-safe']
metrics_clarifai_04 = metrics.classification_report(
    df_clarifai['label'], df_clarifai['pred_clar'], target_names=target_names, digits=4)
print(metrics_clarifai_04)

Classification Report for Clarifai NSFW API with Treshhold (0.4):
              precision    recall  f1-score   support

        safe     0.7328    0.9954    0.8441      3659
    not-safe     0.4333    0.0097    0.0190      1341

    accuracy                         0.7310      5000
   macro avg     0.5831    0.5025    0.4315      5000
weighted avg     0.6525    0.7310    0.6228      5000



## DeepAI API

In [34]:
def deep_ai_api(path):
    deep_ai_request = requests.post(
        'https://api.deepai.org/api/content-moderation',
        files={
            'image': open(path, 'rb'),
        },
        headers={'api-key': api_keys.DEEPAI_API_KEY}
    )
    # print(deep_ai_request.json())
    nsfw_score = deep_ai_request.json()['output']['nsfw_score']
#    print('Image: {}, nsfw_score: {:.2f}'.format(path, nsfw_score))
    return nsfw_score

In [35]:
df_deepai = df.copy()

tqdm.pandas()
df_deepai['pred_da_value'] = df_deepai['img'].progress_apply(deep_ai_api)

In [36]:
df_deepai.to_csv('./results/df_deepai.csv')
print(df_deepai)

In [37]:
df_deepai = pd.read_csv('./results/df_deepai.csv')
# threshold set as 0.5
print('Classification Report for DeepAI API with Treshold (0.5):')
df_deepai['pred_da'] = df_deepai['pred_da_value'].apply(
    lambda x: 1 if x >= 0.5 else 0)
target_names = ['safe', 'not-safe']
metrics_deepai_05 = metrics.classification_report(
    df_deepai['label'], df_deepai['pred_da'], target_names=target_names, digits=4)
print(metrics_deepai_05)

Classification Report for DeepAI API with Treshold (0.5):
              precision    recall  f1-score   support

        safe     0.7319    0.9932    0.8428      3659
    not-safe     0.2857    0.0075    0.0145      1341

    accuracy                         0.7288      5000
   macro avg     0.5088    0.5003    0.4286      5000
weighted avg     0.6123    0.7288    0.6206      5000



In [38]:
# threshold set as 0.4
print('Classification Report for DeepAI API with Treshold (0.4):')
df_deepai['pred_da'] = df_deepai['pred_da_value'].apply(
    lambda x: 1 if x >= 0.4 else 0)
target_names = ['safe', 'not-safe']
metrics_deepai_04 = metrics.classification_report(
    df_deepai['label'], df_deepai['pred_da'], target_names=target_names, digits=4)
print(metrics_deepai_04)

Classification Report for DeepAI API with Treshold (0.4):
              precision    recall  f1-score   support

        safe     0.7323    0.9913    0.8423      3659
    not-safe     0.3191    0.0112    0.0216      1341

    accuracy                         0.7284      5000
   macro avg     0.5257    0.5012    0.4320      5000
weighted avg     0.6215    0.7284    0.6222      5000



## Amazon Rekognition

In [40]:
import boto3

In [41]:
def amz_reko_api_content(path):
    with open(path, "rb") as f:
        file_bytes = f.read()

    client = boto3.client('rekognition')

    response = client.detect_moderation_labels(Image={'Bytes': file_bytes})

    if len(response['ModerationLabels']) == 0:
        #print('Image: {}, SAFE'.format(path))
        return 0

    for item in response['ModerationLabels']:
        # Once amz_rek recognizes any hate symbol we return the value as 1
        if item['Name'] == 'Hate Symbols':
            #print('Image: {}, Hateful'.format(path))
            return 1

    return 0

In [42]:
df_amz_hateful = df.copy()

tqdm.pandas()
df_amz_hateful['pred_amz_value'] = df_amz_hateful['img'].progress_apply(
    amz_reko_api_content)
df_amz_hateful.to_csv('./results/df_amz_hateful.csv')
# print(df_amz_hateful)

In [43]:
df_amz_hateful = pd.read_csv('./results/df_amz_hateful.csv')
df_amz_hateful.sort_values('pred_amz_value',ascending=False)

Unnamed: 0.1,Unnamed: 0,id,img,label,pred_amz_value
2823,2823,155,./annot_2000_no_politics/EfAGhDPUMAE6jQW.jpg,1,1
4110,4110,179,./annot_2000_no_politics/EfuBFdTUEAAUYYM.jpg,1,1
726,726,104,./annot_2000_no_politics/EU6Red0UwAINz7Q.png,0,1
2432,2432,2292,./annot_3000_hate/EfuybhzU4AAogni.jpg,0,1
3492,3492,1716,./annot_2000_no_politics/EV7AjuPU4AIsVMa.jpg,0,1
...,...,...,...,...,...
1667,1667,2894,./annot_3000_hate/Ez4fAOKXMAAu45A.jpg,0,0
1666,1666,505,./annot_2000_no_politics/EP49fQ6XsAIX6N_.jpg,0,0
1665,1665,1238,./annot_2000_no_politics/EoWelRoVgAE6n2K.jpg,0,0
1664,1664,1146,./annot_3000_hate/Eyx_tjEUcAUiu0C.jpg,1,0


In [44]:
print('Classification Report for Amazon Rekognition on Hate Content:')
target_names = ['safe', 'not-safe ']
metrics_amazon = metrics.classification_report(
    df_amz_hateful['label'], df_amz_hateful['pred_amz_value'], target_names=target_names, digits=4)
print(metrics_amazon)

Classification Report for Amazon Rekognition on Hate Content:
              precision    recall  f1-score   support

        safe     0.7331    0.9932    0.8435      3659
   not-safe      0.4186    0.0134    0.0260      1341

    accuracy                         0.7304      5000
   macro avg     0.5759    0.5033    0.4348      5000
weighted avg     0.6488    0.7304    0.6243      5000



## Data Combination

In [45]:
df['pred_gv'] = df_test['pre_label']
# df['pred_yh'] = df_yahoo['pred_yh']
df['pred_cl'] = df_clarifai['pred_clar']
df['pred_da'] = df_deepai['pred_da']
df['pred_amz_value'] = df_amz_hateful['pred_amz_value']

In [46]:
df['pred_total'] = df['pred_gv'] + \
    df['pred_cl'] + df['pred_da'] + df['pred_amz_value']

In [47]:
final_df = df.sort_values(by=['id'], ascending=True)
final_df.to_csv('./results/comparison.csv', index=False)

In [78]:
df_pos = df[df.label.eq(1)]
df_pos_rows = df_pos.shape[0]
print(df_pos_rows)
df_false_neg = df_pos[df_pos.pred_total.eq(0)]
df_false_neg_rows = df_false_neg.shape[0]
print(df_false_neg_rows)
pass_rate = df_false_neg_rows / df_pos_rows
print(pass_rate)

1341
1129
0.8419090231170768


In [79]:
df_neg = df[df.label.eq(0)]
df_neg_rows = df_neg.shape[0]
print(df_neg_rows)
df_false_pos = df_neg[df_neg.pred_total.gt(0)]
df_false_pos_rows = df_false_pos.shape[0]
print(df_false_pos_rows)
false_rate = df_false_pos_rows / df_neg_rows
print(false_rate)

3659
587
0.1604263459961738


In [88]:
# print('google:\n',metricsr_google,'\nclarifai:\n', metrics_clarifai_04, metrics_clarifai_05 ,'\ndeep AI:\n', metrics_deepai_04, metrics_deepai_05 ,'\nAmazon:',metrics_amazon)
print('Measurement of Google Cloud API:\n', metricsr_google)
print('======================================================\n')
print('Measurement of Amazon Rekognition API:\n', metrics_amazon)
print('======================================================\n')
print('Measurement of Clarifai NSFW API (Threshold 0.4):\n', metrics_clarifai_04)
print('Measurement of Clarifai NSFW API (Threshold 0.5):\n', metrics_clarifai_05)
print('======================================================\n')
print('Measurment 0f Deep AI NSFW API (Threshold 0.4):\n', metrics_deepai_04)
print('Measurment 0f Deep AI NSFW API (Threshold 0.5):\n', metrics_deepai_05)


Measurement of Google Cloud API:
               precision    recall  f1-score   support

        safe     0.7358    0.8653    0.7953      3659
    not-safe     0.2927    0.1521    0.2002      1341

    accuracy                         0.6740      5000
   macro avg     0.5142    0.5087    0.4977      5000
weighted avg     0.6169    0.6740    0.6357      5000


Measurement of Amazon Rekognition API:
               precision    recall  f1-score   support

        safe     0.7331    0.9932    0.8435      3659
   not-safe      0.4186    0.0134    0.0260      1341

    accuracy                         0.7304      5000
   macro avg     0.5759    0.5033    0.4348      5000
weighted avg     0.6488    0.7304    0.6243      5000


Measurement of Clarifai NSFW API (Threshold 0.4):
               precision    recall  f1-score   support

        safe     0.7328    0.9954    0.8441      3659
    not-safe     0.4333    0.0097    0.0190      1341

    accuracy                         0.7310      5000
 

### table for safe lable prediction

| APIs | precision | recall | f1-score | 
| --- | --- | --- | --- |
| **Google Cloud Vision** |0.7332 | 0.9719 | 0.8358 |
| **Amazon Rekognition** | 0.7331 | 0.9932 | 0.8435 |
| **Clarifai NSFW**  | 0.7328 | 0.9954 | 0.8441 |
| **DeepAI NSFW** | 0.7323 | 0.9913 | 0.8423 |


### table for not-safe lable prediction

| APIs| precision | recall | f1-score | 
| --- | --- | --- | --- | 
| **Google Cloud Vision**| 0.3133| 0.0350 | 0.0630 |
| **Amazon Rekognition**| 0.4186 | 0.0134 | 0.0260 | 
| **Clarifai NSFW (0.4)**| 0.4333| 0.0097 | 0.0190 |
| **Clarifai NSFW (0.5)**| 0.0000| 0.0000 | 0.0000 |
| **DeepAI NSFW (0.4)** | 0.3191| 0.0112 | 0.0216 |
| **DeepAI NSFW (0.5)** | 0.2857| 0.0075 | 0.0145 |
