In [2]:
import boto3
import logging
import requests
import pprint
import logging
from rekognition_image_detection import RekognitionImage
from rekognition_objects import show_bounding_boxes, show_polygons

In [None]:
# Name of the S3 bucket that holds the images; later cells pass it to
# show_custom_labels as the bucket of the S3 object to analyse.
bucket_name = "reliabl-image-labeling-demo"

# Configure root logging at INFO with a "LEVEL: message" format. This runs before
# the client is created so that log records emitted during client creation are shown.
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
# Module-level Rekognition client.
# NOTE(review): no cell visible in this notebook references this variable — confirm it is still needed.
rekognition_client = boto3.client("rekognition")

INFO: Found credentials in shared credentials file: ~/.aws/credentials


In [None]:
#############
# start model

# adapted from https://github.com/awsdocs/amazon-rekognition-custom-labels-developer-guide/blob/master/LICENSE-SAMPLECODE

import boto3

def start_model(project_arn, model_arn, version_name, min_inference_units, client=None):
    """Start a Rekognition Custom Labels model version and report its status.

    Calls ``start_project_version`` for ``model_arn``, blocks on the
    ``project_version_running`` waiter, then prints the status and status
    message of every matching project version description.

    Args:
        project_arn: ARN of the project that owns the model version.
        model_arn: ARN of the model version to start.
        version_name: Version name used to filter the waiter and the
            describe call.
        min_inference_units: Value passed as ``MinInferenceUnits``.
        client: Optional Rekognition client to use. When omitted a new
            client is created with ``boto3.client('rekognition')``.

    Returns:
        None. Progress and errors are printed; any exception raised while
        starting, waiting or describing is printed rather than propagated.
    """
    if client is None:
        client = boto3.client('rekognition')

    try:
        # Start the model
        print('Starting model: ' + model_arn)
        client.start_project_version(ProjectVersionArn=model_arn, MinInferenceUnits=min_inference_units)

        # Wait for the model to be in the running state
        running_waiter = client.get_waiter('project_version_running')
        running_waiter.wait(ProjectArn=project_arn, VersionNames=[version_name])

        # Get the running status
        describe_response = client.describe_project_versions(
            ProjectArn=project_arn,
            VersionNames=[version_name],
        )
        for model in describe_response['ProjectVersionDescriptions']:
            print("Status: " + model['Status'])
            # A description without a status message must not hide the status
            # behind a KeyError, so fall back to an empty message.
            print("Message: " + str(model.get('StatusMessage', '')))
    except Exception as e:
        # Best-effort by design: report the failure and let the notebook continue.
        print(e)

    print('Done...')

# Identifiers of the project and of the trained model version to start.
project_arn = 'arn:aws:rekognition:us-west-2:643735647393:project/reliabl-labeling-demo/1735924965950'
version_name = 'reliabl-labeling-demo.2025-01-10T21.13.06'
model_arn = 'arn:aws:rekognition:us-west-2:643735647393:project/reliabl-labeling-demo/version/reliabl-labeling-demo.2025-01-10T21.13.06/1736561586865'
min_inference_units = 1

# Start the model version and print its status once the waiter returns.
start_model(
    project_arn=project_arn,
    model_arn=model_arn,
    version_name=version_name,
    min_inference_units=min_inference_units,
)


Starting model: arn:aws:rekognition:us-west-2:643735647393:project/reliabl-labeling-demo/version/reliabl-labeling-demo.2025-01-10T21.13.06/1736561586865
Status: RUNNING
Message: The model is running.
Done...


In [None]:
#adapted from https://github.com/awsdocs/amazon-rekognition-custom-labels-developer-guide/blob/master/LICENSE-SAMPLECODE

import boto3
import io
from PIL import Image, ImageDraw, ExifTags, ImageColor, ImageFont

def display_image(bucket,photo,response):
    """Download an image from S3 and draw the detected custom labels on it.

    Args:
        bucket: Name of the S3 bucket containing the image.
        photo: Key of the image object inside ``bucket``.
        response: Mapping with a ``'CustomLabels'`` list. Each entry must have
            ``'Name'`` and ``'Confidence'``; entries that also carry
            ``'Geometry'`` get their ``BoundingBox`` drawn. Box values are
            multiplied by the image width/height to obtain pixel positions.

    Returns:
        None. Label details are printed and the annotated image is opened
        with ``Image.show()``.
    """
    # Load image from S3 bucket
    s3_connection = boto3.resource('s3')
    s3_object = s3_connection.Object(bucket,photo)
    s3_response = s3_object.get()

    stream = io.BytesIO(s3_response['Body'].read())
    image = Image.open(stream)

    # Ready image to draw bounding boxes on it.
    imgWidth, imgHeight = image.size
    draw = ImageDraw.Draw(image)

    # Load the label font once instead of once per label. The Arial path only
    # exists on macOS, so fall back to PIL's built-in font when it cannot be opened.
    try:
        fnt = ImageFont.truetype('/Library/Fonts/Arial.ttf', 50)
    except OSError:
        fnt = ImageFont.load_default()

    # calculate and display bounding boxes for each detected custom label
    print('Detected custom labels for ' + photo)
    for customLabel in response['CustomLabels']:
        print('Label ' + str(customLabel['Name']))
        print('Confidence ' + str(customLabel['Confidence']))
        if 'Geometry' not in customLabel:
            # Labels without geometry have nothing to draw.
            continue

        box = customLabel['Geometry']['BoundingBox']
        left = imgWidth * box['Left']
        top = imgHeight * box['Top']
        width = imgWidth * box['Width']
        height = imgHeight * box['Height']

        draw.text((left,top), customLabel['Name'], fill='#00d400', font=fnt)

        print('Left: ' + '{0:.0f}'.format(left))
        print('Top: ' + '{0:.0f}'.format(top))
        print('Label Width: ' + "{0:.0f}".format(width))
        print('Label Height: ' + "{0:.0f}".format(height))

        # Closed rectangle: the first corner is repeated to close the outline.
        points = (
            (left,top),
            (left + width, top),
            (left + width, top + height),
            (left , top + height),
            (left, top))
        draw.line(points, fill='#00d400', width=5)

    image.show()

def show_custom_labels(model,bucket,photo, min_confidence, client=None):
    """Run DetectCustomLabels on one S3 image and return the detected labels.

    Args:
        model: ARN of the model version, passed as ``ProjectVersionArn``.
        bucket: Name of the S3 bucket containing the image.
        photo: Key of the image object inside ``bucket``.
        min_confidence: Value passed as ``MinConfidence``.
        client: Optional Rekognition client to reuse across calls. When
            omitted a new client is created with ``boto3.client('rekognition')``,
            which is what happens on every call if the caller loops over images.

    Returns:
        The ``'CustomLabels'`` value of the service response.
    """
    if client is None:
        client = boto3.client('rekognition')

    #Call DetectCustomLabels
    response = client.detect_custom_labels(
        Image={'S3Object': {'Bucket': bucket, 'Name': photo}},
        MinConfidence=min_confidence,
        ProjectVersionArn=model,
    )

    # For object detection use case, uncomment below code to display image.
    # display_image(bucket,photo,response)

    return response['CustomLabels']

## gather test set for binary moderation task (suggestive / non_suggestive)

In [None]:
import pandas as pd

# Test-set manifest. Column names are supplied explicitly, so the first line
# of the file is read as data rather than as a header.
manifest_file = "./manifest/suggestive-non-suggestive_test_2025-01-10T21:09:29.103691.csv"
test_set = pd.read_csv(
    manifest_file,
    header=None,
    names=["filename", "label"],
)
test_set

Unnamed: 0,filename,label
0,BreastExam/dreamstimemaximum_250259473.jpg,non_suggestive
1,BreastExam/dreamstimemaximum_207123274.jpg,non_suggestive
2,BreastExam/dreamstimemaximum_133939417.jpg,non_suggestive
3,BreastExam/dreamstimemaximum_96797189.jpg,non_suggestive
4,BreastExam/dreamstimemaximum_94323393.jpg,non_suggestive
5,BreastExam/dreamstimemaximum_94306606.jpg,non_suggestive
6,BreastExam/dreamstimemaximum_59736182.jpg,non_suggestive
7,Menstruation/What_are_heavy_periods_Kk8hREp.wi...,non_suggestive
8,ReproductiveHealth/dreamstimemaximum_45400686.jpg,non_suggestive
9,Suggestive/dreamstimemaximum_194102913.jpg,suggestive


## label test set images using custom model 

In [None]:
# Label every test image with the binary (suggestive / non_suggestive) custom
# model and collect the raw label lists in manifest order.
model='arn:aws:rekognition:us-west-2:643735647393:project/reliabl-labeling-demo/version/reliabl-labeling-demo.2025-01-10T21.13.06/1736561586865'
min_confidence=50

eval_labels = []
# Select the two manifest columns explicitly: unpacking whole rows only works
# while test_set has exactly two columns, so re-running this cell after later
# cells have added columns would otherwise fail.
for image_name, label in test_set[["filename", "label"]].itertuples(index=False):
    print(f"image, label: {image_name}, {label}")

    # The images to analyse are stored under the "resized/" key prefix.
    photo=f'resized/{image_name}'

    labels = show_custom_labels(model, bucket_name, photo, min_confidence)
    eval_labels.append(labels)
    print("Custom labels detected: " + str(labels))


image, label: BreastExam/dreamstimemaximum_250259473.jpg, non_suggestive
Custom labels detected: [{'Name': 'non_suggestive', 'Confidence': 92.67500305175781}]
image, label: BreastExam/dreamstimemaximum_207123274.jpg, non_suggestive
Custom labels detected: [{'Name': 'non_suggestive', 'Confidence': 96.83799743652344}]
image, label: BreastExam/dreamstimemaximum_133939417.jpg, non_suggestive
Custom labels detected: [{'Name': 'non_suggestive', 'Confidence': 98.40599822998047}]
image, label: BreastExam/dreamstimemaximum_96797189.jpg, non_suggestive
Custom labels detected: [{'Name': 'non_suggestive', 'Confidence': 78.85700225830078}]
image, label: BreastExam/dreamstimemaximum_94323393.jpg, non_suggestive
Custom labels detected: [{'Name': 'non_suggestive', 'Confidence': 87.16799926757812}]
image, label: BreastExam/dreamstimemaximum_94306606.jpg, non_suggestive
Custom labels detected: [{'Name': 'non_suggestive', 'Confidence': 98.98600006103516}]
image, label: BreastExam/dreamstimemaximum_597361

In [None]:
# make sure evaluation labels populated correctly:
# display the label list collected for the first test image
eval_labels[0]

[{'Name': 'non_suggestive', 'Confidence': 92.67500305175781}]

### assess accuracy (TP/FP/TN/FN) of custom moderation labels

In [None]:
# Keep the raw per-image responses alongside the manifest rows.
test_set.loc[:, "custom_model_response"] = eval_labels

# Split every response into parallel lists of label names and confidences.
predicted_names = []
predicted_confidences = []
for detections in eval_labels:
    predicted_names.append([detection["Name"] for detection in detections])
    predicted_confidences.append([detection["Confidence"] for detection in detections])

test_set.loc[:, "custom_model_label"] = predicted_names
test_set.loc[:, "custom_model_confidence"] = predicted_confidences

def r(true_label, inferred_label):
    """Classify one prediction as TP, FP, TN or FN.

    "suggestive" is the positive class and "non_suggestive" the negative one.

    Args:
        true_label: Ground-truth label of the image.
        inferred_label: Label predicted by the model.

    Returns:
        "TP", "FP", "TN" or "FN"; ``None`` when either label is not one of
        the two known class names.
    """
    actually_positive = true_label == "suggestive"
    actually_negative = true_label == "non_suggestive"

    if inferred_label == "suggestive":
        if actually_positive:
            return "TP"
        if actually_negative:
            return "FP"
    elif inferred_label == "non_suggestive":
        if actually_negative:
            return "TN"
        if actually_positive:
            return "FN"
    return None

# Score each row from the first label in its prediction list. A row whose
# prediction list is empty (the response held no labels for that image) is
# scored as None instead of raising IndexError.
test_set.loc[:, "custom_model_quality"] = test_set.apply(
    lambda row: r(
        row["label"],
        row["custom_model_label"][0] if row["custom_model_label"] else None,
    ),
    axis=1,
)


In [None]:
# merge in aws label results
moderation_label_file = "./labeling_results/moderation_labels.tsv"
aws_mod_results = pd.read_csv(
    moderation_label_file,
    sep="\t",
    names=["image", "aws_labels", "error"],
)

# Drop the leading "resized/" characters so the names line up with test_set["filename"].
prefix_length = len("resized/")
aws_mod_results.loc[:, "image_name"] = aws_mod_results["image"].map(lambda path: path[prefix_length:])

# Left merge keeps every test-set row, whether or not a moderation result exists for it.
aws_label_columns = aws_mod_results[["image_name", "aws_labels"]]
test_set = test_set.merge(aws_label_columns, how="left", left_on="filename", right_on="image_name")

test_set.to_csv("./evaluation_results/aws_custom_model_results.csv")


In [60]:
# Preview the first rows of the evaluation table after the AWS moderation labels were merged in.
test_set.head()

Unnamed: 0,filename,label,custom_model_response,custom_model_label,custom_model_confidence,custom_model_quality,image_name,aws_labels
0,BreastExam/dreamstimemaximum_250259473.jpg,non_suggestive,"[{'Name': 'non_suggestive', 'Confidence': 92.6...",[non_suggestive],[92.67500305175781],TN,BreastExam/dreamstimemaximum_250259473.jpg,[]
1,BreastExam/dreamstimemaximum_207123274.jpg,non_suggestive,"[{'Name': 'non_suggestive', 'Confidence': 96.8...",[non_suggestive],[96.83799743652344],TN,BreastExam/dreamstimemaximum_207123274.jpg,[]
2,BreastExam/dreamstimemaximum_133939417.jpg,non_suggestive,"[{'Name': 'non_suggestive', 'Confidence': 98.4...",[non_suggestive],[98.40599822998047],TN,BreastExam/dreamstimemaximum_133939417.jpg,[]
3,BreastExam/dreamstimemaximum_96797189.jpg,non_suggestive,"[{'Name': 'non_suggestive', 'Confidence': 78.8...",[non_suggestive],[78.85700225830078],TN,BreastExam/dreamstimemaximum_96797189.jpg,[]
4,BreastExam/dreamstimemaximum_94323393.jpg,non_suggestive,"[{'Name': 'non_suggestive', 'Confidence': 87.1...",[non_suggestive],[87.16799926757812],TN,BreastExam/dreamstimemaximum_94323393.jpg,[]


In [47]:
# Preview the AWS moderation results, including the derived image_name column.
# NOTE(review): the captured output shows a non-image entry (resized/.DS_Store) with an error value.
aws_mod_results.head()

Unnamed: 0,image,aws_labels,error,image_name
0,resized/.DS_Store,[],An error occurred (InvalidImageFormatException...,.DS_Store
1,resized/BreastExam/dreamstimemaximum_103407046...,"[{name"": ""Emaciated Bodies"", ""parent_name"": ""D...",,BreastExam/dreamstimemaximum_103407046.jpg
2,resized/BreastExam/dreamstimemaximum_103761283...,"[{name"": ""Swimwear or Underwear"", ""parent_name...",,BreastExam/dreamstimemaximum_103761283.jpg
3,resized/BreastExam/dreamstimemaximum_118626718...,"[{name"": ""Non-Explicit Nudity of Intimate part...",,BreastExam/dreamstimemaximum_118626718.jpg
4,resized/BreastExam/dreamstimemaximum_133939417...,[],,BreastExam/dreamstimemaximum_133939417.jpg


## apply labels for custom classification model for women's health categories (not assessed for accuracy here)

In [7]:
# Label every test image with the all-labels custom model and collect the
# results together with the image names, in manifest order.
model='arn:aws:rekognition:us-west-2:643735647393:project/reliabl-labeling-demo-all-labels/version/reliabl-labeling-demo-all-labels.2025-01-10T21.12.46/1736561566379'
min_confidence=50

custom_labels = []
image_names = []
# By this point earlier cells have added columns to test_set, so unpacking
# whole rows into two names raises ValueError. Iterate over the two manifest
# columns only.
for image_name, label in test_set[["filename", "label"]].itertuples(index=False):
    print(f"image, label: {image_name}, {label}")

    # The images to analyse are stored under the "resized/" key prefix.
    photo=f'resized/{image_name}'

    labels = show_custom_labels(model,bucket_name,photo, min_confidence)
    custom_labels.append(labels)
    image_names.append(image_name)

    print("Custom labels detected: " + str(labels))


image, label: BreastExam/dreamstimemaximum_250259473.jpg, non_suggestive
Custom labels detected: [{'Name': 'health_context', 'Confidence': 73.44999694824219}, {'Name': 'breast_exam', 'Confidence': 65.76499938964844}, {'Name': 'pregnancy_visible', 'Confidence': 63.551002502441406}, {'Name': 'underwear_visible', 'Confidence': 62.84199905395508}, {'Name': 'non_suggestive', 'Confidence': 60.000003814697266}, {'Name': 'pregant_belly_exposed', 'Confidence': 60.000003814697266}, {'Name': 'suggestive', 'Confidence': 58.34000015258789}]
image, label: BreastExam/dreamstimemaximum_207123274.jpg, non_suggestive
Custom labels detected: [{'Name': 'non_suggestive', 'Confidence': 91.05500030517578}, {'Name': 'pregant_belly_exposed', 'Confidence': 85.73500061035156}, {'Name': 'health_context', 'Confidence': 77.1259994506836}, {'Name': 'pregnancy_visible', 'Confidence': 71.91799926757812}, {'Name': 'breast_exam', 'Confidence': 60.000003814697266}]
image, label: BreastExam/dreamstimemaximum_133939417.jpg

In [8]:
# save all custom labels to tsv
import json
# One line per image: "<image name><TAB><labels as JSON>".
with open("./labeling_results/aws_custom_labels.tsv", "w") as tsv_file:
    tsv_file.writelines(
        "\t".join((name, json.dumps(detected))) + "\n"
        for name, detected in zip(image_names, custom_labels)
    )


In [3]:
# merge these labels into the custom model results
results_file = pd.read_csv("./evaluation_results/aws_custom_model_results.csv")
custom_label_df = pd.read_csv(
    "./labeling_results/aws_custom_labels.tsv",
    sep="\t",
    names=["image_name", "aws_custom_labels"],
)

# Left merge on the file name keeps every row of the evaluation results.
results_file = pd.merge(
    results_file,
    custom_label_df,
    how="left",
    left_on="filename",
    right_on="image_name",
)

# results_file.to_csv('./evaluation_results/final_results.csv')

In [None]:
# merge in google moderation results
import pandas as pd
google_mod_results = pd.read_csv(
    "./labeling_results/google_moderation_labels.tsv",
    sep="\t",
    names=["image_name", "google_moderation_labels"],
)

# Left merge on the file name keeps every row of the accumulated results.
results_file = pd.merge(
    results_file,
    google_mod_results,
    how="left",
    left_on="filename",
    right_on="image_name",
)

# Persist the combined table.
results_file.to_csv('./evaluation_results/final_results.csv')