Step 1 : Setup

In [1]:
!pip install awscli --upgrade
!pip install botocore --upgrade
!pip install boto3 --upgrade
!pip install amazon-textract-response-parser

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com


In [2]:
import boto3
import botocore
import sagemaker as sm
import os
import io
import datetime
import uuid

# Working bucket, region and execution role for this notebook session
data_bucket = sm.Session().default_bucket()
region = boto3.session.Session().region_name

# Mirror the bucket and region into environment variables
os.environ["BUCKET"] = data_bucket
os.environ["REGION"] = region
role = sm.get_execution_role()

print(f"SageMaker role is: {role}\nDefault SageMaker Bucket: s3://{data_bucket}")

# AWS service clients; every client except S3 is bound explicitly to `region`
s3 = boto3.client('s3')
textract, comprehend, sagemaker, a2i = (
    boto3.client(service_name, region_name=region)
    for service_name in ('textract', 'comprehend', 'sagemaker', 'sagemaker-a2i-runtime')
)

SageMaker role is: arn:aws:iam::395127396906:role/service-role/AmazonSageMaker-ExecutionRole-20230419T120710
Default SageMaker Bucket: s3://sagemaker-us-east-1-395127396906


In [3]:
# Result holder for the fields extracted from the form; every field starts as None
# and is filled in once the Textract answers are available.
page1 = dict.fromkeys((
    "first_name",
    "last_name",
    "date_of_birth",
    "group_id",
    "subscriber_id",
    "requesting_provider",
    "rendering_provider",
))

In [7]:
# S3 object key the sample form is uploaded to and that the Textract job reads.
# Keys used for earlier sample documents, kept for reference:
#s3_key = 'idp/textract/pa_request_form.jpg'
#s3_key = 'idp/textract/pa_request_form_1.pdf'
s3_key = 'idp/textract/pa_request_form_3.pdf'

In [8]:
# Upload the sample prior-authorization PDF from a2idata/ to s3://{data_bucket}/{s3_key}.
# Other sample documents, kept for reference:
#!aws s3 cp a2idata/Pre-authorization-Request-Form-medical-services-TEST-DCN_231248700001.pdf s3://{data_bucket}/{s3_key} --only-show-errors
#!aws s3 cp a2idata/PA_20230425_111313_50cc0fde-963e-465e-8cbb-5df5a99fb46b.pdf s3://{data_bucket}/{s3_key} --only-show-errors
!aws s3 cp a2idata/Pre-authorization-Request-Form-medical-services-Abby.pdf s3://{data_bucket}/{s3_key} --only-show-errors

In [None]:
from IPython.display import IFrame
# Preview the local copy of the form inline. The IFrame is the last expression of the
# cell, so the notebook renders it as the cell output.
#documentName = "a2idata/PA_20230425_111313_50cc0fde-963e-465e-8cbb-5df5a99fb46b.pdf"
documentName = "a2idata/Pre-authorization-Request-Form-medical-services-Abby.pdf"
IFrame(documentName, width=600, height=300)


In [10]:
# Dimensions passed along in the A2I task payload as image_width / image_height
image_width = 600
image_height = 300

In [11]:
import time


def startasyncJob(s3BucketName, filename):
    """Start an asynchronous Textract text-detection job for a document in S3.

    Args:
        s3BucketName: Name of the bucket that holds the document.
        filename: Object key of the document inside that bucket.

    Returns:
        The "JobId" value of the start_document_text_detection response.
    """
    document_location = {'S3Object': {'Bucket': s3BucketName, 'Name': filename}}
    started_job = textract.start_document_text_detection(DocumentLocation=document_location)
    return started_job["JobId"]


# (alias, question text, pages) for every Textract query sent with the analysis job.
# When pages is None the "Pages" key is left out of the query.
_PA_FORM_QUERIES = (
    # Patient information
    ('First_Name', 'What is the First?', None),
    ('Last_Name', 'What is the Patient Name Last (Last)?', None),
    ('Date_Of_Birth', 'What is the Date of Birth?', None),
    # The original literal 'Patient''s' silently concatenated to "Patients" (no apostrophe).
    ('Subscriber_ID', "What is the Patient's Regence Member ID #?", None),
    ('Group_ID', 'What is the Group?', None),
    # Provider information
    ('Requesting_Provider', 'Does PROVIDER INFORMATION Requesting/Prescribing Provider checked?', None),
    ('Rendering_Provider', 'Does PROVIDER INFORMATION Rendering/Treating Provider Provider checked?', None),
    ('Provider_Name', 'What is the  PROVIDER INFORMATION Provider Name?', None),
    ('Provider_Tax_ID', 'What is the  PROVIDER INFORMATION Tax ID #?', None),
    ('Provider_NPI', 'What is the  PROVIDER INFORMATION NPI #?', None),
    ('Provider_Mailing_Address', 'What is the  PROVIDER INFORMATION Mailing Address?', None),
    ('Provider_City', 'What is the  PROVIDER INFORMATION City?', None),
    ('Provider_State', 'What is the  PROVIDER INFORMATION State?', None),
    ('Provider_ZipCode', 'What is the  PROVIDER INFORMATION ZIP Code?', None),
    ('Provider_Specialty', 'What is the  PROVIDER INFORMATION Provider Specialty?', None),
    ('Provider_Email_Address', 'What is the  PROVIDER INFORMATION Provider Email Address?', None),
    # Contact for additional information
    ('Contact_Info_Name', 'What is the  Who should we contact if we require additional information? Name?', None),
    ('Contact_Info_Phone_No', 'What is the  Who should we contact if we require additional information? Phone #?', None),
    ('Contact_Info_Confidential', 'Does Who should we contact if we require additional information? Confidential Voice Mail checked?', None),
    ('Contact_Info_Fax_No', 'What is the  Who should we contact if we require additional information? Fax #?', None),
    # Facility / independent laboratory (bottom of page 1)
    ('Provider2_Name', 'What is the  Facility or Independent Laboratory Name?', None),
    ('Provider2_Tax_ID', 'What is the Tax ID # at the bottom of page 1?', None),
    ('Provider2_NPI', 'What is the NPI # at the bottom of page 1?', None),
    ('Provider2_Mailing_Address', 'What is the Mailing Address at the bottom of page 1?', None),
    ('Provider2_Fax_No', 'What is the Fax # at the bottom of page 1?', None),
    ('Provider2_City', 'What is the City at the bottom of page 1?', None),
    ('Provider2_State', 'What is the State at the bottom of page 1?', None),
    ('Provider2_ZipCode', 'What is the ZIP Code at the bottom of page 1?', None),
    ('Provider2_Phone_No', 'What is the Phone # at the bottom of page 1?', None),
    ('Provider2_Confidential', 'Does Confidential Voice Mail checked at the bottom of page 1?', None),
    # Pre-authorization request (page 2)
    ('PA_Date_Of_Service', 'What is the PREAUTHORIZATION REQUEST Date of Service Anticipated Admission date?', ('2',)),
    ('PA_Request_Type', 'What is value in PREAUTHORIZATION REQUEST Please check one?', ('2',)),
    ('PA_Request_OP', 'Does PREAUTHORIZATION REQUEST Outpatient Hospital checked?', ('2',)),
    ('PA_Request_IP', 'Does PREAUTHORIZATION REQUEST Inpatient checked?', ('2',)),
    ('PA_Request_ASC', 'Does PREAUTHORIZATION REQUEST ASC checked?', ('2',)),
    ('PA_Request_Office', 'Does PREAUTHORIZATION REQUEST Office checked?', ('2',)),
    ('PA_Request_Other', 'Does PREAUTHORIZATION REQUEST Other checked?', ('2',)),
    ('PA_Request_Other_Text', 'What is PREAUTHORIZATION REQUEST Other text?', ('2',)),
    ('PA_Request_Primary_Diagnosis_Code', 'What is Primary Diagnosis code?', ('2',)),
    ('PA_Request_Second_Diagnosis_Code', 'What is Second Diagnosis code?', ('2',)),
    ('PA_Request_Third_Diagnosis_Code', 'What is Third Diagnosis code?', ('2',)),
    ('PA_Request_CPT_Code', 'What is CPT or HCPCS code(s) and description(s)?', ('2',)),
)


def startAsyncAnalysisJob(bucket_name, document_file_name, feature_types):
    """Start an asynchronous Textract document-analysis job with the form queries.

    The queries listed in _PA_FORM_QUERIES are always sent in QueriesConfig.
    NOTE(review): they are sent even when "QUERIES" is not in feature_types -
    confirm that callers always include it.

    Args:
        bucket_name: Name of the bucket that holds the document.
        document_file_name: Object key of the document inside that bucket.
        feature_types: List passed through unchanged as FeatureTypes.

    Returns:
        The "JobId" value of the start_document_analysis response.
    """
    queries = []
    for alias, text, pages in _PA_FORM_QUERIES:
        query = {'Text': text, 'Alias': alias}
        if pages is not None:
            query['Pages'] = list(pages)
        queries.append(query)

    response = textract.start_document_analysis(
        DocumentLocation={
            'S3Object': {
                'Bucket': bucket_name,
                'Name': document_file_name
            }
        },
        FeatureTypes=feature_types,
        QueriesConfig={'Queries': queries},
    )

    return response["JobId"]


def isAsyncJobComplete(jobId):
    """Wait until a Textract text-detection job is no longer IN_PROGRESS.

    Polls get_document_text_detection, printing the status after every poll and
    sleeping 10 seconds between polls.

    Args:
        jobId: Identifier of the job to poll.

    Returns:
        The last "JobStatus" value read, returned as-is (not converted to a boolean).
    """
    while True:
        job_status = textract.get_document_text_detection(JobId=jobId)["JobStatus"]
        print("Job status: {}".format(job_status))
        if job_status != "IN_PROGRESS":
            return job_status
        time.sleep(10)


def isAsyncAnalysisJobComplete(jobId):
    """Wait until a Textract document-analysis job is no longer IN_PROGRESS.

    Polls get_document_analysis, printing the status after every poll and
    sleeping 10 seconds between polls.

    Args:
        jobId: Identifier of the job to poll.

    Returns:
        The last "JobStatus" value read, returned as-is (not converted to a boolean).
    """
    while True:
        job_status = textract.get_document_analysis(JobId=jobId)["JobStatus"]
        print("Job status: {}".format(job_status))
        if job_status != "IN_PROGRESS":
            return job_status
        time.sleep(10)


def getAsyncJobResult(jobId):
    """Collect every result page of a Textract text-detection job.

    Follows the "NextToken" of each get_document_text_detection response until a
    response arrives without one. (The previous version reset a differently named
    variable, so the token was never cleared and the loop could not terminate once
    a NextToken had been seen.)

    Args:
        jobId: Identifier of the job whose results are fetched.

    Returns:
        List of the raw responses, in the order they were received.
    """
    pages = []
    next_token = None
    while True:
        request = {"JobId": jobId}
        if next_token:
            request["NextToken"] = next_token
        response = textract.get_document_text_detection(**request)

        pages.append(response)
        if len(pages) > 1:
            # Progress message for follow-up pages only, as before
            print("Resultset page recieved: {}".format(len(pages)))

        next_token = response.get("NextToken")
        if not next_token:
            return pages


def getAsyncAnalysisJobResult(jobId):
    """Collect every result page of a Textract document-analysis job.

    Follows the "NextToken" of each get_document_analysis response until a
    response arrives without one. (The previous version reset a differently named
    variable, so the token was never cleared and the loop could not terminate once
    a NextToken had been seen.)

    Args:
        jobId: Identifier of the job whose results are fetched.

    Returns:
        List of the raw responses, in the order they were received.
    """
    pages = []
    next_token = None
    while True:
        request = {"JobId": jobId}
        if next_token:
            request["NextToken"] = next_token
        response = textract.get_document_analysis(**request)

        pages.append(response)
        if len(pages) > 1:
            # Progress message for follow-up pages only, as before
            print("Resultset page recieved: {}".format(len(pages)))

        next_token = response.get("NextToken")
        if not next_token:
            return pages


In [12]:
#jobId = startasyncJob(data_bucket, s3_key)
#print("Started job with id: {}".format(jobId))
#if(isAsyncJobComplete(jobId)):
#    response = getAsyncJobResult(jobId)

# Print detected text
#for resultPage in response:
#    for item in resultPage["Blocks"]:
#        if item["BlockType"] == "LINE":
#            print ('\033[94m' +  item["Text"] + '\033[0m')

In [13]:
analysis_jobId = startAsyncAnalysisJob(data_bucket, s3_key, ["QUERIES", "FORMS"])
print("Started job with id: {}".format(analysis_jobId))

# isAsyncAnalysisJobComplete returns the final status string, which is truthy even for
# "FAILED", so compare it explicitly instead of using it as a boolean.
analysis_status = isAsyncAnalysisJobComplete(analysis_jobId)
if analysis_status not in ("SUCCEEDED", "PARTIAL_SUCCESS"):
    raise RuntimeError(
        "Textract analysis job {} ended with status {}".format(analysis_jobId, analysis_status)
    )
response = getAsyncAnalysisJobResult(analysis_jobId)
    
    

Started job with id: 9153c851d32abb5fb31150218a249c6282e0fd401b22626e1560785728624712
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED


In [14]:
import json, os
# Keep a local copy of the raw Textract result pages for inspection
with open('textract-response-1.json', 'w') as response_file:
    json.dump(response, response_file)

In [15]:
# print(response[0]["Blocks"])

In [16]:
# Utility functions to locate query answers in the Textract analysis response.
# Both return a dict {"value", "confidence", "block"} or None when nothing matches.
def _iter_response_blocks():
    """Yield every block of every result page in the global `response` list.

    The result can be split over several paginated responses, so all of them are
    searched rather than only the first one.
    """
    for result_page in response:
        for block in result_page.get("Blocks", []):
            yield block


def get_query_ref(id):
    """Return the QUERY_RESULT block with the given Id.

    Args:
        id: Block Id to look for.

    Returns:
        {"value": block text, "confidence": block confidence, "block": raw block},
        or None if no QUERY_RESULT block has that Id.
    """
    for b in _iter_response_blocks():
        if b["BlockType"] == "QUERY_RESULT" and b["Id"] == id:
            return {
                        "value": b.get("Text"),
                        "confidence": b.get("Confidence"),
                        "block": b
                    }
    return None


def get_query_answer(q_alias):
    """Return the answer of the query that was submitted with alias `q_alias`.

    Uses the first Id of the first relationship of the first matching QUERY block
    that has relationships.

    Args:
        q_alias: Alias of the query.

    Returns:
        The dict produced by get_query_ref, or None when the query is missing or
        has no related result.
    """
    for b in _iter_response_blocks():
        # Alias is read with .get so a QUERY block without one does not raise
        if b["BlockType"] == "QUERY" and b["Query"].get("Alias") == q_alias and "Relationships" in b:
                ref_id = b["Relationships"][0]["Ids"][0]

                return get_query_ref(ref_id)
    return None



# (page1 key, Textract query alias) pairs, resolved in this order.
# NOTE(review): the 'Provider2_Fax_No' query is submitted with the job but its answer
# is not stored here - confirm whether that is intentional.
_PAGE1_FIELD_ALIASES = (
    # patient
    ('first_name', 'First_Name'),
    ('last_name', 'Last_Name'),
    ('date_of_birth', 'Date_Of_Birth'),
    ('subscriber_id', 'Subscriber_ID'),
    ('group_id', 'Group_ID'),
    ('requesting_provider', 'Requesting_Provider'),
    ('rendering_provider', 'Rendering_Provider'),
    # provider
    ('provider_name', 'Provider_Name'),
    ('provider_tax_Id', 'Provider_Tax_ID'),
    ('provider_NPI', 'Provider_NPI'),
    ('provider_mailing_address', 'Provider_Mailing_Address'),
    ('provider_city', 'Provider_City'),
    ('provider_state', 'Provider_State'),
    ('provider_zipCode', 'Provider_ZipCode'),
    ('provider_specialty', 'Provider_Specialty'),
    ('provider_email_address', 'Provider_Email_Address'),
    # contact
    ('contact_info_name', 'Contact_Info_Name'),
    ('contact_info_phone_no', 'Contact_Info_Phone_No'),
    ('contact_info_confidential', 'Contact_Info_Confidential'),
    ('contact_info_fax_no', 'Contact_Info_Fax_No'),
    # facility / laboratory
    ('Provider2_Name', 'Provider2_Name'),
    ('Provider2_Tax_ID', 'Provider2_Tax_ID'),
    ('Provider2_NPI', 'Provider2_NPI'),
    ('Provider2_Mailing_Address', 'Provider2_Mailing_Address'),
    ('Provider2_City', 'Provider2_City'),
    ('Provider2_State', 'Provider2_State'),
    ('Provider2_ZipCode', 'Provider2_ZipCode'),
    ('Provider2_Phone_No', 'Provider2_Phone_No'),
    ('Provider2_Confidential', 'Provider2_Confidential'),
    # pre-authorization request
    ('PA_Date_Of_Service', 'PA_Date_Of_Service'),
    ('PA_Request_Type', 'PA_Request_Type'),
    ('PA_Request_OP', 'PA_Request_OP'),
    ('PA_Request_IP', 'PA_Request_IP'),
    ('PA_Request_ASC', 'PA_Request_ASC'),
    ('PA_Request_Office', 'PA_Request_Office'),
    ('PA_Request_Other', 'PA_Request_Other'),
    ('PA_Request_Other_Text', 'PA_Request_Other_Text'),
    # diagnosis / procedure codes
    ('PA_Request_Primary_Diagnosis_Code', 'PA_Request_Primary_Diagnosis_Code'),
    ('PA_Request_Second_Diagnosis_Code', 'PA_Request_Second_Diagnosis_Code'),
    ('PA_Request_Third_Diagnosis_Code', 'PA_Request_Third_Diagnosis_Code'),
    ('PA_Request_CPT_Code', 'PA_Request_CPT_Code'),
)

# Store the answer (or None) of each query under its page1 key
for _page1_key, _query_alias in _PAGE1_FIELD_ALIASES:
    page1[_page1_key] = get_query_answer(_query_alias)

page1

{'first_name': {'value': 'Abby',
  'confidence': 100.0,
  'block': {'BlockType': 'QUERY_RESULT',
   'Confidence': 100.0,
   'Text': 'Abby',
   'Geometry': {'BoundingBox': {'Width': 0.033686563372612,
     'Height': 0.011349360458552837,
     'Left': 0.41728124022483826,
     'Top': 0.3723084330558777},
    'Polygon': [{'X': 0.41728124022483826, 'Y': 0.37231865525245667},
     {'X': 0.45096173882484436, 'Y': 0.3723084330558777},
     {'X': 0.45096781849861145, 'Y': 0.3836476504802704},
     {'X': 0.4172872006893158, 'Y': 0.3836578130722046}]},
   'Id': '98a3917a-6fcb-4e13-9cb7-0485f6894466',
   'Page': 1}},
 'last_name': {'value': 'Anesthesia',
  'confidence': 97.0,
  'block': {'BlockType': 'QUERY_RESULT',
   'Confidence': 97.0,
   'Text': 'Anesthesia',
   'Geometry': {'BoundingBox': {'Width': 0.07253677397966385,
     'Height': 0.00935946311801672,
     'Left': 0.0325758270919323,
     'Top': 0.37241339683532715},
    'Polygon': [{'X': 0.0325758270919323, 'Y': 0.37243539094924927},
   

In [17]:
# getformkeyvalue is a helper from the project's utils.common module; it is handed the
# full list of Textract result pages.
# NOTE(review): presumably returns a {form key: value} mapping (the printed output looks
# like one) - confirm against the helper.
from utils.common import getformkeyvalue
get_form_keys = getformkeyvalue(response)
print(get_form_keys)

{"Patient's Phone #": '503-123-4567', 'Patient Name (Last)': 'Anesthesia', 'First': 'Abby', 'Email Address': 'demorrow@orclinic.com', 'Provider Name': 'Vivek Deshmukh, MD', 'Provider Specialty': 'Neurosurgery', 'NPI #': '1710922026', 'No': 'NOT_SELECTED', 'Fax #': '5036912324', 'Tax ID #': '264433865', 'Group #': '1 0 0 0 3 9 4 8', 'Rendering/Treating Provider': 'NOT_SELECTED', 'Yes': 'NOT_SELECTED', 'Date of Birth': '12/24/1965', 'Name': 'John Dow', 'Requesting/Prescribing Provider': 'NOT_SELECTED', 'Mailing Address': '9701 SW BARNES RD STE 310', 'City': 'PORTLAND', 'State': 'OR', "Patient's Regence Member ID #": '3 0 0 3 0 3 4 4', 'Facility or Independent Laboratory Name': 'The Oregon Clinic', 'Office Phone #': '503-963-2801', 'ZIP Code': '97225', 'Phone': '#503-963-2801', 'Fax to 1 (855) 240-6498.': 'NOT_SELECTED', 'Fax:': '1 (855) 232-0085', 'Mail to:': 'PO Box 1271, WW5-53 Portland, OR 97207-1271', 'Expedited is defined as:': "the standard timeframe could place the member's life, 

Step 3 : Define Rules

In [18]:
# Business rules for the extracted fields. Each listed field gets two rules, in this
# order: a "Required" rule and a "ConfidenceThreshold" rule with setting "99".
_RULE_FIELDS = (
    ("First Name", "first_name"),
    ("Last Name", "last_name"),
    ("Date of birth", "date_of_birth"),
    ("Group Id", "group_id"),
)

rules = []
for _rule_label, _rule_field in _RULE_FIELDS:
    rules.append({
        "description": _rule_label + " is required",
        "field_name": _rule_field,
        "condition_category": "Required",
        "condition_type": "Required",
        "condition_setting": None,
    })
    rules.append({
        "description": _rule_label + " confidence score should greater than 99",
        "field_name": _rule_field,
        "field_name_regex": None,  # support Regex: "_confidence$",
        "condition_category": "Confidence",
        "condition_type": "ConfidenceThreshold",
        "condition_setting": "99",
    })

Evaluate the data against the rules

In [19]:
from a2idata.condition import Condition

# Validate business rules:
# Condition is a project class (a2idata.condition) built from the extracted fields and
# the rule list. The result of check_all() is unpacked as (failed rules, satisfied rules).
con = Condition(page1, rules)
rule_missed, rule_satisfied = con.check_all()

In [20]:
# Display the business rules that failed (the first list unpacked from check_all)
rule_missed

[{'message': 'The field [last_name] confidence score 97.0 is lower than the threshold 99',
  'field_name': 'last_name',
  'field_value': 'Anesthesia',
  'condition_type': 'ConfidenceThreshold',
  'condition_setting': '99',
  'condition_category': 'Confidence',
  'block': {'BlockType': 'QUERY_RESULT',
   'Confidence': 97.0,
   'Text': 'Anesthesia',
   'Geometry': {'BoundingBox': {'Width': 0.07253677397966385,
     'Height': 0.00935946311801672,
     'Left': 0.0325758270919323,
     'Top': 0.37241339683532715},
    'Polygon': [{'X': 0.0325758270919323, 'Y': 0.37243539094924927},
     {'X': 0.10510873794555664, 'Y': 0.37241339683532715},
     {'X': 0.10511259734630585, 'Y': 0.3817509710788727},
     {'X': 0.0325794480741024, 'Y': 0.38177284598350525}]},
   'Id': 'a13246fb-b92f-4b12-92d0-bafdd3d17332',
   'Page': 1},
  'index': 1},
 {'message': 'The field [date_of_birth] confidence score 98.0 is lower than the threshold 99',
  'field_name': 'date_of_birth',
  'field_value': '12/24/1965',
 

Set up a customized A2I UI template and workforce

In [21]:
# Get the ARN of an existing workteam: the first one returned by list_workteams().
# Fail with a clear message instead of an IndexError when there is none.
_workteams = sagemaker.list_workteams()["Workteams"]
if not _workteams:
    raise RuntimeError(
        "No SageMaker workteam found; create a private workteam before running the A2I steps."
    )
work_team_arn = _workteams[0]["WorkteamArn"]
work_team_arn

'arn:aws:sagemaker:us-east-1:395127396906:workteam/private-crowd/r621549-pa-demo'

In [22]:
# Read the UI template from the a2idata directory
with open('a2idata/a2i-custom-ui.html','r') as f:
    template = f.read()

# create_human_task_ui raises ResourceInUse when the name already exists (for example
# when this cell is re-run), so fall back to the existing task UI in that case.
# NOTE: the existing task UI keeps its stored template; the file read above is not applied.
try:
    resp = sagemaker.create_human_task_ui(
            HumanTaskUiName="a2i-custom-ui-demo",
            UiTemplate={'Content': template})
except sagemaker.exceptions.ResourceInUse:
    resp = sagemaker.describe_human_task_ui(HumanTaskUiName="a2i-custom-ui-demo")

In [23]:
# Keep the task UI ARN from the response above; the flow definition created below
# refers to it as HumanTaskUiArn.
ui_template_arn = resp["HumanTaskUiArn"]
ui_template_arn

'arn:aws:sagemaker:us-east-1:395127396906:human-task-ui/a2i-custom-ui-demo'

Create a new human review workflow that bundles all the information A2I needs.

In [24]:
flow_definition_name = "a2i-custom-ui-demo-workflow"

# create_flow_definition raises ResourceInUse when the name is already taken (for example
# when this cell is re-run); reuse the existing flow definition in that case.
try:
    resp = sagemaker.create_flow_definition(
            FlowDefinitionName= flow_definition_name,
            RoleArn= role,
            HumanLoopConfig= {
                "WorkteamArn": work_team_arn,
                "HumanTaskUiArn": ui_template_arn,
                "TaskCount": 1,
                "TaskDescription": "A2I custom business rule and UI demo workflow",
                "TaskTitle": "Custom rule sample task"
            },
            OutputConfig={
                "S3OutputPath" : f's3://{data_bucket}/a2i/output/'
            }
        )
except sagemaker.exceptions.ResourceInUse:
    resp = sagemaker.describe_flow_definition(FlowDefinitionName=flow_definition_name)

workflow_definition_arn = resp['FlowDefinitionArn']

# A new flow definition starts as "Initializing" and can only be used once it is
# "Active"; poll for up to about two minutes so the next cell can start a human loop.
for _attempt in range(60):
    flow_status = sagemaker.describe_flow_definition(
        FlowDefinitionName=flow_definition_name)["FlowDefinitionStatus"]
    if flow_status != "Initializing":
        break
    time.sleep(2)
if flow_status != "Active":
    raise RuntimeError(
        "Flow definition {} is not active (status: {})".format(flow_definition_name, flow_status)
    )

The new A2I UI template and the Workflow definition are in place. Let's send the missed conditions to the Workflow, so a reviewer can verify the result using A2I.

In [25]:
import uuid
# Every run gets its own loop name
human_loop_name = f'custom-loop-{uuid.uuid4()}'

# Content handed to the custom A2I review task: the rule results plus the location
# and display size of the source document.
a2i_input_content = {
    "Results": {
        "ConditionMissed": rule_missed,
        "ConditionSatisfied": rule_satisfied,
    },
    "s3": {
        "bucket": data_bucket,
        "path": s3_key,
        "url": f's3://{data_bucket}/{s3_key}',
        "image_width": image_width,
        "image_height": image_height,
    },
    "text": "Prior Authorization Form Page 1",
}
a2i_payload = {"InputContent": json.dumps(a2i_input_content)}

# Kick off the human review for this document
start_loop_response = a2i.start_human_loop(
    HumanLoopName=human_loop_name,
    FlowDefinitionArn=workflow_definition_arn,
    HumanLoopInput=a2i_payload,
)

In [26]:
# ARN of the human loop just started, taken from the start_human_loop response
human_loop_arn = start_loop_response["HumanLoopArn"]

In [27]:
# Current status of the human loop (rendered as the cell output)
a2i.describe_human_loop(HumanLoopName=human_loop_name)["HumanLoopStatus"]

'InProgress'

In [28]:
# The workteam name is the part of the ARN after the last '/'
work_team_name = work_team_arn.rsplit('/', 1)[-1]
print("Navigate to the private worker portal and do the tasks. Make sure you've invited yourself to your workteam!")
portal_subdomain = sagemaker.describe_workteam(WorkteamName=work_team_name)['Workteam']['SubDomain']
print('https://' + portal_subdomain)

Navigate to the private worker portal and do the tasks. Make sure you've invited yourself to your workteam!
https://l9003ksduu.labeling.us-east-1.sagemaker.aws


In [29]:
# Re-read the human loop and report its current status together with the S3 URI
# listed under HumanLoopOutput.
a2i_resp = a2i.describe_human_loop(HumanLoopName=human_loop_name)
print("Human Loop task status: ", a2i_resp["HumanLoopStatus"])
print("Human Loop output: ", a2i_resp["HumanLoopOutput"]["OutputS3Uri"])

Human Loop task status:  InProgress
Human Loop output:  s3://sagemaker-us-east-1-395127396906/a2i/output/a2i-custom-ui-demo-workflow/2023/05/11/14/10/03/custom-loop-fda2fed6-4121-4675-a896-e8e1f0191044/output.json


In [29]:
# Refresh the loop state and make sure the review is finished before downloading.
# Otherwise the download fails or a stale a2i-output.json from an earlier run is left in place.
a2i_resp = a2i.describe_human_loop(HumanLoopName=human_loop_name)
if a2i_resp["HumanLoopStatus"] != "Completed":
    raise RuntimeError(
        "Human loop {} is {}; complete the task in the worker portal and re-run this cell.".format(
            human_loop_name, a2i_resp["HumanLoopStatus"])
    )

# Split "s3://bucket/key" into bucket and key
output_s3_uri = a2i_resp["HumanLoopOutput"]["OutputS3Uri"]
output_bucket, output_key = output_s3_uri[len("s3://"):].split("/", 1)
s3.download_file(output_bucket, output_key, 'a2i-output.json')

In [30]:
import json
# Pretty-print the downloaded A2I output document
with open('a2i-output.json', 'r') as a2i_output_file:
    a2i_output = json.load(a2i_output_file)
print(json.dumps(a2i_output, indent=2))

{
  "flowDefinitionArn": "arn:aws:sagemaker:us-east-1:395127396906:flow-definition/a2i-custom-ui-demo-workflow",
  "humanAnswers": [
    {
      "acceptanceTime": "2023-05-09T20:39:05.994Z",
      "answerContent": {
        "Change Reason 1": "it looks good",
        "Change Reason 2": "group id looks good now, it just had extra spaces",
        "True Value 1": "Ducktest",
        "True Value 2": "2650002"
      },
      "submissionTime": "2023-05-09T20:40:07.677Z",
      "timeSpentInSeconds": 61.683,
      "workerId": "3da719ebe18cd5a7",
      "workerMetadata": {
        "identityData": {
          "identityProviderType": "Cognito",
          "issuer": "https://cognito-idp.us-east-1.amazonaws.com/us-east-1_3UlECgEC0",
          "sub": "a373cf8f-4113-429d-8c2a-061329e10874"
        }
      }
    }
  ],
  "humanLoopName": "custom-loop-6ac4cd6c-2295-4834-a7be-0f26085d4c0a",
  "inputContent": {
    "Results": {
      "ConditionMissed": [
        {
          "block": {
            "BlockTy