# CUSTOM MODELS

https://docs.microsoft.com/en-us/azure/cognitive-services/form-recognizer/quickstarts/client-library?tabs=preview%2Cv2-1&pivots=programming-language-rest-api

Scope of the script:
1. Send a document to a trained custom model for analysis, poll for the result and print the extracted fields.
The analyze endpoint has the form: "https://{Endpoint}/formrecognizer/v2.1-preview.3/custom/models/{model ID}/analyze?includeTextDetails=true"

In [None]:
import os
import datetime
import time
import requests
from pprint import pprint
import json

# endpoint access configuration

config_file = os.path.normpath(os.path.join(os.getcwd(), 'access_config.json'))
with open(config_file, 'r') as config_fh:
    config = json.load(config_fh)

# service base URL and the subscription key sent with every request
ENDPOINT = config['form.recognizer']['api']
KEY = config['form.recognizer']['key']

# API route for each supported api type
API = {'custom': '/formrecognizer/v2.1-preview.3/custom/models'}

# additionally from config file get trained model ID & SAS URL for the image
modelID = config['form.recognizer.custom']['modelID']
SAS_URL = config['form.recognizer.custom']['SAS_URL']

# access_config.json must additionally contain a section like
# "form.recognizer.custom": {
#     "modelID" : "YOUR_TRAINED_CUSTOM_MODEL_ID",
#     "SAS_URL" : "YOUR_IMAGE_SAS_URL"
#     }


In [None]:
def send_image(image_path, api_type, parameters, modelID, timeout=30):
    """ Submits data for processing by service and waits for the result
    Params:
        image_path (str): url of the image (sent to the service as "source")
        api_type (str): indicate which type of call should be made, a key of API
        parameters (dict): request query parameters, may be empty
        modelID (str): ID of the trained custom model that analyzes the image
        timeout (int | float): timeout of the submit request, in seconds
    Returns:
        success (bool): flag indicating image processing result
        response (dict): data returned from api, {} when processing failed
    """
    success = False
    response = {}

    if api_type not in API:
        print('ERROR: unsupported api type "%s" received.' %api_type)
        return success, response

    start_ts = datetime.datetime.now()

    try:
        # set request headers
        headers = {
            'Ocp-Apim-Subscription-Key': KEY,
            'content-type': 'application/json',
        }

        # construct endpoint to call based on desired api
        url = ENDPOINT + API[api_type] + '/' + modelID + '/analyze'
        print('\n%s URL: %s ' %(api_type.upper(), url))

        # construct payload
        payload = json.dumps({"source": image_path}) # image path  is SAS URL

        # send request; an empty params dict adds nothing to the URL, so no
        # special case is needed, and the timeout keeps an unresponsive
        # service from blocking forever
        r = requests.post(url, data=payload, headers=headers,
                          params=parameters, timeout=timeout)

        # for debugging
        print ('>>>>>>>>>>>>>>>>>>>>')
        print(r)
        print('>>>>>>>>>>>>>>>>>>>>>')

        # process request
        if r.status_code in (200, 201, 202):
            print('INFO [%s]: data processed OK' %r.status_code)
        else:
            print('ERROR [%s]: %s, %r' %(r.status_code, r.text, r))
            return success, response

    except Exception as e:
        # best-effort boundary: report the problem and signal failure to the caller
        print("ERROR: failed to send data for processing, %s" %e)
        return success, response

    # result processing based on call - async APIs answer with the location to poll
    operation_location = r.headers.get('Operation-Location')
    if not operation_location:
        print("ERROR: failed to get processing result, %s"
              %"response has no 'Operation-Location' header")
        return success, response

    try:
        print('GET RESULT Operation Location: %s' %operation_location)

        # poll for the result
        success, response = get_response(operation_location)

        delta = datetime.datetime.now() - start_ts
        print ('\nINFO: time elapsed %s' %(delta))
        print()
    except Exception as e:
        print("ERROR: failed to get processing result, %s" %e)
        return success, response

    print('Image processing completed.')
    return success, response
    
def get_response(operation_location, polling_interval=1, max_wait=None, timeout=30):
    """ Get data processing results (for async invocation)

    Polls the operation location until the reported status is no longer
    "running" or "notStarted".

    Params:
        operation_location (str): operation location (endpoint + operationId)
        polling_interval (int | float): pause between two polls, in seconds
        max_wait (int | float | None): stop polling after this many seconds;
            None (default) keeps polling until a final status is reported
        timeout (int | float): timeout of every single HTTP request, in seconds
    Returns:
        success (bool): flag indicating image processing result
        result (dict): full API response when processing succeeded, {} otherwise
    """
    pending_statuses = ("running", "notStarted")
    result = {}
    success = False

    try:
        print('INFO: GET processing results.')
        # set request headers
        headers = {'Ocp-Apim-Subscription-Key': KEY}
        deadline = None if max_wait is None else time.monotonic() + max_wait

        # operation location is endpoint + operation ID
        r = requests.get(operation_location, headers=headers, timeout=timeout)
        if r.status_code != 200:
            print('ERROR GET [%s]: %s, %r' %(r.status_code, r.text, r))
            return success, result
        body = r.json()  # parse every response only once

        # poll for the results; other statuses include "failed" & "succeeded"
        while body.get('status') in pending_statuses:
            if deadline is not None and time.monotonic() >= deadline:
                print('ERROR GET: processing not finished after %s seconds' %max_wait)
                return success, result

            time.sleep(polling_interval)
            r = requests.get(operation_location, headers=headers, timeout=timeout)
            if r.status_code != 200:
                # a non-200 reply may not carry a 'status' field; report it
                # instead of failing on the status lookup
                print('ERROR GET [%s]: %s, %r' %(r.status_code, r.text, r))
                return success, result
            body = r.json()
            print('\tINFO GET [%s]: %s: %s' %(r.status_code, str(body.get('status')).upper(), body))

        if body.get('status') == "succeeded":
            success = True
            result = body
            print('>>>>>> %s' %r.headers)
        else:
            print('FAILED GET: processing of the image')

    except Exception as e:
        # best-effort boundary: report the problem and signal failure to the caller
        print("ERROR: failed to retrieve processing result, %s" %e)

    return success, result

In [None]:
# to include read Results in the response
parameters = dict(includeTextDetails=True)

# submit the image referenced by the SAS URL to the trained custom model
success, result = send_image(SAS_URL, 'custom', parameters, modelID)

# keep the raw response on disk
with open('3_custommodel_out.json', 'w') as out_file:
    json.dump(result, out_file)

In [None]:
def process_result(result):
    """
    Prints the output of Analyze operation
    Params:
        result (dict): Custom Model API processing result. Result has following attributes 'status',
                        'createdDateTime', 'lastUpdatedDateTime', 'analyzeResult'.
                        Analyze Result: 'version', 'readResults', 'pageResults', 'documentResults', 'errors'
                        'READ Results': all text extracted from the input.
                        'PAGE Results': Page-level information extracted from the input, contains tables
                        'documentResults': key/value pair associations
                        An empty dict (failed processing) is accepted as well.
    Returns:
        None: the document results and every field (name, text, confidence) are printed
    """
    # tolerate an empty / partial response instead of failing on a chained .get()
    analyze_result = (result or {}).get('analyzeResult') or {}
    document_results = analyze_result.get('documentResults')

    # Uncomment to inspect the other sections of the response:
    # print(analyze_result.get('pageResults'))
    # print(analyze_result.get('readResults'))

    print('\n> DOCUMENT RESULTS >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
    print(document_results)

    print('\n')
    if not document_results:
        print('No document results found...')
        return

    for doc in document_results:
        fields = doc.get('fields')
        if not fields:
            # keep going: later documents may still contain fields
            print('No fields found...')
            continue
        for field, values in fields.items():
            # guards against a field whose entry is None
            values = values or {}
            print('"%s" :  %s | (confidence %s)' %(field, values.get('text'), values.get('confidence')))
                


In [None]:
# print the document results and extracted fields of the response obtained above
process_result(result)