# Form Recognizer

https://docs.microsoft.com/en-us/rest/api/formrecognizer/2.1preview2/analyzelayoutasync/analyzelayoutasync

Scope of the script below:
1. Loads the Form Recognizer (FR) endpoint access details from a config file
2. Sends an image for processing & polls for the result
3. Processes the result: extracts the text lines, selection marks & tables found
4. Plots the bounding boxes of the extracted elements over the image

## LAYOUT API

In [None]:
import os
import datetime
import time
import requests
from pprint import pprint
import json

# Endpoint access configuration.
# The service URL and subscription key are read from 'access_config.json',
# looked up in the current working directory.

config_file = os.path.normpath(os.path.join(os.getcwd(), 'access_config.json'))
with open(config_file, 'r') as j:
    config = json.load(j)

ENDPOINT = config['form.recognizer']['api']
KEY = config['form.recognizer']['key']

# Maps an api type name to the path appended to ENDPOINT.
API = {'layout': '/formrecognizer/v2.1-preview.2/layout/analyze'}

In [None]:
def send_image(image_path, api_type, parameters):
    """ Submits a local file for processing by the service and fetches the result
    The file content is POSTed as the raw request body to ENDPOINT + API[api_type];
    the 'Operation-Location' header of the reply is then handed to get_response().
    Params:
        image_path (str): path of the file to upload (only local files are read here;
                          the URL payload variant is left commented out below)
        api_type (str): key of API indicating which type of call should be made
        parameters (dict): request (query string) parameters; None or {} sends none
    Returns:
        success (bool): flag returned by get_response(), False on any earlier failure
        response (dict): data returned from api, {} on failure
    """
    # local import: keeps the block self-contained without touching the import cell
    import mimetypes

    success = False
    response = {}

    # taken before the first try so the elapsed-time report below can always use it
    start_ts = datetime.datetime.now()

    try:

        if api_type not in API:
            print('ERROR: unsupported api type "%s" received.' %api_type)
            return success, response

        # set request headers
        headers = {}
        headers['Ocp-Apim-Subscription-Key'] = KEY
        #headers['content-type'] = 'application/json'

        # File is sent as raw bytes, so the content type has to describe the file.
        # Only types listed here are used; anything else keeps the previous
        # 'image/png' default.
        # NOTE(review): list taken from the layout REST reference - confirm for the
        # API version configured in API.
        supported_types = ('application/pdf', 'image/jpeg', 'image/png', 'image/tiff')
        guessed_type = mimetypes.guess_type(image_path)[0]
        if guessed_type in supported_types:
            headers['content-type'] = guessed_type
        else:
            headers['content-type'] = 'image/png'

        # construct endpoint to call based on desired api
        url = ENDPOINT + API[api_type]
        print('\n%s URL: %s ' %(api_type.upper(), url))

        # construct payload

        # payload from URL
        #payload = json.dumps({"url": image_path}) # image on URL

        # payload from imagefile
        with open(image_path, 'rb') as file:
            payload = file.read()

        # send request; an empty dict is passed as None, i.e. no query string
        r = requests.post(url, data=payload, headers=headers, params=parameters or None)

        # for debugging
        print ('>>>>>>>>>>>>>>>>>>>>')
        print(r)
        print('>>>>>>>>>>>>>>>>>>>>>')

        # process request
        if r.status_code in [200, 201, 202]:
            print('INFO [%s]: data processed OK' %r.status_code)
        else:
            print('ERROR [%s]: %s, %r' %(r.status_code, r.text, r))
            return success, response
    except Exception as e:
        print("ERROR: failed to send data for processing, %s" %e)
        return success, response

    try:
        # result processing based on call - async APIs
        # (a missing header raises KeyError, reported by the except below)
        operation_location = r.headers['Operation-Location']
        print('GET RESULT Operation Location: %s' %operation_location)

        # poll for the result
        success, response = get_response(operation_location)

        delta = datetime.datetime.now() - start_ts
        print ('\nINFO: time elapsed %s' %(delta))

        # print raw result
        print()
        #pprint(response)
    except Exception as e:
        print("ERROR: failed to get processing result, %s" %e)
        return success, response

    print('Image processing completed.')
    return success, response
    
def get_response(operation_location, polling_interval=1, max_wait=None):
    """ Get data processing results (for async invocation)
    Repeats a GET on the operation location until the reported status is no
    longer "running" / "notStarted".
    Params:
        operation_location (str): operation location (endpoint + operationId)
        polling_interval (float): pause between two polls, in seconds
        max_wait (float): give up after this many seconds of polling;
                          None (default) polls until the service reports a final status
    Returns:
        success (bool): flag indicating image processing result
        result (dict): extracted data from image, full API response ({} on failure)
    """
    result = {}
    success = False

    try:
        print('INFO: GET processing results.')
        # set request headers
        headers = {'Ocp-Apim-Subscription-Key': KEY}

        deadline = None if max_wait is None else time.monotonic() + max_wait

        # operation location is endpoint + operation ID
        r = requests.get(operation_location, headers=headers)

        if r.status_code != 200:
            print('ERROR GET [%s]: %s, %r' %(r.status_code, r.text, r))
            return success, result

        # parse each reply once and reuse the parsed body
        body = r.json()

        # poll for the results; statuses include "failed" & "succeeded"
        while body['status'] in ("running", "notStarted"):
            if deadline is not None and time.monotonic() >= deadline:
                print('ERROR GET: no final status after %s seconds' %max_wait)
                return success, result

            time.sleep(polling_interval)
            r = requests.get(operation_location, headers=headers)

            # a failing poll carries no usable status, so report it explicitly
            if r.status_code != 200:
                print('ERROR GET [%s]: %s, %r' %(r.status_code, r.text, r))
                return success, result

            body = r.json()
            print('\tINFO GET [%s]: %s: %s' %(r.status_code, body.get('status').upper(), body))

        if body.get('status') == "succeeded":
            success = True
            result = body
            print('>>>>>> %s' %r.headers)
        else:
            print('FAILED GET: processing of the image')

    except Exception as e:
        print("ERROR: failed to retrieve processing result, %s" %e)
        return success, result

    return success, result

In [None]:
# Upload the demo image and wait for the analysis result.

image_path = os.path.normpath(
    os.path.join(os.getcwd(), 'BeaconReceipt_Demo.png')
)

api_type = 'layout'
parameters = dict()  # no extra request parameters

success, result = send_image(
    image_path=image_path,
    api_type=api_type,
    parameters=parameters,
)

In [None]:
def find_key_value(pref, dictionary):
    """ Return the value of the first key that contains `pref`

    Keys are visited in the dictionary's iteration order and matched with a
    substring test (`pref in key`), so the match may be anywhere in the key.
    Returns None when no key matches.
    """
    matches = (value for key, value in dictionary.items() if pref in key)
    return next(matches, None)
    
def _table_bounding_boxes(table):
    """ Bounding boxes to draw for one table: the table's own box when present, else its cells' boxes """
    if table.get('boundingBox'):
        return [table.get('boundingBox')]
    return [cell.get('boundingBox') for cell in table.get('cells') or [] if cell.get('boundingBox')]


def process_result(result):
    """
    Processes the output of the Layout API
    Params:
        result (dict): Layout API processing result. Result has following attributes 'status',
                        'createdDateTime', 'lastUpdatedDateTime', 'analyzeResult'.
                        Analyze Result: 'version', 'readResults', 'pageResults'
                        'READ Results': Text extracted from the input.
                        'PAGE Results': Page-level information extracted from the input.
    Returns:
        success (bool): False when the result could not be processed
        bounding_boxes_lines (list): bounding boxes of the text lines found
        bounding_boxes_selections (list): bounding boxes of the selection marks found
        bounding_boxes_tables (list): bounding boxes of the tables found
        On failure the three lists are returned empty.
    """
    bounding_boxes_lines = []
    bounding_boxes_selections = []
    bounding_boxes_tables = []

    try:
        # inside the try: a missing 'analyzeResult' is reported as a processing
        # failure instead of raising to the caller
        analyze_result = result.get('analyzeResult')
        print('RESULT elements: %s' %list(analyze_result.keys()))

        print('> READ RESULTS >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
        print(analyze_result.get('readResults'))

        for doc in analyze_result.get('readResults'):

            if doc.get('lines', False):
                print('\nLINES: ----------------------------------------')
                for line in doc.get('lines'):
                    bounding_boxes_lines.append(line.get('boundingBox'))
                    print('Line: %s' %line.get('text'))

            if doc.get('selectionMarks', False):
                print('\nSELECTION: ----------------------------------------')
                for selection in doc.get('selectionMarks'):
                    bounding_boxes_selections.append(selection.get('boundingBox'))

                    print('Selection: %s (confidence %s)' %(selection.get('state'), selection.get('confidence')))

            if doc.get('tables', False):
                print('\nTABLE: ----------------------------------------')
                for table in doc.get('tables'):
                    print('>%s' %table)
                    bounding_boxes_tables.extend(_table_bounding_boxes(table))

        # NOTE(review): tables appear to be reported per page under 'pageResults'
        # rather than 'readResults' - confirm against the API version in use.
        for page in analyze_result.get('pageResults') or []:
            if page.get('tables', False):
                print('\nTABLE: ----------------------------------------')
                for table in page.get('tables'):
                    print('>%s' %table)
                    bounding_boxes_tables.extend(_table_bounding_boxes(table))

    except Exception as e:
        print('ERROR: data post processing, %s' %e)
        return False, [], [], []

    return True, bounding_boxes_lines, bounding_boxes_selections, bounding_boxes_tables

# process result if data extraction is successful
# Defaults are assigned first so these names exist even when extraction failed;
# otherwise the later `if success_pr:` check raises NameError.
success_pr, bb_lines, bb_selection, bb_table = False, [], [], []
if success:
    success_pr, bb_lines, bb_selection, bb_table = process_result(result)


In [None]:
# plot results
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from PIL import Image

%matplotlib inline

def create_patch(rectangle):
    """ Create patch coordinates from Rectangle returned by CV API
    Parameters:
        rectangle(dict | list | tuple): height, width, x,y coordinates
            {'height': 393, 'left': 350,'top': 403,'width': 393} # FaceRectangle
            {'h': 881, 'w': 1063, 'x': 83, 'y': 26} # objects rectangle
            [135, 191, 815, 336, 795, 567, 137, 528] flat list of coordinates from bounding box
    Returns:
        patch (list): list of points of patch as tuple (x,y)
                    [(83, 26), (1146, 26), (1146, 907), (83, 907)]
                    Empty list for any other input type (e.g. None).
    Raises:
        KeyError: a dict input lacks one of the x / y / w / h values (under either naming)
    """
    patch = []

    if isinstance(rectangle, dict):
        keys_map = {'height':'h', 'width': 'w', 'left': 'x', 'top':'y'}

        # translate long key names to short ones; keys outside the map
        # (extra attributes) are passed through instead of raising KeyError
        rect = {keys_map.get(k, k): v for k, v in rectangle.items()}
        x, y, w, h = rect['x'], rect['y'], rect['w'], rect['h']

        # corners: (x, y), then +width, then +width/+height, then +height
        patch = [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]

    elif isinstance(rectangle, (list, tuple)):
        # flat bounding box [x1, y1, x2, y2, ...]: pair up consecutive values;
        # a trailing unpaired value is dropped
        patch = list(zip(rectangle[::2], rectangle[1::2]))

    return patch


def plot(filepath, bb_lines, bb_selection, bb_table):
    """ Plot bounding boxes over image
    Each bounding box is converted with create_patch() and drawn as an unfilled
    polygon: lines in magenta, selection marks in blue, tables in yellow.
    Parameters:
        filepath (str): path to image
        bb_lines (list): bounding boxes for lines of text
        bb_selection (list): bounding boxes for selection marks (checkboxes)
        bb_table (list): bounding boxes for tables
    """

    _, ax = plt.subplots(nrows=1, ncols=1, figsize=(8,10))

    # context manager closes the image file once it has been handed to imshow
    # NOTE(review): relies on imshow copying the pixel data when called - confirm
    with Image.open(filepath) as img:
        ax.imshow(img)

    # one colour per kind of element found
    groups = (
        (bb_lines, 'magenta'),
        (bb_selection, 'blue'),
        (bb_table, 'yellow'),
    )
    for boxes, color in groups:
        for bb in boxes or []:
            rec = create_patch(bb)
            if not rec:
                # nothing to draw (e.g. missing bounding box)
                continue
            ax.add_patch(mpatches.Polygon(rec, fill=False, color=color))

    ax.axis('off')
    plt.show()


In [None]:
# Draw the overlays only when post-processing reported success.
if success_pr:
    plot(
        filepath=image_path,
        bb_lines=bb_lines,
        bb_selection=bb_selection,
        bb_table=bb_table,
    )