In [1]:
import logging


# Route records of level INFO and above to ocr_barcode.log; each line carries
# the timestamp, the level name and the message.
logging.basicConfig(
    filename='ocr_barcode.log',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

logger.info('This is an info message')



In [2]:
import os 
from urllib.parse import urljoin, urlparse 
from storage.client import sasForBlob
import requests
import json

from PIL import ImageDraw
from PIL import Image as PILImage

from PIL import ImageFont
from io import BytesIO

from time import sleep


In [3]:
from dotenv import load_dotenv
# Presumably reads a local .env file into the process environment so that the
# os.environ.get(...) lookups in the next cell can pick the values up - confirm
# against the python-dotenv docs. The call is left as the last expression so
# the notebook shows its return value (True in the recorded run).
load_dotenv()


True

In [4]:
# Form Recognizer settings. Each one is read from the environment when present;
# otherwise the fallback literal is used. Note that the fallback endpoint still
# contains a literal "<region>" placeholder.
forms_api = os.getenv('forms_api', 'https://<region>.api.cognitive.microsoft.com/')
forms_key = os.getenv('forms_key', '12233344445555566666677777779999')
forms_model = os.getenv('forms_model', 'prebuilt-read')
forms_api_version = os.getenv('forms_api_version', '2022-06-30-preview')

# Path and query string of the analyze operation for the chosen model/version;
# it is joined onto forms_api when the request is sent.
forms_api_model = (
    f'/formrecognizer/documentModels/{forms_model}:analyze?api-version={forms_api_version}'
)

# Sample barcode images to run through the notebook.
blob_images = [
    "https://godzillasinastorage.blob.core.windows.net/cv4/barcodes.png",
    "https://godzillasinastorage.blob.core.windows.net/cv4/barcode-1186578.jpg",
    "https://godzillasinastorage.blob.core.windows.net/cv4/barcodes2.png",
]

In [5]:
from IPython.display import Image, display
# Show every sample image inline (width=400). The image is referenced by URL:
# the blob URL plus "?" plus whatever sasForBlob returns for it - presumably a
# SAS query string; confirm in storage.client.
# NOTE: the loop variable `blob_image` remains defined in the notebook
# namespace after the loop ends.
for blob_image in blob_images:
    display(Image(url=blob_image + "?" + sasForBlob(blob_image), width=400))


In [8]:

def barcode_reader(blob_url, poll_interval=1, request_timeout=60):
    """Extract barcodes from one blob image with Form Recognizer and outline them.

    The image is downloaded through a signed URL, the same URL is submitted to
    the analyze endpoint built from ``forms_api`` and ``forms_api_model``, and
    the resulting operation is polled until it is no longer ``notStarted`` or
    ``running``. On success the raw JSON result is written to
    ``forms_<image name>.json`` in the working directory, and every barcode
    reported for the first page is outlined and labelled on a copy of the
    image. Only the first page of the result is inspected.

    Args:
        blob_url: URL of the blob to analyze, without a query string.
        poll_interval: Seconds to sleep between two status polls.
        request_timeout: Timeout in seconds passed to every HTTP request.

    Returns:
        A list containing, in order, the POST response, every polling
        response, and - only when at least one barcode was found - the
        annotated PIL image as the last element. When the POST, a poll, or the
        operation itself fails, the list collected so far is returned early.

    Raises:
        requests.HTTPError: If the image download returns an error status.
    """
    # Sign once and reuse the URL for both the local download and the service.
    image_url = blob_url + "?" + sasForBlob(blob_url)

    image_resp = requests.get(image_url, timeout=request_timeout)
    # Fail with the HTTP error instead of letting PIL choke on an error page.
    image_resp.raise_for_status()
    image = PILImage.open(BytesIO(image_resp.content))

    retvalue = []

    # https://learn.microsoft.com/en-us/javascript/api/@azure/ai-form-recognizer/documentbarcode?view=azure-node-preview
    # https://learn.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/concept-read?view=form-recog-3.0.0#barcode-extraction

    # Log the bare blob URL only: the signed URL carries the sasForBlob query
    # string (presumably an access token), which should not land in the log file.
    logger.info(f"Analyzing {blob_url}")
    logger.info(f"Forms model: {forms_api_model}")

    resp = requests.post(
        urljoin(forms_api, forms_api_model),
        headers={
            'Ocp-Apim-Subscription-Key': forms_key,
            'Content-Type': 'application/json',
        },
        json={"urlSource": image_url},
        timeout=request_timeout,
    )
    retvalue.append(resp)

    logger.info(f"Response: {resp.status_code}, OK: {resp.ok}")
    if not resp.ok:
        logger.error("Request failed, aborting!")
        return retvalue

    operationLocation = resp.headers['Operation-Location']
    logger.info(f"Operation location: {operationLocation}")

    # A queued job reports 'notStarted' and an executing one 'running'; both
    # mean "poll again". Anything else is a terminal state.
    pending_statuses = ('notStarted', 'running')

    while True:
        formsresponse = requests.get(
            operationLocation,
            headers={'Ocp-Apim-Subscription-Key': forms_key},
            timeout=request_timeout,
        )
        retvalue.append(formsresponse)

        logger.info(f"Operation Location Response: {formsresponse.status_code}, OK: {formsresponse.ok}")
        if not formsresponse.ok:
            logger.error("Operation status request failed, aborting!")
            return retvalue

        # Parse the body once; it is reused for the status, the dump and the results.
        forms_result = formsresponse.json()
        formsresponse_status = forms_result['status']
        logger.info(f"Operation status: {formsresponse_status}")

        if formsresponse_status not in pending_statuses:
            break
        sleep(poll_interval)

    if formsresponse_status != 'succeeded':
        logger.error("Operation status failed, aborting!")
        return retvalue

    logger.info("Operation status succeeded, processing results!")

    # Name the dump after the image passed in (not after any notebook global).
    # https://stackoverflow.com/questions/8384737/extract-file-name-from-path-no-matter-what-the-os-path-format
    image_stem = os.path.splitext(os.path.basename(urlparse(blob_url).path))[0]
    json_filename = f'forms_{image_stem}.json'
    logger.info(f"Writing JSON output to {json_filename}")

    with open(json_filename, 'w') as outfile:
        json.dump(forms_result, outfile)

    # Tolerate a result without pages instead of raising IndexError.
    pages = forms_result['analyzeResult'].get('pages') or []
    forms_barcodes = pages[0].get('barcodes', []) if pages else []

    logger.info(f"Found {len(forms_barcodes)} barcodes in {blob_url}")
    if not forms_barcodes:
        return retvalue

    img2 = image.copy()
    draw = ImageDraw.Draw(img2)

    # Load the label font once. arial.ttf is not installed everywhere, so fall
    # back to Pillow's built-in font rather than failing after a successful analysis.
    try:
        font = ImageFont.truetype("arial.ttf", 24)
    except OSError:
        logger.warning("Font arial.ttf not available, using the default PIL font")
        font = ImageFont.load_default()

    for bc in forms_barcodes:
        bc_text = bc['value']
        bc_type = bc['kind']

        # 'polygon' is a flat [x0, y0, x1, y1, ...] list; pair it into corner
        # points ordered top-left, top-right, bottom-right, bottom-left.
        polygon = bc['polygon']
        corners = list(zip(polygon[0::2], polygon[1::2]))
        bc_top_left = corners[0]
        bc_bottom_right = corners[2]

        print(f"{bc_text}/{bc_type}: {bc_top_left}, {bc_bottom_right}")

        # Close the outline by returning to the first corner.
        draw.line(corners + corners[:1], fill='blue', width=5)
        draw.text(bc_top_left, f'  {bc_text}\n  {bc_type}', font=font, fill='red')

    retvalue.append(img2)

    return retvalue


In [9]:
# Analyze every sample image and show the annotated copy when one was produced.
for blob_image in blob_images:
    r = barcode_reader(blob_image)
    # Log through the notebook's `logger` (created in the first cell) rather
    # than calling the root logging module directly.
    logger.info(f"Returned {len(r)} items")
    logger.info(f"Last item: {type(r[-1])}")

    # The annotated image, when present, is the last item; otherwise the last
    # item is an HTTP response and there is nothing to draw.
    if isinstance(r[-1], PILImage.Image):
        # Render inline in the notebook instead of launching an external viewer.
        display(r[-1])
    

0123456789abcdefg/Code128: (336, 453), (786, 526)
123456789999/UPCA: (339, 617), (699, 706)
Form Recognizer/PDF417: (384, 835), (645, 880)
