In [1]:
# Dileep Gadiraju - Odisha ROI Transformation from VoTT Raw format
# Takes filename of VoTT raw format file and generates ROI configuration
# v1.5 saral ocr version

import uuid
import json

def get_annotation(filename):
    """Load a VoTT raw-format annotation file and return its regions.

    Args:
        filename: Path of the VoTT raw JSON file to read.

    Returns:
        The value stored under the top-level ``"regions"`` key of the file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file content is not valid JSON
            (``json.JSONDecodeError`` is a subclass of ``ValueError``).
        KeyError: If the document has no ``"regions"`` key.
    """
    # JSON is UTF-8 encoded; do not depend on the platform's default encoding.
    # The context manager closes the file, so no explicit close() is needed.
    with open(filename, encoding="utf-8") as f:
        data = json.load(f)
    return data['regions']

def get_rois(regions,tagGroup):
    """Build the ROI entries for every region that belongs to one tag group.

    A region belongs to ``tagGroup`` when its first tag is exactly the group
    name or starts with the group name followed by ``_`` (for example
    ``QUESTION1_2`` belongs to ``QUESTION1``). Matching on the ``_`` delimiter
    keeps ``QUESTION1`` from also capturing ``QUESTION10_1`` .. ``QUESTION19_n``,
    which a bare prefix test would do.

    Args:
        regions: Iterable of region dicts, each with a ``tags`` list and a
            ``boundingBox`` dict holding ``top``, ``left``, ``height`` and
            ``width``.
        tagGroup: Group name, with or without a trailing ``_``.

    Returns:
        A list of ROI dicts in the order the matching regions appear.
        ``roiId`` counts from 1 and ``index`` from 0 within the group; ``rect``
        holds integer ``top``/``left``/``bottom``/``right`` coordinates.

    Raises:
        KeyError: If a matching region lacks ``boundingBox`` or one of its keys.
    """
    # Normalise to "<group>_" so the delimiter is always part of the prefix.
    prefix = tagGroup if tagGroup.endswith('_') else tagGroup + '_'

    rois = []
    for region in regions:
        tags = region.get('tags') or []
        if not tags:
            # Untagged regions cannot belong to any group; skip instead of failing.
            continue
        tag = tags[0]
        if tag != tagGroup and not tag.startswith(prefix):
            continue

        box = region['boundingBox']
        top = int(box['top'])
        left = int(box['left'])
        position = len(rois)  # zero-based position inside this group
        rois.append({
            "annotationTag": tag,
            "extractionMethod": "NUMERIC_CLASSIFICATION",
            "roiId": position + 1,
            "index": position,
            "rect": {
                "top": top,
                "left": left,
                # Each operand is truncated separately before adding.
                "bottom": top + int(box['height']),
                "right": left + int(box['width'])
            }
        })

    return rois

def get_cells(regions,tagGroups,validationInfo):
    """Create one cell definition per tag group.

    Args:
        regions: Region dicts, passed unchanged to ``get_rois``.
        tagGroups: Ordered sequence of tag group names; a trailing ``_`` is
            stripped when deriving the field name and the validation key.
        validationInfo: Mapping from field name to a dict providing
            ``regExp``, ``name``, ``errorMessage`` and ``source``.

    Returns:
        A list of cell dicts in ``tagGroups`` order. ``cellId`` and the render
        ``index`` both count from 1. When the field has no validation entry,
        or the entry lacks one of the four keys, all validation values are
        empty strings.
    """
    cells = []
    for position, tagGroup in enumerate(tagGroups, start=1):
        fieldName = tagGroup.rstrip('_')
        try:
            rule = validationInfo[str(fieldName)]
            ruleRegExp = rule['regExp']
            ruleName = rule['name']
            ruleErrorMsg = rule['errorMessage']
            ruleSource = rule['source']
        except KeyError:
            # Any missing piece blanks the whole validation section.
            ruleRegExp = ruleName = ruleErrorMsg = ruleSource = ""

        cells.append({
            "cellId": position,
            "rois": get_rois(regions,tagGroup),
            "render": {
                "index": position
            },
            "format": {
                "name": fieldName,
                "value": tagGroup.replace("_", "")
            },
            "validate": {
                "name": ruleName,
                "regExp": ruleRegExp,
                "errorMessage": ruleErrorMsg,
                "source": ruleSource
            }
        })
    return cells

def get_layout(cells,responseExcludeFields):
    """Wrap the cells in the layout document for the Odisha SAT 20-question sheet.

    Args:
        cells: Cell definitions stored under ``layout.cells``.
        responseExcludeFields: Field names stored under
            ``layout.excludeFieldsInResponse``.

    Returns:
        A dict with a single ``"layout"`` key holding the fixed version, name,
        type and tolerance settings together with the two arguments.
    """
    tolerance = {
        "predictionMin": 0.95,
        "roiMinWidth": 15,
        "roiMinHeight": 15
    }
    layout = {
        "version": "1.0",
        "name": "Odisha SAT 20 Questions Exam Sheet Form",
        "type": "SAT_20_MARKSHEET",
        "tolerance": tolerance,
        "excludeFieldsInResponse": responseExcludeFields,
        # An "identifiers" list (e.g. teacherId) is intentionally not emitted.
        "cells": cells
    }
    return {"layout": layout}

def pp_json(json_thing, sort=True, indents=4):
    """Pretty-print a JSON document to standard output.

    Args:
        json_thing: Either a ``str`` containing JSON text, which is parsed
            first, or any object ``json.dumps`` can serialise.
        sort: Passed to ``json.dumps`` as ``sort_keys``.
        indents: Passed to ``json.dumps`` as ``indent``.

    Returns:
        None. The formatted text is printed, not returned.
    """
    # Exact type test: only a plain str is treated as JSON text to parse.
    payload = json.loads(json_thing) if type(json_thing) is str else json_thing
    print(json.dumps(payload, sort_keys=sort, indent=indents))

# Load the annotated regions from the VoTT raw export. The path is relative,
# so the file is looked up in the current working directory.
regions=get_annotation("sat_odisha_vottraw.json")
# Debug aid: uncomment the next line to display the loaded regions.
#regions



In [2]:
# Validation info may not be preconfigured: the rules can be specific to a school or an exam,
# so they can be injected from the backend at scanning time. format.name is the key used to look them up.
#
# Every pattern is anchored with ^...$ so the whole value has to match, and ranges are
# written as alternations: a character class such as [0-10] only matches the single
# characters "0" and "1", not the numbers 0 to 10.
validationInfo = {
    'STUDENTID': { 'name': 'Between 10 to 15 Digits' , 'regExp': '^[1-9][0-9]{9,14}$' , 'errorMessage': 'Should be 10 to 15 Digits', 'source': 'BACKEND_SCHOOL' },
    'QUESTION1': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION1 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION2': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION2 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION3': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION3 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION4': { 'name': 'Between 0 to 10 Marks' , 'regExp': '^([0-9]|10)$' , 'errorMessage': 'QUESTION4 should range from 0 to 10 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION5': { 'name': 'Between 0 to 10 Marks' , 'regExp': '^([0-9]|10)$' , 'errorMessage': 'QUESTION5 should range from 0 to 10 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION6': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION6 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION7': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION7 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION8': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION8 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION9': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION9 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION10': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION10 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION11': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION11 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION12': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION12 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION13': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION13 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION14': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION14 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION15': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION15 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION16': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION16 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION17': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION17 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION18': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION18 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION19': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION19 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'QUESTION20': { 'name': 'Between 0 to 5 Marks' , 'regExp': '^[0-5]$' , 'errorMessage': 'QUESTION20 should range from 0 to 5 Marks', 'source': 'BACKEND_EXAM' },
    'MAX_MARKS': { 'name': 'Should be 110 Marks' , 'regExp': '^110$' , 'errorMessage': 'should be 110 Marks', 'source': 'BACKEND_EXAM' },
    # 0-9, 10-99, 100-109, 110. Anchors are used instead of \b, which in a non-raw
    # Python string is the backspace character rather than a word boundary.
    'MARKS_OBTAINED': { 'name': 'Between 0 to 110 Marks' , 'regExp': '^([0-9]|[1-9][0-9]|10[0-9]|110)$' , 'errorMessage': 'Should be MAX of 110 Marks', 'source': 'BACKEND_EXAM' },
}
#validationInfo['STUDENTID']
#validationInfo['STUDENTID']['regExp']


In [3]:
# Not all fields are needed in the response sent to the backend for analytics/insights,
# so the list of fields to exclude from the response can be defined per layout.
responseExcludeFields= ['rois','validate','render']

In [4]:
# Tag groups in output order: the student id, QUESTION1..QUESTION20, then the two totals.
tagGroups = (
    ["STUDENTID"]
    + ["QUESTION" + str(number) for number in range(1, 21)]
    + ["MAX_MARKS", "MARKS_OBTAINED"]
)
# Debug aid: uncomment to build the ROIs of the first tag group only.
#rois=get_rois(regions,tagGroups[0])
# Build one cell per tag group, with its ROIs and the validation rule looked up by group name.
cells=get_cells(regions,tagGroups,validationInfo)

In [5]:
# Assemble the layout document and print it as JSON (pp_json defaults: sorted keys, 4-space indent).
pp_json(get_layout(cells,responseExcludeFields))

{
    "layout": {
        "cells": [
            {
                "cellId": 1,
                "format": {
                    "name": "STUDENTID",
                    "value": "STUDENTID"
                },
                "render": {
                    "index": 1
                },
                "rois": [
                    {
                        "annotationTag": "STUDENTID_1",
                        "extractionMethod": "NUMERIC_CLASSIFICATION",
                        "index": 0,
                        "rect": {
                            "bottom": 50,
                            "left": 192,
                            "right": 210,
                            "top": 30
                        },
                        "roiId": 1
                    },
                    {
                        "annotationTag": "STUDENTID_2",
                        "extractionMethod": "NUMERIC_CLASSIFICATION",
                        "index": 1,
                        "rect": {
   