## Visual check of the family boxes detection

Extract the box coordinates and generate a JSON file in LabelMe format, which can be used to visualize the bounding rectangles.

In [None]:
import json
import base64
import fitz
from deuxpots import CERFA_VARIABLES_PATH, CATEGORY_COORDS_PATH
from deuxpots.box import load_box_mapping
from deuxpots.pdf_tax_parser import _generate_family_box_coords, load_category_coords, _strip_duplicate_family_box_coords

In [None]:
# Declaration to inspect. Swap the two lines below to check another year.
PDF_PATH = "test/resources/declaration_2022.pdf"
# PDF_PATH = "test/resources/declaration_2023.pdf"

# Inputs of the family box detection, loaded from the paths exported by the
# deuxpots package.
CATEGORY_COORDS = load_category_coords(CATEGORY_COORDS_PATH)
BOX_MAPPING = load_box_mapping(CERFA_VARIABLES_PATH)

In [None]:
# Open the declaration and pick its second page (index 1), which the rest of
# this notebook uses as the family page.
doc = fitz.open(PDF_PATH)
family_page = doc[1]

# Page dimensions as reported by the page's mediabox.
page_width, page_height = family_page.mediabox_size

# Render the whole page rectangle; the image is embedded in the LabelMe file.
pixmap = family_page.get_pixmap(clip=family_page.rect)


In [None]:
# Detect the family box coordinates on the page, then drop the duplicates.
# The result is iterated below as (box code, coordinates) pairs.
family_box_coords = _strip_duplicate_family_box_coords(
    _generate_family_box_coords(family_page, CATEGORY_COORDS, BOX_MAPPING)
)

In [None]:
# Build a LabelMe annotation document: the rendered page image (embedded as
# base64) plus one rectangle shape per detected family box.
labelme_dict = {
    "version": "5.2.0",
    "flags": {},
    "shapes": [
        {
            "label": box_code,
            # A rectangle is given by two points: the first two coordinates
            # and the last two coordinates of the box.
            "points": [
                box_coords[0:2],
                box_coords[2:4],
            ],
            "group_id": None,
            "description": "",
            "shape_type": "rectangle",
            "flags": {},
        }
        for box_code, box_coords in family_box_coords.items()
    ],
    # Take the size from the rendered pixmap so that it always matches the
    # embedded image; the page's mediabox size is a float in points and may
    # not equal the pixel dimensions once truncated to int.
    "imageWidth": pixmap.width,
    "imageHeight": pixmap.height,
    "imageData": base64.b64encode(pixmap.tobytes("png")).decode(),
    # No image file is written: LabelMe reads the embedded imageData instead.
    "imagePath": "placeholder.png",
}

# The file is only written, so plain 'w' is enough (no need for 'w+').
with open('family_box_coords.json', 'w', encoding='utf-8') as f:
    json.dump(labelme_dict, f, indent=4)

The generated file can be opened for visual check (it should not be committed):
```bash
labelme family_box_coords.json
```

The bounding boxes must span (i.e. at least slightly overlap) the box value.