In [None]:
import json
import re
from pathlib import Path
from typing import List, Optional

import openpyxl

from discharge_summaries.schemas.prsb_guidelines import Element, Section

In [None]:
# Guideline files live in a "guidelines" folder that sits next to the current
# working directory (i.e. it is a sibling of wherever the notebook is run from).
GUIDELINES_DIR = Path.cwd().parent.joinpath("guidelines")
# Source spreadsheet that the sections and elements are read from.
GUIDELINES_EXCEL_PATH = GUIDELINES_DIR.joinpath(
    "eDischarge-Summary-v2.1-1st-Feb-21.xlsx"
)
# Destination of the JSON dump of the extracted sections.
GUIDELINES_JSON_PATH = GUIDELINES_DIR.joinpath(
    "eDischarge-Summary-v2.1-1st-Feb-21_extract_text_elements.json"
)

In [None]:
# Open the guidelines workbook and pick the worksheet named "Sheet1".
sheet = openpyxl.load_workbook(filename=GUIDELINES_EXCEL_PATH)["Sheet1"]
# Materialise every row from worksheet row 4 onwards as a tuple of plain cell
# values (openpyxl rows are 1-indexed, so the first three rows are skipped).
# NOTE(review): presumably rows 1-3 are title/header rows - confirm against the sheet.
rows = [*sheet.iter_rows(min_row=4, values_only=True)]

In [None]:
# Scratch check of a candidate pattern for spotting "record entry" descriptions.
# NOTE(review): this evaluates to None - `[a-z]+` can match neither the capital
# "D" nor the space in "Distribution list" - and `test` is not used by any other
# visible cell, so this looks like leftover exploration that could be removed.
record_entry_pattern = "This is a [a-z]+ record entry"
test = re.compile(record_entry_pattern).match(
    "This is a Distribution list record entry."
)
test

In [None]:
def _cell_text(value) -> str:
    """Return a cell value as stripped text; an empty cell (``None``) becomes ``""``."""
    if value is None:
        return ""
    return str(value).strip()


def create_section_from_rows(section_rows: List[tuple]) -> "Optional[Section]":
    """Build a ``Section`` from the spreadsheet rows that make up one section.

    Expected layout of ``section_rows`` (each row is a tuple of cell values):

    * index 0 - not read by this function,
    * index 1 - the section row (name, description, extract flag),
    * index 2 - the element header row (skipped),
    * index 3 onwards - one row per element.

    Columns are read by position: name, description, cardinality, data type,
    values, extract-from-free-text flag (the data type column is mapped but
    not used).

    If the first element row has no values and its description contains
    "record entry", the section is flagged as a record and that row is not
    emitted as an element.

    Args:
        section_rows: Rows belonging to a single section, in sheet order.

    Returns:
        The populated ``Section``, or ``None`` when there are too few rows to
        contain a section row or when the section's extract flag is ``"N"``.
    """
    SECTION_ROW = 1
    ELEMENT_HEADER_ROW = 2
    field_name_to_idx = {
        "name": 0,
        "description": 1,
        "cardinality": 2,
        "data_type": 3,
        "values": 4,
        "extract_from_free_text": 5,
    }

    # Runs of blank rows (or a trailing flush) can produce a group that is too
    # short to contain a section row; treat that as "no section", not an error.
    if len(section_rows) <= SECTION_ROW:
        return None

    section_row = section_rows[SECTION_ROW]
    if section_row[field_name_to_idx["extract_from_free_text"]] == "N":
        return None

    # Slicing (rather than indexing) keeps a section without element rows valid.
    element_rows = section_rows[ELEMENT_HEADER_ROW + 1 :]

    is_record = False
    if element_rows:
        first_element_row = element_rows[0]
        # Empty cells arrive as None when rows are read with values_only=True,
        # so "no values" must cover both None and "" (a bare == "" never matches
        # an empty cell).
        is_record = not _cell_text(
            first_element_row[field_name_to_idx["values"]]
        ) and "record entry" in _cell_text(
            first_element_row[field_name_to_idx["description"]]
        )
    if is_record:
        # The marker row describes the record itself; it is not an element.
        element_rows = element_rows[1:]

    elements = []
    for element_row in element_rows:
        if element_row[field_name_to_idx["extract_from_free_text"]] == "N":
            continue
        # Rows carrying nothing beyond the first column hold no element data.
        if all(field is None for field in element_row[1:]):
            continue

        elements.append(
            Element(
                name=_cell_text(element_row[field_name_to_idx["name"]]),
                description=_cell_text(
                    element_row[field_name_to_idx["description"]]
                ),
                cardinality=_cell_text(
                    element_row[field_name_to_idx["cardinality"]]
                ),
                values=_cell_text(element_row[field_name_to_idx["values"]]),
            )
        )
    return Section(
        name=_cell_text(section_row[field_name_to_idx["name"]]),
        description=_cell_text(section_row[field_name_to_idx["description"]]),
        is_record=is_record,
        elements=elements,
    )


# Split the sheet rows into per-section groups (a fully blank row separates two
# sections) and build a Section from each group.
sections = []
section_rows: List[tuple] = []
# The trailing empty tuple behaves like one more blank row, so the final group
# is flushed and appended even when the sheet does not end with a blank row.
for row in [*rows, ()]:
    if all(cell is None for cell in row):
        # Consecutive blank rows yield empty groups; there is nothing to build.
        if section_rows:
            section = create_section_from_rows(section_rows)
            if section:
                sections.append(section)
        section_rows = []
    else:
        section_rows.append(row)

In [None]:
# Persist the extracted sections as a pretty-printed JSON array, one dict per
# section as produced by each section's ``dict()`` method.
with GUIDELINES_JSON_PATH.open(mode="w") as f:
    json.dump(obj=[section.dict() for section in sections], fp=f, indent=4)