In [None]:
import json
import re
from pathlib import Path
from typing import Dict, List

import openpyxl

from discharge_summaries.schemas.prsb_guidelines import Element, RecordEntry, Section

In [None]:
# Input spreadsheet and JSON file locations. Everything lives in the
# "guidelines" folder under the parent of the current working directory, so
# the notebook must be run from a directory that is a sibling of that folder.
GUIDELINES_DIR = Path.cwd().parent.joinpath("guidelines")
GUIDELINES_EXCEL_PATH = GUIDELINES_DIR.joinpath(
    "eDischarge-Summary-v2.1-1st-Feb-21.xlsx"
)
GUIDELINES_PYDANTIC_MODEL_PATH = GUIDELINES_DIR.joinpath(
    "eDischarge-Summary-v2.1-1st-Feb-21_pydantic.json"
)
GUIDELINES_JSON_SCHEMA_PATH = GUIDELINES_DIR.joinpath(
    "eDischarge-Summary-v2.1-1st-Feb-21_schema.json"
)

In [None]:
# Load the guideline workbook and read "Sheet1" from worksheet row 4 onwards
# (rows 1-3 are skipped -- presumably title/preamble rows; confirm against the
# spreadsheet). values_only=True yields plain cell values instead of Cell
# objects, so each row is a tuple in which empty cells are None.
workbook = openpyxl.load_workbook(GUIDELINES_EXCEL_PATH)
sheet = workbook["Sheet1"]
rows = list(sheet.iter_rows(min_row=4, values_only=True))

In [None]:
def clean_text(text: str) -> str:
    """Normalise spreadsheet cell text to single-spaced ASCII.

    Steps, in order:
      1. Every run of whitespace (newlines, tabs, non-breaking spaces, ...)
         is replaced by one plain space, so that a non-ASCII space still
         separates words after step 2.
      2. Characters that cannot be encoded as ASCII are dropped.
      3. Runs of spaces are collapsed again, because dropping a character
         that sat between two spaces (for example an en dash) leaves two
         adjacent spaces behind.
      4. Leading and trailing spaces are stripped.

    Args:
        text: Raw cell text.

    Returns:
        The cleaned text, which may be empty.
    """
    # `\s` on a str pattern also matches Unicode whitespace such as U+00A0.
    text = re.sub(r"\s+", " ", text)
    text = text.encode("ascii", "ignore").decode()
    # Only plain spaces can remain as whitespace at this point.
    text = re.sub(r" {2,}", " ", text)
    return text.strip()


def rows_to_dict(row: List[str]) -> Dict[str, str]:
    """Label the first six cells of a spreadsheet row with column names.

    Falsy cells (e.g. ``None`` or an empty string) become ``""``; every other
    cell is passed through ``clean_text``.

    Args:
        row: One row of cell values. Indexes 0-5 are read; any further cells
            are ignored, and a shorter row raises ``IndexError``.

    Returns:
        A dict with the keys ``name``, ``description``, ``cardinality``,
        ``data_type``, ``values`` and ``extract_from_free_text``, in that
        order.
    """
    column_names = (
        "name",
        "description",
        "cardinality",
        "data_type",
        "values",
        "extract_from_free_text",
    )
    labelled: Dict[str, str] = {}
    for position, column_name in enumerate(column_names):
        cell = row[position]
        labelled[column_name] = clean_text(cell) if cell else ""
    return labelled


def rows_to_elements(
    rows: List[List[str]],
) -> list[Element]:
    """Build an ``Element`` for every complete row flagged for extraction.

    Each row is labelled with ``rows_to_dict``. A row with any empty field is
    printed (so it can be inspected in the notebook output) and skipped. Of
    the remaining rows, only those whose ``extract_from_free_text`` field is
    exactly ``"Y"`` produce an ``Element``.

    ``snomed_codes`` holds every run of six or more digits found in the
    ``values`` text when that text mentions "SNOMED CT", and ``None``
    otherwise.

    Args:
        rows: Spreadsheet rows describing elements.

    Returns:
        The elements in row order; possibly empty.
    """
    collected = []
    for raw_row in rows:
        fields = rows_to_dict(raw_row)

        # Incomplete rows are surfaced rather than silently dropped.
        if "" in fields.values():
            print(fields)
            continue

        if fields["extract_from_free_text"] != "Y":
            continue

        values_text = fields["values"]
        if "SNOMED CT" in values_text:
            codes = re.findall(r"\d{6,}", values_text)
        else:
            codes = None

        collected.append(
            Element(
                name=fields["name"],
                description=fields["description"],
                values=values_text,
                snomed_codes=codes,
            )
        )
    return collected


def rows_to_elements_or_record_entry(
    rows: List[List[str]],
) -> RecordEntry | list[Element]:
    """Interpret a section's element rows as a record entry or a flat list.

    The first row decides the shape: when its ``values`` field is empty and
    its ``description`` contains "record entry", that row names a
    ``RecordEntry`` and the remaining rows become its elements. Otherwise all
    rows, including the first, are converted with ``rows_to_elements``.

    Args:
        rows: The rows that follow a section's element header row; may be
            empty.

    Returns:
        A ``RecordEntry`` wrapping the converted elements, or a list of
        ``Element`` objects. An empty ``rows`` yields an empty list.
    """
    if not rows:
        # A section without element rows has nothing to convert; indexing
        # rows[0] below would otherwise raise IndexError.
        return []

    header = rows_to_dict(rows[0])
    is_record_entry = (
        header["values"] == "" and "record entry" in header["description"]
    )
    if is_record_entry:
        return RecordEntry(
            name=header["name"],
            description=header["description"],
            elements=rows_to_elements(rows[1:]),
        )
    return rows_to_elements(rows)


def rows_to_section(section_rows: List[List[str]]) -> Section | None:
    """Convert one group of spreadsheet rows into a ``Section``.

    Row layout within the group, as read by this function:
      * index 0 is never read,
      * index 1 describes the section itself,
      * index 2 is skipped (the element header row),
      * index 3 onwards describe the section's elements.

    Args:
        section_rows: The consecutive non-blank rows that make up one
            section.

    Returns:
        The ``Section`` when the section row's ``extract_from_free_text``
        field is exactly ``"Y"``. ``None`` when it is not, or when the group
        is too short to contain a section row at all.
    """
    SECTION_ROW = 1
    ELEMENT_HEADER_ROW = 2

    if len(section_rows) <= SECTION_ROW:
        # Stray or truncated groups (e.g. produced by repeated blank rows)
        # cannot describe a section; skip them instead of raising IndexError.
        return None

    section_row = rows_to_dict(section_rows[SECTION_ROW])
    if section_row["extract_from_free_text"] != "Y":
        return None

    return Section(
        name=section_row["name"],
        description=section_row["description"],
        elements=rows_to_elements_or_record_entry(
            section_rows[ELEMENT_HEADER_ROW + 1 :]
        ),
        # Only this one section is flagged as restricted to the last note.
        restrict_to_last_note=(section_row["name"] == "Plan and requested actions"),
    )


def rows_to_schema(rows: List[List[str]]) -> list[Section]:
    """Split the sheet into sections at blank rows and convert each one.

    A row whose cells are all ``None`` ends the current group of rows. Each
    non-empty group is handed to ``rows_to_section``; groups for which it
    returns ``None`` are dropped.

    Leading, consecutive or trailing blank rows produce empty groups. These
    are skipped rather than converted, because an empty group has no section
    row to read.

    Args:
        rows: All data rows of the worksheet, in order.

    Returns:
        The converted sections in sheet order; possibly empty.
    """
    section_models = []
    section_rows: List[List[str]] = []
    for row in rows:
        if all(cell is None for cell in row):
            if section_rows:
                section_models.append(rows_to_section(section_rows))
            section_rows = []
        else:
            section_rows.append(row)
    # The last section is not followed by a blank row, so flush it here.
    if section_rows:
        section_models.append(rows_to_section(section_rows))
    return [section for section in section_models if section]

In [None]:
# Convert the sheet rows into Section models, then into plain dicts that can
# be serialised to JSON. The last line previews the first three sections.
pydantic_model = rows_to_schema(rows)
pydantic_dict = [model.model_dump() for model in pydantic_model]
pydantic_dict[:3]

In [None]:
# Persist the dumped section models as indented JSON, then display the
# destination path as the cell output.
pydantic_json = json.dumps(pydantic_dict, indent=4)
GUIDELINES_PYDANTIC_MODEL_PATH.write_text(pydantic_json)
GUIDELINES_PYDANTIC_MODEL_PATH

In [None]:
def _element_properties(elements) -> Dict[str, dict]:
    """Map each element's name to a string-typed JSON Schema property."""
    return {
        element.name: {
            "description": element.description,
            "type": "string",
        }
        for element in elements
    }


def _section_properties(section) -> Dict[str, dict]:
    """Return the JSON Schema ``properties`` mapping for one section.

    A section holding a ``RecordEntry`` gets a single array-typed property
    named after the record entry. Any other section gets one string-typed
    property per element.
    """
    contents = section.elements
    if isinstance(contents, RecordEntry):
        return {
            contents.name: {
                "description": contents.description,
                "type": "array",
                # `items` must itself be a schema. Listing the element names
                # directly under it would make them unknown keywords that
                # validators ignore, so wrap them in an object schema.
                "items": {
                    "type": "object",
                    "properties": _element_properties(contents.elements),
                },
            }
        }
    return _element_properties(contents)


# JSON Schema describing the extracted output: one object-typed property per
# section, keyed by section name.
json_schema = {
    "type": "object",
    "properties": {
        section.name: {
            "type": "object",
            "description": section.description,
            "properties": _section_properties(section),
        }
        for section in pydantic_model
    },
}