In [None]:
import json
from pathlib import Path

import openpyxl
import pandas as pd
from openpyxl.styles import Font

In [None]:
# Both locations are resolved from the current working directory, so the
# notebook has to be started from the folder it expects to live in.
OUTPUT_DIR = Path.cwd().joinpath("output")

# MIMIC-III 1.4 directory, located in a "data" tree next to the working directory.
MIMIC_III_DIR = Path.cwd().parent.joinpath(
    "data", "physionet.org", "files", "mimiciii", "1.4"
)
PHYSICIAN_NOTE_FPATH = MIMIC_III_DIR.joinpath("physician_notes_mimic.csv")

In [None]:
# JSON file inside OUTPUT_DIR that the rest of the notebook converts;
# the bare expression on the last line echoes the resolved path.
json_fpath = OUTPUT_DIR.joinpath("mimic_154417.json")
json_fpath

In [None]:
# Decode explicitly as UTF-8: without an encoding argument, read_text() falls
# back to the platform's locale encoding, which mis-decodes non-ASCII text on
# systems whose default is not UTF-8.
response_json = json.loads(json_fpath.read_text(encoding="utf-8"))

In [None]:
def _flatten_response(response):
    """Flatten a parsed JSON object into ``[section, field, value]`` rows.

    Supported shapes for each top-level section:

    * dict  -> one row per string value, or one row per item of a list value
    * list  -> each item must be a dict of string values; the item's position
      is appended to the field name (``"<field> <index>"``)
    * str   -> a single row with an empty field name

    A blank ``["", "", ""]`` row is appended after every section to act as a
    visual separator. Top-level values of any other type contribute only that
    separator row.

    Raises:
        NotImplementedError: if a nested value has a shape not listed above.
    """
    flat_rows = []

    for section, content in response.items():
        if isinstance(content, dict):
            for field, value in content.items():
                if isinstance(value, str):
                    flat_rows.append([section, field, value])
                elif isinstance(value, list):
                    flat_rows.extend([section, field, item] for item in value)
                else:
                    raise NotImplementedError(
                        f"Unsupported value type {type(value).__name__!r} "
                        f"for field {field!r} in section {section!r}"
                    )
        elif isinstance(content, list):
            for item_idx, item in enumerate(content):
                if not isinstance(item, dict):
                    raise NotImplementedError(
                        f"Unsupported item type {type(item).__name__!r} "
                        f"at index {item_idx} of section {section!r}"
                    )
                for field, value in item.items():
                    if not isinstance(value, str):
                        raise NotImplementedError(
                            f"Unsupported value type {type(value).__name__!r} "
                            f"for field {field!r} at index {item_idx} "
                            f"of section {section!r}"
                        )
                    flat_rows.append([section, f"{field} {item_idx}", value])
        elif isinstance(content, str):
            # A plain-string section used to be dropped silently; keep its text.
            flat_rows.append([section, "", content])
        # Any other top-level type is still skipped, as before.

        # Blank separator row between sections.
        flat_rows.append(["", "", ""])

    return flat_rows


rows = _flatten_response(response_json)

In [None]:
# One DataFrame record per flattened row, in the order the rows were collected.
rows_df = pd.DataFrame(data=rows, columns=["Section", "Field", "Value"])

In [None]:
# Show each section / field label only on its first row and blank the repeats.
#
# A field label counts as a repeat only when the same (Section, Field) pair was
# already seen. De-duplicating the "Field" column on its own would also blank a
# field name that legitimately re-appears in a later section.
# Both masks are computed before either column is modified.
# NOTE: this cell rewrites rows_df; rebuild rows_df before running it again.
repeated_field = rows_df.duplicated(subset=["Section", "Field"], keep="first")
repeated_section = rows_df["Section"].duplicated(keep="first")
rows_df.loc[repeated_field, "Field"] = ""
rows_df.loc[repeated_section, "Section"] = ""

# Missing values (in any column) are rendered as empty cells.
rows_df = rows_df.fillna("")

# Prettify the labels: snake_case -> "Title Case". Uses the .str accessor
# rather than DataFrame.applymap, which is deprecated in recent pandas.
for label_col in ("Section", "Field"):
    rows_df[label_col] = (
        rows_df[label_col].str.replace("_", " ", regex=False).str.title()
    )

In [None]:
# Echo the frame so the cleaned-up labels can be checked by eye.
rows_df

In [None]:
# The spreadsheet is written beside the JSON file: same name, .xlsx extension.
# index=False keeps the DataFrame's positional index out of the sheet.
excel_fpath = json_fpath.with_suffix(".xlsx")
rows_df.to_excel(excel_writer=excel_fpath, index=False)

In [None]:
# Re-open the file that was just written so it can be edited through openpyxl;
# `ws` is the workbook's active sheet.
wb = openpyxl.load_workbook(filename=excel_fpath)
ws = wb.active

In [None]:
# Insert a new first row (existing rows shift down by one) and use it for the
# two group titles: column A and column E.
ws.insert_rows(idx=1)
ws["A1"] = "GPT Generation"
ws["E1"] = "Evaluation"

In [None]:
# Headings for the evaluation columns, written left to right on row 2
# starting at column 5 (E).
eval_headings = (
    "Missed- Safety Critical",
    "Missed- Minor",
    "Added- Hallucination",
    "Added- Not relevant",
    "Explanation",
)
for column_idx, heading in enumerate(eval_headings, start=5):
    ws.cell(row=2, column=column_idx, value=heading)

In [None]:
# Bold fonts: column A at 11pt, then row 2 at 12pt and row 1 at 14pt.
# Rows are styled after the column so that A1 and A2 end up with the row sizes.
for cell in ws["A"]:
    cell.font = Font(bold=True, size=11)

# iter_rows must yield Cell objects here. With values_only=True it yields the
# raw cell values (str / None), which have no `.font` attribute to assign to.
for row in ws.iter_rows(min_row=2, max_row=2):
    for cell in row:
        cell.font = Font(bold=True, size=12)
for row in ws.iter_rows(min_row=1, max_row=1):
    for cell in row:
        cell.font = Font(bold=True, size=14)

In [None]:
# Give every column a width of 40, then widen column C to 80.
for column_cells in ws.columns:
    letter = column_cells[0].column_letter
    ws.column_dimensions[letter].width = 40
ws.column_dimensions["C"].width = 80

In [None]:
# Turn on text wrapping for every cell in the sheet.
wrapped = openpyxl.styles.Alignment(wrap_text=True)
for sheet_row in ws.iter_rows():
    for sheet_cell in sheet_row:
        sheet_cell.alignment = wrapped

In [None]:
# Persist the formatted workbook, overwriting the file pandas wrote earlier.
wb.save(filename=excel_fpath)