In [None]:
import pandas as pd
import duckdb
from pathlib import Path
import os
import io

# Base directories used throughout the notebook: `popisy` holds the JSON
# column-name mappings read by `human_readable`, `data` is the root of the
# input data sets.
popisy, data = Path("POPISY"), Path("data")

# Allow up to 500 rows to be rendered before pandas truncates a displayed frame.
pd.options.display.max_rows = 500

def human_readable(df: pd.DataFrame, map_file: str) -> pd.DataFrame:
    """Return ``df`` with its columns renamed according to a JSON mapping file.

    Args:
        df: Frame whose column labels should be translated.
        map_file: File name of a JSON object (``{"OLD": "new", ...}``),
            resolved relative to the module-level ``popisy`` directory.

    Returns:
        A new DataFrame produced by ``df.rename(columns=mapping)``; columns
        that are absent from the mapping keep their original label.

    Raises:
        FileNotFoundError: If the mapping file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    import json

    map_path = popisy / map_file

    # NOTE(review): the file is opened with the platform default encoding;
    # if the mappings contain non-ASCII labels, confirm the intended encoding
    # and pass it explicitly.
    with open(map_path, "r") as file:
        mapping = json.load(file)

    return df.rename(columns=mapping)


# Keyword arguments shared by the three report CSVs: semicolon-separated, cp1250-encoded.
args = dict(sep=";", encoding="cp1250")

# Material report rows.
material = pd.read_csv(data.joinpath("vykazy", "vyk_23_material_new.csv"), **args)
display(material.head())
# Optional preview with mapped column names:
# display(human_readable(material, "vyk_material.json").head())

# Procedure report rows.
vykony = pd.read_csv(data.joinpath("vykazy", "vyk_23_vykony_new.csv"), **args)
display(vykony.head())
# display(human_readable(vykony, "vyk_vykony.json").head())

# Per-document header rows (one row per CDOKL in the preview).
vykpac = pd.read_csv(data.joinpath("vykazy", "vyk_23_vykpac_new.csv"), **args)
display(vykpac.head())
# display(human_readable(vykpac, "vyk_vykpac.json").head())

Unnamed: 0,SERIAL,CDOKL,DATUM,TYP,KOD,ZVL,LOKALIZACE,ODDELENI,MNOZSTVI,CENAUZIV,CENA,SERIALCDB,CISPAC
0,262,12590,25.01.2023 00:00,1,516,0,,10626,0.1,0.0,0.24,262,1570034
1,263,12590,25.01.2023 00:00,1,93746,0,,10626,0.2,0.0,23.46,263,1570034
2,264,12590,25.01.2023 00:00,3,48092,0,,10626,1.0,0.0,154.0,264,1570034
3,349,14330,07.01.2023 00:00,1,516,0,,10626,0.1,0.0,0.24,349,2104772
4,350,14330,07.01.2023 00:00,1,93746,0,,10626,0.1,0.0,11.73,350,2104772


Unnamed: 0,SERIAL,CDOKL,DATUM,KOD,ODBORNOST,DG,LOKALIZACE,ODDELENI,CAS,MNOZSTVI,BODY,CENAMAT,CENAUZIV,CENA,SERIALCDB,CISPAC
0,6198,6384,17.01.2023 00:00,42022,402.0,C719,,10626,,1,435.0,0.0,0.0,391.5,6198.0,213292
1,6199,6384,17.01.2023 00:00,9543,402.0,C719,,10626,,1,0.0,0.0,0.0,0.0,6199.0,213292
2,6200,6386,18.01.2023 00:00,9511,402.0,C719,,10626,,1,45.0,0.0,0.0,40.5,6200.0,213292
3,6317,6442,17.01.2023 00:00,42022,402.0,N47,,10837,,1,435.0,0.0,0.0,391.5,6317.0,1892691
4,6318,6442,17.01.2023 00:00,9543,402.0,N47,,10837,,1,0.0,0.0,0.0,0.0,6318.0,1892691


  vykpac = pd.read_csv(data / "vykazy" / "vyk_23_vykpac_new.csv", **args)


Unnamed: 0,CDOKL,POJCDOKL,CYEAR,CCHOR1,CIDENT,CISPAC,POJISTOVNA,CISLOPOJISTKY,PRIPOJISTENI,POHLAVI,...,OBDOBIDOKL,PLATCE,TYPSUBJEKTU,KODSUBJEKTU,SYSTEMUCETZALOZIL,SOURCESYSTEM,CISPOJUZIS,UNISFAKT1,DATODSOUHLUCET,KDOODSOUHLUCET
0,6384,6384,2023,45968574,7290224.0,213292,207,2230734.0,0,0,...,20230101,0,5,10626,3,3,,0,17.01.2023 18:37,
1,6386,6386,2023,45980138,7290261.0,213292,207,2230734.0,0,0,...,20230101,0,5,10626,3,3,,0,18.01.2023 15:08,
2,6442,6442,2023,45964330,,1892691,201,,0,0,...,20230101,0,5,10837,219,219,,0,20.01.2023 09:51,
3,6630,6630,2023,46044195,,209185,207,,0,0,...,20230101,0,5,10370,3,3,,0,26.01.2023 11:38,
4,6790,6790,2023,45907288,7290108.0,1640608,209,8082470.0,0,0,...,20230101,0,5,10626,3,3,,0,09.01.2023 23:50,


In [2]:
import re
from pathlib import Path
from typing import TypedDict


class AmbulanceEvent(TypedDict):
    """One report record as produced by `get_event`."""

    FILE: str  # file-name stem with the caller-supplied prefix removed
    SERIAL: int  # second number of the record header
    RC: str  # 10-character alphanumeric token of the record header
    AMBNUM: int  # last number of the record header
    CONTENT: str  # raw text lines found between this header and the next


# Record header of the form "=== <n>. <serial> / <10 alnum chars> / <ambnum>".
_EVENT_HEADER_RE = re.compile(r"=== (\d+)\. (\d+) / ([a-zA-Z0-9]{10}) / (\d+)")


def get_event(path: Path, *, prefix: str) -> list[AmbulanceEvent]:
    """Split a cp1250 text export into its individual report records.

    A record starts at a header line (see ``_EVENT_HEADER_RE``) and runs up
    to the next header or the end of the file. Text before the first header
    is discarded.

    Args:
        path: Text file to parse.
        prefix: Leading part of the file-name stem to drop when building the
            ``FILE`` value (e.g. ``"Events_"``). If the stem does not contain
            the prefix, the whole stem is used.

    Returns:
        One dict per record, in file order, including the final record of
        the file.
    """
    stem = path.stem
    # Computed once per file instead of once per record.
    file_label = stem.split(prefix)[1] if prefix and prefix in stem else stem

    chunks: list[AmbulanceEvent] = []

    def close_record(header, body_lines) -> None:
        """Append the record described by ``header`` (if any) to ``chunks``."""
        if header is None:
            return
        _, serial, sse, ambnum = header
        chunks.append(
            {
                "FILE": file_label,
                "SERIAL": int(serial),
                "RC": sse,
                "AMBNUM": int(ambnum),
                "CONTENT": "".join(body_lines),
            }
        )

    current_header = None
    body: list[str] = []

    with open(path, "r", encoding="cp1250") as file:
        for line in file:
            stripped = line.strip()
            if stripped.startswith("===") and line.count("/") == 2:
                match = _EVENT_HEADER_RE.match(stripped)
                # A "===" line that merely looks like a header (e.g. one that
                # contains a date) is kept as ordinary content instead of
                # crashing on ``None.groups()``.
                if match is not None:
                    close_record(current_header, body)
                    current_header = match.groups()
                    body = []
                    continue

            body.append(line)

    # The last record has no following header to trigger the append, so it
    # must be flushed explicitly; otherwise it is silently lost.
    close_record(current_header, body)
    return chunks


# Outpatient reports ("AMBULATNÍ ZPRÁVA"): parse every 2023 export file in
# name order and stack all records into a single frame.
zpravy = pd.DataFrame(
    [
        event
        for report_path in sorted(
            data.joinpath("PAC", "AMBULATNÍ ZPRÁVA", "2023").glob("*.Txt")
        )
        for event in get_event(report_path, prefix="Events_")
    ]
)
display(zpravy.head())

# Discharge reports ("PROPOUŠTĚCÍ ZPRÁVA"): same procedure, different file prefix.
leave = pd.DataFrame(
    [
        event
        for report_path in sorted(
            data.joinpath("PAC", "PROPOUŠTĚCÍ ZPRÁVA", "2023").glob("*.Txt")
        )
        for event in get_event(report_path, prefix="ZavZprav_")
    ]
)
display(leave.head())


Unnamed: 0,FILE,SERIAL,RC,AMBNUM,CONTENT
0,2023_01,1430073,640XXXXXXX,53835,MUDr. Pajdlhauser Roman\ndomluva na hosital CH...
1,2023_01,1430084,716XXXXXXX,58891,MUDr. Hanušová Zuzana\nC19 \nPravidelná kontro...
2,2023_01,1387088,660XXXXXXX,55647,MUDr. Urie Petr\nC051 \nVýška: 170cm Váha: ...
3,2023_01,1423870,685XXXXXXX,26033,MUDr. Nohejlová Medková Anna\nC493 \nC810 \n3...
4,2023_01,1399503,660XXXXXXX,50413,MUDr. Urie Petr\nC162 \nVýška: 165cm Váha:...


Unnamed: 0,FILE,SERIAL,RC,AMBNUM,CONTENT
0,2023_01,275828,665XXXXXXX,59047,DŮVOD PŘIJETÍ K HOSPITALIZACI: \nPacientku při...
1,2023_01,275913,835XXXXXXX,50775,DŮVOD PŘIJETÍ K HOSPITALIZACI: \npřijata k apl...
2,2023_01,276472,560XXXXXXX,8460,DŮVOD PŘIJETÍ K HOSPITALIZACI: \npřijat k apli...
3,2023_01,276172,990XXXXXXX,59522,DŮVOD PŘIJETÍ K HOSPITALIZACI: \nzpomalené psy...
4,2023_01,277031,560XXXXXXX,54409,DŮVOD PŘIJETÍ K HOSPITALIZACI: \nPacient přija...


In [3]:
def read_txtevent_file(path: Path, *, with_header: bool = False) -> str:
    """Read a ``TXTEVENT_<label>.csv`` file and prepend a ``FILE`` column.

    Every data line is prefixed with ``<label>;`` where ``<label>`` is the
    part of the file name between ``TXTEVENT_`` and the first dot. The first
    line of the file is treated as the header: it is prefixed with ``FILE;``
    when ``with_header`` is true and dropped otherwise.

    Args:
        path: cp1250-encoded, semicolon-separated file to read.
        with_header: Whether to keep the (prefixed) header line.

    Returns:
        The prefixed lines joined into one string. Every emitted line ends
        with a newline, so results for several files can be concatenated
        safely even when a file lacks a trailing newline.

    Raises:
        IndexError: If the file name does not contain ``TXTEVENT_``.
    """
    rows: list[str] = []
    with open(path, "r", encoding="cp1250") as file:
        file_prefix = path.name.split(".")[0].split("TXTEVENT_")[1]
        for enum, line in enumerate(file):
            if enum == 0:
                if not with_header:
                    continue
                tag = "FILE"
            else:
                tag = file_prefix
            # Without this, the last row of a file that has no trailing
            # newline would fuse with the first row of the next file.
            if not line.endswith("\n"):
                line += "\n"
            rows.append(tag + ";" + line)
    return "".join(rows)


# Concatenate all 2023 documentation exports into one CSV text: the header is
# taken from the first file only, every row carries its source-file label.
lines = "".join(
    read_txtevent_file(csv_path, with_header=(position == 0))
    for position, csv_path in enumerate(
        sorted(data.joinpath("PAC", "DOKUMENTACE", "2023").glob("*.csv"))
    )
)

dokumentace = pd.read_csv(io.StringIO(lines), sep=";")
dokumentace.head()

Unnamed: 0,FILE,POR,UDALOST_CISLO,UDALOST,DATUM,ODDEL,LEKAR,DATUM_CAS,DIAGNOZA,DATUM_OD,...,HODNOCENI,JMENO,RC,AMBNUM,CISPAC,PSC,DATUM_NAR,VEKR,VEKM,VEKD
0,2023_01,1,2,RATO,20230125,10248,22217,20230125_101800,,00010101_000000,...,0,,640XXXXXXX,53835,2124171,11000,10101,58,8,14
1,2023_01,2,5,RATP,20230125,10370,93786,20230125_102500,C19,00010101_000000,...,0,,716XXXXXXX,58891,2259382,28601,10101,51,3,14
2,2023_01,3,5,RATP,20230116,10370,22298,20230116_110000,C051,00010101_000000,...,0,,660XXXXXXX,55647,2193508,27201,10101,56,9,29
3,2023_01,4,5,RATP,20230104,10452,22422,20230104_112500,"C493, C810",00010101_000000,...,0,,685XXXXXXX,26033,508766,16000,10101,54,6,19
4,2023_01,5,5,RATP,20230105,10370,22298,20230105_110000,C162,00010101_000000,...,0,,660XXXXXXX,50413,298937,10000,10101,56,10,13


In [4]:
# Link table used below to attach an event date (DATFR) to a report via its
# SERIAL and AMBNUM values.
vazby = pd.read_csv(data.joinpath("PAC", "events_date_2023.csv"), sep=";")
vazby.head()


Unnamed: 0,SERIAL,AMBNUM,DATFR,DEPARTM,WHO
0,1387088,55647,16.01.2023 11:00,10370,22298
1,1399503,50413,05.01.2023 11:00,10370,22298
2,1399525,56795,12.01.2023 09:45,10370,22298
3,1407661,14010,17.01.2023 06:42,10370,22161
4,1408001,23454,26.01.2023 09:12,10452,22161


In [10]:
import datetime

# Count how many of the first reports cannot be linked to any billing record.
not_found = 0
# At most 1024 reports are sampled; clamp so a shorter `zpravy` cannot raise IndexError.
total = min(1024, len(zpravy))

for d in range(total):
    item_zpravy = zpravy.iloc[d]

    item_vazby = vazby[
        (vazby["SERIAL"] == item_zpravy["SERIAL"])
        & (vazby["AMBNUM"] == item_zpravy["AMBNUM"])
    ]
    ## A date can be assigned to the individual outpatient reports
    # display(item_vazby)
    if item_vazby.empty:
        not_found += 1
        continue

    # Parse the event timestamp once. Taking the first matching row keeps the
    # result a scalar even if several link rows match (`.squeeze()` would
    # return a Series there and break the formatting below).
    event_time = pd.to_datetime(item_vazby["DATFR"].iloc[0], format="%d.%m.%Y %H:%M")
    if pd.isna(event_time):
        # A missing date cannot be matched against anything.
        not_found += 1
        continue

    # `dokumentace["DATUM"]` is compared as an integer YYYYMMDD value, the
    # report tables store the day as "dd.mm.YYYY 00:00" text.
    item_vazby_dokumentace_date = event_time.strftime("%Y%m%d")
    item_vazby_vyk_date = event_time.strftime("%d.%m.%Y 00:00")

    item_dokumentace = dokumentace[
        (dokumentace["AMBNUM"] == item_zpravy["AMBNUM"])
        & (dokumentace["DATUM"] == int(item_vazby_dokumentace_date))
    ]
    # display(item_dokumentace)
    if item_dokumentace.empty:
        # Previously this case aborted the whole loop with StopIteration.
        not_found += 1
        continue

    cispac = item_dokumentace["CISPAC"].iloc[0]

    zpravy_material = material[
        (material["CISPAC"] == cispac) & (material["DATUM"] == item_vazby_vyk_date)
    ]
    zpravy_vykony = vykony[
        (vykony["CISPAC"] == cispac) & (vykony["DATUM"] == item_vazby_vyk_date)
    ]

    zpravy_vykpac = vykpac[vykpac["CISPAC"] == cispac]

    # A report counts as "not found" when nothing was billed on its day even
    # though the patient does appear in `vykpac`.
    if zpravy_material.empty and zpravy_vykony.empty:
        if not zpravy_vykpac.empty:
            not_found += 1

print(f"Not found: {not_found} / {total}")


Not found: 288 / 1024


from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import pandas as pd
import datetime
import nest_asyncio
import uvicorn
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI()

# CORS: any origin may call the API. Credentials are NOT allowed, because the
# FastAPI documentation states that wildcard origins/methods/headers must not
# be combined with `allow_credentials=True`; the endpoints in this notebook do
# not use cookies or auth headers.
# NOTE(review): this API returns patient report text; consider replacing the
# wildcard with the explicit origin(s) of the consuming front end.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=False,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)


class PatientDataRequest(BaseModel):
    """Request model carrying a positional row index.

    NOTE(review): not referenced by any route visible in this notebook;
    `get_patient_data` takes `iloc` as a path parameter instead.
    """

    # Positional index of a row (same name as the `iloc` path parameter).
    iloc: int


@app.get("/get_patient_data/{iloc}")
async def get_patient_data(iloc: int):
    """Return one outpatient report together with all records linked to it.

    The report at position ``iloc`` of ``zpravy`` is linked to its event
    date through ``vazby`` (SERIAL + AMBNUM), then to a ``dokumentace`` row
    (AMBNUM + DATUM_CAS), whose CISPAC selects the rows of ``material`` and
    ``vykony`` for that day and all rows of ``vykpac`` for the patient.

    Args:
        iloc: Positional index into ``zpravy``.

    Returns:
        A dict with the report text (``zpravy_content``) and the matching
        rows of each table as lists of records (missing values become "").

    Raises:
        HTTPException: 404 when the index is out of range or when the report
            cannot be linked to an event date or a documentation row.
    """
    try:
        item_zpravy = zpravy.iloc[iloc]
    except IndexError:
        raise HTTPException(status_code=404, detail="Patient data not found")

    item_vazby = vazby[
        (vazby["SERIAL"] == item_zpravy["SERIAL"])
        & (vazby["AMBNUM"] == item_zpravy["AMBNUM"])
    ]
    if item_vazby.empty:
        # Previously surfaced as a 500 (strftime on an empty Series).
        raise HTTPException(status_code=404, detail="Event date not found")

    # Parse once; the first matching row keeps the value a scalar even when
    # several link rows match.
    event_time = pd.to_datetime(item_vazby["DATFR"].iloc[0], format="%d.%m.%Y %H:%M")
    if pd.isna(event_time):
        raise HTTPException(status_code=404, detail="Event date not found")

    item_vazby_dokumentace_date = event_time.strftime("%Y%m%d_%H%M00")
    item_vazby_vyk_date = event_time.strftime("%d.%m.%Y 00:00")

    item_dokumentace = dokumentace[
        (dokumentace["AMBNUM"] == item_zpravy["AMBNUM"])
        & (dokumentace["DATUM_CAS"] == item_vazby_dokumentace_date)
    ]
    if item_dokumentace.empty:
        # Previously surfaced as a 500 (StopIteration from `next`).
        raise HTTPException(status_code=404, detail="Documentation not found")

    cispac = item_dokumentace["CISPAC"].iloc[0]

    material_data = material[
        (material["CISPAC"] == cispac) & (material["DATUM"] == item_vazby_vyk_date)
    ]
    vykony_data = vykony[
        (vykony["CISPAC"] == cispac) & (vykony["DATUM"] == item_vazby_vyk_date)
    ]
    vykpac_data = vykpac[vykpac["CISPAC"] == cispac]

    response = {
        "zpravy_content": item_zpravy["CONTENT"],
        "vazby": item_vazby.fillna("").to_dict(orient="records"),
        "dokumentace": item_dokumentace.fillna("").to_dict(orient="records"),
        "material": material_data.fillna("").to_dict(orient="records"),
        "vykony": vykony_data.fillna("").to_dict(orient="records"),
        "vykpac": vykpac_data.fillna("").to_dict(orient="records"),
    }

    return response


# Start the HTTP server only when this code is executed as the main module.
if __name__ == "__main__":
    # Presumably needed because the notebook kernel already runs an asyncio
    # event loop and uvicorn starts its own — TODO confirm.
    nest_asyncio.apply()
    # NOTE(review): host "0.0.0.0" listens on every network interface; the
    # endpoint above returns report content, so confirm this exposure is
    # intended (vs. "127.0.0.1").
    uvicorn.run(app, host="0.0.0.0", port=9183)
