In [None]:
import pandas as pd
import os

In [None]:
# --- Run configuration -------------------------------------------------------
# Name of the TXM event that the exported JSON is created for.
txm_event_name = "45-TXM-2024-04"

# Directory holding the per-patient CSV files; the JSON export is written here too.
path = "/home/honza/Downloads/Re_KPD_-_April_2024_-_Data_Entry_new"

# Only CSV files whose name starts with this prefix are processed
# (alternative prefix kept for reference: "W-").
starting_pattern = "KPD2024_2024_4_10_"

# Initial CSV delimiter; the processing loop re-detects it from the first line
# of every file and overwrites this value.
delimiter = ";"

In [None]:

# Name of the CSV column holding the raw Luminex value that is exported as "mfi".
luminex_col = "RawData"

# Blood-group lists; not referenced by any other visible cell.
# NOTE(review): presumably "acceptable blood groups" presets - confirm or remove.
acg_for_zero = ["A", "B", "O"]
acg_for_other = [*acg_for_zero, "AB"]

In [None]:
# Builds the donor and recipient records from a single combined frame `dff`
# (one row per donor/recipient pair).
#
# NOTE(review): `dff` is not defined in any visible cell, so on a fresh kernel
# this cell raises NameError. Both `donors` and `recipients` are also reset to
# empty lists and rebuilt by the per-file processing loop further down, which
# means whatever this cell produces is discarded. Consider removing the cell
# or defining `dff` before it.
donors = [
    {
        'medical_id': row.DonorID,
        'blood_group': row.DonorBloodType,
        # comma-separated typing string split into individual codes (not stripped)
        'hla_typing': row.DonorTyping.split(","),
        'donor_type': "DONOR",

        # links the donor to the recipient on the same row
        'related_recipient_medical_id': row.RecipientID
    }
    for _, row in dff.iterrows()
]


recipients = [
    {
        'medical_id': row.RecipientID,
        'blood_group': row.BloodType,
        'hla_typing': row.RecipientTyping.split(","),
        # the year is taken as the last dot-separated part of DOB
        # (assumes a dotted date string such as "dd.mm.yyyy" - TODO confirm)
        'year_of_birth': int(row.DOB.split(".")[-1]),
        # a missing gender is exported as None
        'sex': row.Gender if not pd.isna(row.Gender) else None,
        # left empty on purpose: acceptable blood groups are filled in later in the UI
        'acceptable_blood_groups': [],
        "hla_antibodies": [
            {
                "mfi": antibody[1],
                "name": antibody[0],
                # every antibody gets the same fixed cutoff
                "cutoff": 2000
            }
            # presumably an iterable of (name, mfi) pairs - verify against the code that builds `dff`
            for antibody in row.RecipientAntibodies
        ]
    }
    for _, row in dff.iterrows()
]

In [None]:
# Build one donor record and one recipient record per patient CSV in `path`.
#
# Every file whose name starts with `starting_pattern` and ends with ".csv" must
# describe exactly one donor and one recipient; its rows list antibody
# specificities together with the raw value in `luminex_col`, which is exported
# as the antibody "mfi". Results accumulate in `donors` and `recipients`.


def _parse_mfi(raw_value):
    """Return one raw MFI cell as an int, accepting a decimal comma.

    pandas infers a numeric dtype for the column when none of its values
    contains a decimal comma, so the cell may already be an int/float rather
    than a string; going through ``str`` handles both cases.
    """
    return int(float(str(raw_value).replace(",", ".")))


infos = []  # kept for compatibility; nothing in this cell fills it
donors = []
recipients = []
# os.listdir returns names in arbitrary order; sort so that the order of the
# exported records is reproducible between runs and machines.
for file_name in sorted(os.listdir(path)):
    if not (file_name.startswith(starting_pattern) and file_name.endswith(".csv")):
        continue
    print(f"processing file {file_name}")
    file_path = os.path.join(path, file_name)
    with open(file_path) as f:
        first_line = f.readline()

    # Guess the delimiter from the header line; ";" wins over "," and "," over tab.
    if ";" in first_line:
        delimiter = ";"
    elif "," in first_line:
        delimiter = ","
    elif "\t" in first_line:
        delimiter = "\t"
    else:
        raise ValueError(
            f"Cannot detect the delimiter of {file_path}: first line is {first_line!r}"
        )

    print(f"First line is: '{first_line[:20]}' assuming delimiter '{delimiter}'")
    df_one_patient = pd.read_csv(file_path, delimiter=delimiter)
    # The recipient identifier is read from the "Disease" column.
    # NOTE(review): confirm that this column really carries the recipient/family id.
    df_one_patient["FamilyID"] = df_one_patient["Disease"]
    df_donor = df_one_patient[["DonorID", "DonorTyping"]].drop_duplicates()
    df_recipient = df_one_patient[
        ["FamilyID", "BloodType", "PatientTyping", "Gender", "DOB"]
    ].drop_duplicates()
    assert len(df_donor) == 1, (
        f"{file_name}: expected exactly one donor, found {len(df_donor)}"
    )
    assert len(df_recipient) == 1, (
        f"{file_name}: expected exactly one recipient, found {len(df_recipient)}"
    )

    # Split each Specificity cell into its HLA codes, drop the "-" placeholders
    # and keep only the rows that still name at least one code.
    antibodies_df = (
        df_one_patient[["Specificity", luminex_col]]
        .assign(
            hla_code=lambda df: df.Specificity.str.split(",").apply(
                lambda codes: [code for code in codes if code != "-"]
            )
        )
        .loc[lambda df: df.hla_code.apply(len) > 0]
    )

    antibodies = []
    for _, row in antibodies_df.iterrows():
        hlas = row.hla_code
        if len(hlas) == 1:
            antibody_name = hlas[0]
        elif len(hlas) == 2:
            # Two codes on one row are only accepted for DP/DQ; they are merged
            # into a single name of the form "DQ[<first>,<second>]".
            start_code = hlas[0][0:2]
            if start_code not in {"DP", "DQ"}:
                raise ValueError(
                    f"Unexpected start of hla code that has multiple values per row: {hlas[0][0:2]}"
                )
            first_number_code = hlas[0].split("*")[-1]
            second_number_code = hlas[1].split("*")[-1]
            antibody_name = f"{start_code}[{first_number_code},{second_number_code}]"
        else:
            raise ValueError(f"Unexpected number of hla codes on one row: {len(hlas)}")
        antibodies.append((antibody_name, _parse_mfi(row[luminex_col])))

    # Recipient typing: trim whitespace and rewrite codes starting with "Cw" to "C".
    typing = []
    for hla_code in df_recipient.PatientTyping.iloc[0].split(","):
        hla_code = hla_code.strip()
        if hla_code.startswith("Cw"):
            hla_code = hla_code.replace("Cw", "C")
        typing.append(hla_code)

    recipient_id = df_recipient.FamilyID.iloc[0]
    gender = df_recipient.Gender.iloc[0]
    recipients.append(
        {
            'medical_id': recipient_id,
            'blood_group': df_recipient.BloodType.iloc[0],
            'hla_typing': typing,
            # the year is the last dot-separated part of the DOB string
            'year_of_birth': int(df_recipient.DOB.iloc[0].split(".")[-1]),
            'sex': gender if not pd.isna(gender) else None,
            # left empty on purpose: acceptable blood groups are filled in later in the UI
            'acceptable_blood_groups': [],
            "hla_antibodies": [
                {"mfi": mfi, "name": name, "cutoff": 2000}
                for name, mfi in antibodies
            ],
        }
    )
    donors.append(
        {
            'medical_id': df_donor.DonorID.iloc[0],
            # NOTE(review): hard-coded; no donor blood type column is read from
            # the CSV here - confirm that "A" is an intended placeholder.
            'blood_group': "A",
            # NOTE(review): unlike the recipient typing, the donor codes are
            # neither stripped nor "Cw"-normalised - confirm this is intended.
            'hla_typing': df_donor.DonorTyping.iloc[0].split(","),
            'donor_type': "DONOR",
            'related_recipient_medical_id': recipient_id,
        }
    )


In [None]:
import json

# Sanity check: how many records were built, plus one pretty-printed sample of each kind.
recipient_count = len(recipients)
donor_count = len(donors)
print(f"pocet prijemcu: {recipient_count}, pocet donoru {donor_count}")

first_recipient_json = json.dumps(recipients[0], indent=4)
print(first_recipient_json)
last_donor_json = json.dumps(donors[-1], indent=4)
print(last_donor_json)

In [None]:
# Top-level payload of the export: a fixed country code, the event name and
# the donor/recipient lists built above.
input_json = dict(
    country="AUT",
    txm_event_name=txm_event_name,
    donors=donors,
    recipients=recipients,
)

In [None]:
import json

# Write the assembled payload next to the source CSV files.
export_file_path = os.path.join(path, "wien_export.json")
with open(export_file_path, 'w') as export_file:
    json.dump(input_json, export_file)