# Load all the pig DICOM data and convert it to NIfTI

In [3]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
import io3d
from pathlib import Path
from loguru import logger
from pprint import pprint, pformat
import pandas as pd
import re
import numpy as np
import json
import tqdm

logger.enable("io3d")

# When True, every series is converted again even if its output file already exists.
force = True
# force = False

# When True, meta.csv is rebuilt from the directory listing.
# recreate_meta = False
recreate_meta = True

# Input directory; each matching sub-directory is read as one series.
# Alternative location (kept for reference, was overwritten by the line below anyway):
# base_path = Path(r"H:\biomedical\orig\pilsen_pigs_all\transplantation_dicom")
base_path = Path(r"~/Downloads/_temp/").expanduser()  # used when the zip files are downloaded manually
# Destination of the converted .nii.gz volumes and their projection images.
raw_path = Path(r"H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed")
# transposed_path = Path(r"H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed")
output_path = Path(r"H:\biomedical\orig\pilsen_pigs")

# Fail early, and say which path is missing.
assert base_path.exists(), f"Input directory does not exist: {base_path}"

# Convert all to nii

In [5]:

def touch_file(pth:Path):
    """Open ``pth`` in binary mode and read a single byte from it.

    Nothing is returned. Any exception raised while opening or reading the
    file is caught; the error message and the traceback are printed instead
    of being propagated to the caller.
    """
    import traceback

    try:
        with open(pth, "rb") as handle:
            # one byte is enough, the content itself is discarded
            handle.read(1)
    except Exception as err:
        print(f"Error in touching file {pth}: {err}")
        traceback.print_exc()

In [6]:
from typing import Union

def get_projection(
        datap:io3d.image.DataPlus, axis:Union[int,str], method:str="max"
):
    """Reduce the 3D volume stored in ``datap`` along one axis.

    The volume (``datap.data3d``) is first passed through
    ``io3d.image.transform_orientation`` from ``datap.orientation_axcodes``
    to ``"SPL"`` and then reduced along ``axis``.

    :param datap: object providing ``data3d`` and ``orientation_axcodes``.
    :param axis: axis index, or one of the names ``"axial"`` (0),
        ``"coronal"`` (1), ``"sagittal"`` (2).
    :param method: ``"max"`` or ``"mean"``.
    :return: result of ``.max(axis=axis)`` or ``.mean(axis=axis)`` on the
        reoriented volume.
    :raises ValueError: for an unknown axis name or an unknown method.
    """
    if isinstance(axis, str):
        named_axes = {"axial": 0, "coronal": 1, "sagittal": 2}
        if axis not in named_axes:
            raise ValueError(f"Unknown axis {axis}, use one of {list(named_axes.keys())} or 0, 1, 2")
        axis = named_axes[axis]

    volume = io3d.image.transform_orientation(
        datap.data3d, datap.orientation_axcodes, "SPL"
    )

    # the method is checked only after the reorientation, as before
    if method == "max":
        return volume.max(axis=axis)
    if method == "mean":
        return volume.mean(axis=axis)
    raise ValueError(f"Unknown method {method}")



In [7]:
# data2d = get_projection(datap, 0, "max")
# from matplotlib import pyplot as plt
# plt.style.use('classic')
# plt.imshow(data2d, cmap="gray_r")
# plt.colorbar()
# plt.style.available


In [9]:
# fnlist = list(base_path.glob("*Tx0*D_V*"))
# All "D_A*" and "D_V*" series directories, in reverse name order.
fnlist = sorted(list(base_path.glob("*Tx0*D_A*")) + list(base_path.glob("*Tx0*D_V*")))[::-1]

print(f"Number of files: {len(fnlist)}")
from joblib import Parallel, delayed
import tqdm
import traceback
#
for fn in tqdm.tqdm(fnlist):

    # take all files in parallel in the fn directory, open and close them, to initiate cloud synchronization
    # import Parallel, delayed

    # logger.info(fn)
    fn_in = fn
    # fn_out = raw_path / fn.name / f"{fn.name}.mhd"
    fn_out = raw_path / fn.name / f"{fn.name}.nii.gz"
    fn_out.parent.mkdir(parents=True, exist_ok=True)
    fn_meta = fn_out.parent / "meta.json"
    if force or (not fn_out.exists()):

        # Reset per series: otherwise the error handler below would hit an undefined
        # name (failure on the first series) or report the previous series' shape.
        datap = None
        try:
            tqdm.tqdm.write(f"Reading {fn_in} and writing {fn_out}")
            fn_fns = sorted(list(fn.glob("*")))
            tqdm.tqdm.write(f"Number of files in the directory: {len(fn_fns)}")
            # Parallel(n_jobs=2)(delayed(touch_file)(fn) for fn in tqdm.tqdm(fn_fns, desc="touching files"))

            axcodes = "IPL"
            # logger.debug(f"Reading {fn_in} with axcodes={axcodes}")
            datap = io3d.read(fn_in, series_number="first", orientation_axcodes=axcodes)
            # logger.debug(datap.keys())
            io3d.write(datap, fn_out)
            # logger.debug("writing done, creating projections")
            # imported lazily so that a missing skimage does not prevent the volume from being written
            import skimage.io
            for axis in ["axial", "coronal", "sagittal"]:
                # convert to float first so the subtraction cannot overflow an integer dtype
                data2d = get_projection(datap, axis, "max").astype(float)

                # rescale intensity to 0..255; a constant image has no range to stretch
                # and would otherwise cause a division by zero
                lo = data2d.min()
                hi = data2d.max()
                if hi > lo:
                    data2d = 255 * (data2d - lo) / (hi - lo)
                else:
                    data2d = np.zeros_like(data2d)
                data2d = data2d.astype(np.uint8)

                # NOTE(review): fn_out.stem of "<name>.nii.gz" is "<name>.nii", so the image is
                # saved as "<name>.nii_<axis>.jpg". Kept as is to stay compatible with existing outputs.
                skimage.io.imsave(fn_out.parent / f"{fn_out.stem}_{axis}.jpg", data2d)
            # logger.debug("projections done")
        except Exception as e:
            logger.error(f"Error in reading {fn_in}: {e}")
            traceback.print_exc()
            # shape/orientation are only available when the read itself succeeded
            if datap is not None:
                logger.debug(f"shape={datap.data3d.shape}, {datap.orientation_axcodes}")
        # with open(fn_meta, "w") as f:
        #     json.dump(dict(row), f)





Number of files: 14


  0%|          | 0/14 [00:00<?, ?it/s]

Reading C:\Users\Jirik\Downloads\_temp\Tx030D_Ven and writing H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed\Tx030D_Ven\Tx030D_Ven.nii.gz
Number of files in the directory: 1066


  7%|▋         | 1/14 [00:22<04:48, 22.20s/it]

Reading C:\Users\Jirik\Downloads\_temp\Tx030D_Art and writing H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed\Tx030D_Art\Tx030D_Art.nii.gz
Number of files in the directory: 678


 14%|█▍        | 2/14 [00:32<03:00, 15.06s/it]

Reading C:\Users\Jirik\Downloads\_temp\Tx029D_Ven and writing H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed\Tx029D_Ven\Tx029D_Ven.nii.gz
Number of files in the directory: 921


 21%|██▏       | 3/14 [00:47<02:44, 14.95s/it]

Reading C:\Users\Jirik\Downloads\_temp\Tx029D_Art and writing H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed\Tx029D_Art\Tx029D_Art.nii.gz
Number of files in the directory: 656


 29%|██▊       | 4/14 [00:58<02:13, 13.34s/it]

Reading C:\Users\Jirik\Downloads\_temp\Tx028D_Ven and writing H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed\Tx028D_Ven\Tx028D_Ven.nii.gz
Number of files in the directory: 1020


 36%|███▌      | 5/14 [01:14<02:10, 14.51s/it]

Reading C:\Users\Jirik\Downloads\_temp\Tx028D_Art and writing H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed\Tx028D_Art\Tx028D_Art.nii.gz
Number of files in the directory: 680


 43%|████▎     | 6/14 [01:25<01:45, 13.24s/it]

Reading C:\Users\Jirik\Downloads\_temp\Tx027D_Ven and writing H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed\Tx027D_Ven\Tx027D_Ven.nii.gz
Number of files in the directory: 990


 50%|█████     | 7/14 [01:40<01:37, 13.98s/it]

Reading C:\Users\Jirik\Downloads\_temp\Tx027D_Art and writing H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed\Tx027D_Art\Tx027D_Art.nii.gz
Number of files in the directory: 711


 57%|█████▋    | 8/14 [01:51<01:18, 13.04s/it]

Reading C:\Users\Jirik\Downloads\_temp\Tx026D_Ven and writing H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed\Tx026D_Ven\Tx026D_Ven.nii.gz
Number of files in the directory: 1124


 64%|██████▍   | 9/14 [02:11<01:15, 15.04s/it]

Reading C:\Users\Jirik\Downloads\_temp\Tx026D_Art and writing H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed\Tx026D_Art\Tx026D_Art.nii.gz
Number of files in the directory: 711


 71%|███████▏  | 10/14 [02:23<00:57, 14.27s/it]

Reading C:\Users\Jirik\Downloads\_temp\Tx025D_Ven and writing H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed\Tx025D_Ven\Tx025D_Ven.nii.gz
Number of files in the directory: 678


 79%|███████▊  | 11/14 [02:35<00:39, 13.29s/it]

Reading C:\Users\Jirik\Downloads\_temp\Tx025D_Art and writing H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed\Tx025D_Art\Tx025D_Art.nii.gz
Number of files in the directory: 883


 86%|████████▌ | 12/14 [02:50<00:27, 13.80s/it]

Reading C:\Users\Jirik\Downloads\_temp\Tx024D_Ven and writing H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed\Tx024D_Ven\Tx024D_Ven.nii.gz
Number of files in the directory: 1067


 93%|█████████▎| 13/14 [03:06<00:14, 14.55s/it]

Reading C:\Users\Jirik\Downloads\_temp\Tx024D_Art and writing H:\biomedical\orig\pilsen_pigs_all\transplantation_nii_transposed\Tx024D_Art\Tx024D_Art.nii.gz
Number of files in the directory: 340


100%|██████████| 14/14 [03:11<00:00, 13.67s/it]


In [None]:
# import tqdm
# import time
# for i in tqdm.tqdm(range(100)):
#     #wait 0.1s
#     time.sleep(0.1)
    

In [None]:
# Location of the metadata table next to the input series.
metafn = base_path / "meta.csv"

# Use the existing table when present, otherwise start from an empty frame.
df = pd.read_csv(metafn) if metafn.exists() else pd.DataFrame()


In [None]:
def create_meta(base_path, metafn):
    """Build the metadata table for all ``*Tx0*D_V*`` entries and save it as CSV.

    Every matching entry of ``base_path`` is assigned to a split according to
    the first number found in its name: divisible by 4 -> ``"test"``, otherwise
    even -> ``"val"``, otherwise ``"train"``. Within each split the entries get
    a running ``id`` starting at 1.

    The entries are processed in sorted order so that the ids are reproducible
    and do not depend on the order in which the file system lists them.

    :param base_path: ``pathlib.Path`` of the directory that is searched.
    :param metafn: destination of the CSV file with the columns ``dirname``,
        ``dataset_type`` and ``id`` (written without an index column).
    """
    # materialize once: used both for the log message and for the loop
    fnlist = sorted(base_path.glob("*Tx0*D_V*"))
    logger.info(pformat(list(map(str, fnlist))))

    data = {
        "dirname": [],
        "dataset_type": [],
        "id": [],
    }
    # running id per split
    counters = {"train": 0, "test": 0, "val": 0}
    for fn in fnlist:
        # first run of digits in the name; the glob pattern guarantees at least the "0" of "Tx0"
        ia = int(re.search(r"[0-9]+", fn.name).group(0))
        if (ia % 4) == 0:
            tp = "test"
        elif (ia % 2) == 0:
            tp = "val"
        else:
            tp = "train"
        counters[tp] += 1

        data["dirname"].append(fn.name)
        data["dataset_type"].append(tp)
        data["id"].append(counters[tp])

    df = pd.DataFrame(data)
    df.to_csv(metafn, index=False)



In [None]:
# Rebuild meta.csv only when the `recreate_meta` flag is set.
if recreate_meta:
    create_meta(base_path, metafn)


In [None]:

# Log whether meta.csv is present, then load it and print the table.
# NOTE(review): read_csv is called unconditionally — presumably this fails when the file is missing; confirm.
logger.debug(metafn.exists())
df = pd.read_csv(metafn)
print(df)


In [None]:
# fnlist = base_path.glob("*debug*")
# fnlist = base_path.glob("*Tx041D_V*")


# for i, row in tqdm.tqdm(df.iterrows()):
#     fn_in = base_path / row["dirname"]
#     fn_out = (
#             output_path
#             / row["dataset_type"]
#             / f"PP_{row['id']:04}"
#             / "PATIENT_DICOM"
#             / f"PP_{row['id']:04}.mhd"
#     )
#     fn_meta = fn_out.parent / "meta.json"
#     logger.debug(fn_in)
#     logger.debug(fn_out)
#
#     fn_out.parent.mkdir(parents=True, exist_ok=True)
#     if (
#             not fn_meta.exists()
#     ):  # we do not need to read the data again if everything is done. We are checking for meta.json, which is written last.
#
#         tqdm.tqdm.write(f"Reading {fn_in} and writing {fn_out}")
#         datap = io3d.read(fn_in)
#         io3d.write(datap, fn_out)
#         with open(fn_meta, "w") as f:
#             json.dump(dict(row), f)
