In [2]:
import ast
import json
import os
import shutil

import albumentations
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom as dicom
import tensorflow as tf
import tensorflow_addons as tfa
import torch
from IPython.core.interactiveshell import InteractiveShell
from PIL import Image
from sklearn.model_selection import StratifiedKFold
from tensorflow import keras
from tqdm import tqdm

# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
import re
from itertools import chain

from pydicom.pixel_data_handlers.util import apply_voi_lut

In [3]:
class Test:
    """Namespace of static helpers for the inference / submission pipeline.

    The class carries no instance state and is called as ``Test.<method>(...)``:

    * ``prepare_data`` walks the DICOM tree, exports every image as a JPEG and
      collects per-image metadata into a DataFrame.
    * ``make_classification`` builds the study-level submission rows.
    * ``make_bbox_df`` converts detector label files into image-level rows.
    """

    # Study-level class names, in the order of the classifier output vector
    # (pred[0] .. pred[3]).
    LABELS = ("negative", "typical", "indeterminate", "atypical")
    # Image-level prediction written when no opacity box is available.
    NO_BOX_PREDICTION = "none 1 0 0 1 1"
    _SUBMISSION_COLUMNS = ["id", "PredictionString"]
    _META_COLUMNS = [
        "path",
        "StudyInstanceUID",
        "id_image",
        "modality",
        "PatientSex",
        "BodyPartExamined",
        "PhotometricInterpretation",
        "width",
        "height",
    ]

    @staticmethod
    def _to_uint8_rgb(pixels, invert):
        """Min-max scale a pixel array to uint8 and repeat it over 3 channels."""
        img = pixels.max() - pixels if invert else pixels
        low = img.min()
        span = img.max() - low
        if span == 0:
            # Constant image: scaling would divide 0 by 0 and yield NaNs.
            scaled = np.zeros(img.shape, dtype="uint8")
        else:
            scaled = ((img - low) / span * 255).astype("uint8")
        return np.stack([scaled, scaled, scaled], axis=-1)

    @staticmethod
    def prepare_data(
        data_path, path_to_save, train_mode=True, n_train_sample=10, voi_lut=False
    ):
        """Export DICOM images as JPEG files and return their metadata.

        The tree under ``data_path`` is read as
        ``<folder1>/<folder2>/<image file>``; ``folder1`` is stored as the
        ``StudyInstanceUID`` and the file name without extension as ``id_image``.

        Args:
            data_path: Root directory of the DICOM tree.
            path_to_save: Directory that receives ``<id_image>.jpg`` files;
                created if missing.
            train_mode: If true, only the first ``n_train_sample`` entries of
                ``os.listdir(data_path)`` are processed.
            n_train_sample: Number of top-level folders used in train mode.
            voi_lut: If true, ``apply_voi_lut`` is applied to the pixel data
                before scaling.

        Returns:
            DataFrame with one row per image and the columns ``path``,
            ``StudyInstanceUID``, ``id_image``, ``modality``, ``PatientSex``,
            ``BodyPartExamined``, ``PhotometricInterpretation``, ``width``,
            ``height``. It is empty, with the same columns, when no image is
            found.
        """
        os.makedirs(path_to_save, exist_ok=True)
        studies = os.listdir(data_path)
        if train_mode:
            studies = studies[:n_train_sample]

        # Collect plain records and build the frame once instead of growing it
        # cell by cell with .loc.
        records = []
        for folder1 in tqdm(studies):
            study_dir = os.path.join(data_path, folder1)
            for folder2 in os.listdir(study_dir):
                series_dir = os.path.join(study_dir, folder2)
                for img_file in os.listdir(series_dir):
                    img_path = os.path.join(series_dir, img_file)
                    img_id = os.path.splitext(img_file)[0]
                    data_file = dicom.dcmread(img_path)
                    pixels = data_file.pixel_array
                    records.append(
                        {
                            "path": img_path,
                            "StudyInstanceUID": folder1,
                            "id_image": img_id,
                            "modality": data_file.Modality,
                            "PatientSex": data_file.PatientSex,
                            "BodyPartExamined": data_file.BodyPartExamined,
                            "PhotometricInterpretation": data_file.PhotometricInterpretation,
                            # Stored as float, the dtype this frame has
                            # always exposed for the two size columns.
                            "width": float(pixels.shape[1]),
                            "height": float(pixels.shape[0]),
                        }
                    )
                    img = apply_voi_lut(pixels, data_file) if voi_lut else pixels
                    rgb = Test._to_uint8_rgb(
                        img,
                        invert=data_file.PhotometricInterpretation == "MONOCHROME1",
                    )
                    Image.fromarray(rgb).save(
                        os.path.join(path_to_save, img_id + ".jpg")
                    )

        test_df = pd.DataFrame(records, columns=Test._META_COLUMNS)
        # Literal replacement: the old pattern "\../" was an invalid escape and
        # its meaning depended on the pandas default for `regex`.
        test_df["path"] = test_df["path"].str.replace("../", "/kaggle/", regex=False)
        return test_df

    @staticmethod
    def make_classification(
        test_df_,
        jpg_path,
        metadata_path,
        model_path,
        thres=0.4,
        voi_lut=True,
        obj_det=True,
        classification=True,
        img_size=600,
        img_from_folder=True,
    ):
        """Build the study-level submission frame.

        Args:
            test_df_: Metadata frame as returned by ``prepare_data``. It is
                modified in place when ``classification`` is true: the four
                probability columns and ``study_pred`` are added.
            jpg_path: Directory with the exported JPEG files.
            metadata_path: Path of the JSON metadata mapping used by ``Generator``.
            model_path: Directory whose ``*.h5`` files are loaded as models.
            thres: A class gets a vote from a model when its score exceeds
                this value.
            voi_lut: Forwarded to ``Generator``.
            obj_det: If false, placeholder image-level rows are returned as well.
            classification: If false, no model is run and every study gets a
                fixed placeholder prediction.
            img_size: Forwarded to ``Generator``.
            img_from_folder: Forwarded to ``Generator``.

        Returns:
            ``(sub_study, test_df_)`` when ``obj_det`` is true, otherwise
            ``(sub_study, sub_image, test_df_)``.

        Raises:
            FileNotFoundError: ``classification`` is true but ``model_path``
                contains no ``.h5`` file.
        """
        labels = Test.LABELS
        gen = Generator(
            df=test_df_,
            img_size=img_size,
            jpg_path=jpg_path,
            metadata_path=metadata_path,
            voi_lut=voi_lut,
            img_from_folder=img_from_folder,
            batch_size=1,
        )
        # Visual sanity check; guarded so frames with fewer than 3 rows work.
        if len(gen) > 2:
            print(gen[2]["img"].shape)
        if len(gen) > 1:
            plt.imshow(gen[1]["img"][0])
            plt.xticks([])
            plt.yticks([])
            plt.show()

        study_rows = []
        if not classification:
            # check only object detection score
            for study_id in test_df_.StudyInstanceUID.unique().tolist():
                study_rows.append(
                    {
                        "id": study_id + "_study",
                        "PredictionString": "negative 1 0 0 1 1 typical 1 0 0 1 1 indeterminate 1 0 0 1 1 atypical 1 0 0 1 1",
                    }
                )
        else:
            model_files = sorted(
                name for name in os.listdir(model_path) if name.endswith(".h5")
            )
            if not model_files:
                raise FileNotFoundError(f"no .h5 model found in {model_path!r}")
            eff_models = [
                keras.models.load_model(os.path.join(model_path, name))
                for name in model_files
            ]
            # A label is kept only if a strict majority of models votes for it.
            count_thres = len(eff_models) // 2 + 1

            study_preds = []
            mean_scores = []
            # The generator resets the index, so address it by row position
            # rather than by the labels of test_df_.index.
            for pos in tqdm(range(len(test_df_))):
                data = gen[pos]
                scores = np.asarray(
                    [model.predict(data)[0] for model in eff_models],
                    dtype="float64",
                )[:, : len(labels)]
                # Store the ensemble mean; writing each model's scores in turn
                # would keep only those of the last model.
                mean_scores.append(np.round(scores.mean(axis=0), 5))
                votes = (scores > thres).sum(axis=0)
                winners = [
                    labels[k] for k in range(len(labels)) if votes[k] >= count_thres
                ]
                study_preds.append(" ".join(winners) or "negative")

            for k, label in enumerate(labels):
                test_df_[label] = [row[k] for row in mean_scores]
            test_df_["study_pred"] = study_preds

            # PredictionString in format 'label threshold 0 0 1 1'
            grouped = test_df_.groupby("StudyInstanceUID")[list(labels)].mean()
            for study_id, probs in zip(
                grouped.index.tolist(), np.round(grouped.values, 5)
            ):
                pred = " ".join(
                    label + " " + str(prob) + " 0 0 1 1"
                    for label, prob in zip(labels, probs)
                )
                study_rows.append({"id": study_id + "_study", "PredictionString": pred})

        sub_study = pd.DataFrame(study_rows, columns=Test._SUBMISSION_COLUMNS)

        # check only classification score
        if not obj_det:
            sub_image = pd.DataFrame(
                [
                    {
                        "id": img_name + "_image",
                        "PredictionString": Test.NO_BOX_PREDICTION,
                    }
                    for img_name in test_df_.id_image.tolist()
                ],
                columns=Test._SUBMISSION_COLUMNS,
            )
            return sub_study, sub_image, test_df_
        return sub_study, test_df_

    @staticmethod
    def make_bbox_df(test_df_, SAVE_BBOX_PATH):
        """Build the image-level submission frame from detector label files.

        Every file in ``<SAVE_BBOX_PATH>/labels`` is named after an
        ``id_image`` and holds one detection per line with six numeric fields.
        The first field is ignored; the others are ``x_center y_center width
        height confidence`` with coordinates relative to the image size
        (presumably YOLO-style output - confirm against the detector).

        Args:
            test_df_: Frame with ``id_image``, ``width`` and ``height`` columns.
            SAVE_BBOX_PATH: Directory that contains the ``labels`` folder.

        Returns:
            DataFrame with ``id`` and ``PredictionString`` columns. Images with
            detections come first, in directory order, followed by one "none"
            row for each remaining image, including images whose label file is
            empty.

        Raises:
            KeyError: A label file has no matching ``id_image`` in ``test_df_``.
            ValueError: A label line does not hold exactly six fields.
        """
        labels_dir = os.path.join(SAVE_BBOX_PATH, "labels")
        # First occurrence wins, like the previous `.values[0]` lookup.
        dims = {}
        for img_id, width, height in zip(
            test_df_["id_image"], test_df_["width"], test_df_["height"]
        ):
            dims.setdefault(img_id, (width, height))

        rows = []
        done = set()
        for file in os.listdir(labels_dir):
            img_name = os.path.splitext(file)[0]
            if img_name not in dims:
                raise KeyError(
                    f"label file {file!r} has no matching id_image in test_df_"
                )
            w, h = dims[img_name]
            list_pred = []
            with open(os.path.join(labels_dir, file), "r") as f:
                for line in f:
                    fields = line.split()
                    if not fields:
                        continue
                    if len(fields) != 6:
                        raise ValueError(
                            f"{file!r}: expected 6 fields per line, got {len(fields)}"
                        )
                    _, x_c, y_c, w_p, h_p, conf = map(float, fields)
                    xmin = int((x_c - w_p / 2) * w)
                    ymin = int((y_c - h_p / 2) * h)
                    xmax = int((x_c + w_p / 2) * w)
                    ymax = int((y_c + h_p / 2) * h)
                    list_pred.extend(["opacity", conf, xmin, ymin, xmax, ymax])
            if not list_pred:
                # Empty label file: let the image fall through to the "none"
                # row instead of emitting an empty PredictionString.
                continue
            rows.append(
                {
                    "id": img_name + "_image",
                    "PredictionString": " ".join(map(str, list_pred)),
                }
            )
            done.add(img_name)

        for img_name in test_df_.id_image.tolist():
            if img_name not in done:
                rows.append(
                    {
                        "id": img_name + "_image",
                        "PredictionString": Test.NO_BOX_PREDICTION,
                    }
                )
                done.add(img_name)
        return pd.DataFrame(rows, columns=Test._SUBMISSION_COLUMNS)

In [5]:
# Export the test DICOMs as JPEGs into /app/_data/tmp/ and collect their metadata.
# train_mode=True keeps only the first n_train_sample (default 10) top-level folders.
test_df = Test.prepare_data(
    data_path="/app/_data/test/",
    path_to_save="/app/_data/tmp/",
    voi_lut=True,
    train_mode=True,
)

100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
  test_df["path"] = test_df["path"].str.replace("\../", "/kaggle/")


In [8]:
class Generator(keras.utils.Sequence):
    """Keras ``Sequence`` yielding inference inputs: images plus one-hot metadata.

    Each batch is a dict with two arrays:

    * ``"img"``  - uint8 images resized to ``img_size`` x ``img_size`` with 3 channels;
    * ``"data"`` - concatenated one-hot vectors for ``PatientSex``,
      ``BodyPartExamined`` and ``modality``.

    Args:
        df: Frame with the columns ``path``, ``modality``, ``PatientSex``,
            ``BodyPartExamined`` and ``PhotometricInterpretation``. Its index
            is reset, so samples are addressed by row position.
        img_size: Side length of the square output image.
        jpg_path: Directory with pre-exported ``<image id>.jpg`` files.
        metadata_path: JSON file mapping each value of the three metadata
            fields to its one-hot position. Each mapping is looked up with
            the key ``"unknown"`` for values it does not contain.
        img_from_folder: If true, try the JPEG first and fall back to the DICOM.
        voi_lut: If true, ``apply_voi_lut`` is applied when reading a DICOM.
        batch_size: Number of samples per batch.
    """

    def __init__(
        self,
        df,
        img_size,
        jpg_path,
        metadata_path,
        img_from_folder=True,
        voi_lut=False,
        batch_size=1,
    ):
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.batch_size = batch_size
        self.img_size = img_size
        self.voi_lut = voi_lut
        self.jpg_path = jpg_path
        self.metadata_path = metadata_path
        self.img_from_folder = img_from_folder
        # Parsed metadata mapping, loaded on first use.
        self._dict_metadata = None

    def __len__(self):
        """Number of batches; a trailing partial batch counts as one."""
        return -(-self.df.shape[0] // self.batch_size)

    def _load_metadata(self):
        """Return the metadata mapping, reading the JSON file only once."""
        if getattr(self, "_dict_metadata", None) is None:
            with open(self.metadata_path, "r") as f:
                self._dict_metadata = json.load(f)
        return self._dict_metadata

    @staticmethod
    def _one_hot(mapping, value):
        """One-hot encode ``value``; values missing from ``mapping`` use "unknown"."""
        encoded = np.zeros(len(mapping), dtype="uint8")
        key = value if value in mapping else "unknown"
        encoded[mapping[key]] = 1
        return encoded

    def img_from_dicom(self, img_path, img_type):
        """Read a DICOM file and return it as a resized uint8 3-channel array.

        ``img_type`` is the photometric interpretation; ``"MONOCHROME1"``
        images are inverted before scaling.
        """
        data_file = dicom.dcmread(img_path)
        pixels = data_file.pixel_array
        img = apply_voi_lut(pixels, data_file) if self.voi_lut else pixels
        if img_type == "MONOCHROME1":
            img = img.max() - img
        low = img.min()
        span = img.max() - low
        if span == 0:
            # Constant image: scaling would divide 0 by 0 and yield NaNs.
            img = np.zeros(img.shape, dtype="uint8")
        else:
            img = ((img - low) / span * 255).astype("uint8")
        img = np.stack([img, img, img], axis=-1)
        img = tf.image.resize(
            img,
            (self.img_size, self.img_size),
        )
        return tf.cast(img, tf.uint8).numpy()

    def make_img(self, img_path, img_type):
        """Return the image for ``img_path``, preferring the exported JPEG."""
        img_name = os.path.basename(img_path).split(".dcm")[0]
        if self.img_from_folder:
            try:
                raw = tf.io.read_file(os.path.join(self.jpg_path, img_name + ".jpg"))
                img = tf.image.decode_jpeg(raw, channels=3)
                img = tf.image.resize(
                    img,
                    (self.img_size, self.img_size),
                )
                return tf.cast(img, tf.uint8).numpy()
            except Exception:
                # Best effort: a missing or unreadable JPEG falls back to the
                # DICOM source. KeyboardInterrupt/SystemExit still propagate.
                pass
        return self.img_from_dicom(img_path, img_type)

    def _get_one(self, ix):
        """Build the inputs for row ``ix``; returns ``(x, y)`` with a zero ``y``."""
        row = self.df.loc[ix]
        dict_metadata = self._load_metadata()
        img = self.make_img(row["path"], row["PhotometricInterpretation"])

        patient_sex_x = self._one_hot(dict_metadata["PatientSex"], row["PatientSex"])
        body_part_x = self._one_hot(
            dict_metadata["BodyPartExamined"], row["BodyPartExamined"]
        )
        # Sized from the "modality" mapping; it used to be sized from
        # "PatientSex", which is only correct when both mappings have the
        # same number of entries.
        modality_x = self._one_hot(dict_metadata["modality"], row["modality"])

        x = {
            "img": img,
            "data": np.concatenate([patient_sex_x, body_part_x, modality_x]),
        }
        y = np.zeros(4, dtype="uint8")
        return x, y

    def __getitem__(self, batch_ix):
        """Return batch ``batch_ix`` as ``{"img": ..., "data": ...}``.

        The last batch may hold fewer than ``batch_size`` samples.

        Raises:
            IndexError: ``batch_ix`` is outside ``range(len(self))``.
        """
        n_rows = self.df.shape[0]
        start = self.batch_size * batch_ix
        if batch_ix < 0 or start >= n_rows:
            raise IndexError(f"batch index {batch_ix} is out of range")
        stop = min(start + self.batch_size, n_rows)

        b_x_img = []
        b_x_data = []
        for row_ix in range(start, stop):
            x_dict, _ = self._get_one(row_ix)
            b_x_img.append(x_dict["img"])
            b_x_data.append(x_dict["data"])
        return {"img": np.array(b_x_img), "data": np.array(b_x_data)}

In [9]:
# Inspect the collected per-image metadata.
test_df

Unnamed: 0,path,StudyInstanceUID,id_image,modality,PatientSex,BodyPartExamined,PhotometricInterpretation,width,height
0,/app/_data/test/fe64182ae21d/d56579abcb25/a82c...,fe64182ae21d,a82ca8f37fb6,CR,F,CHEST,MONOCHROME2,4248.0,3480.0
1,/app/_data/test/6117058c3931/d51d2a5e7f7a/b74f...,6117058c3931,b74f81d65e79,DX,M,CHEST,MONOCHROME2,3032.0,3032.0
2,/app/_data/test/b83eaac8a377/326c25201eea/1c13...,b83eaac8a377,1c13336fc8a9,DX,M,CHEST,MONOCHROME2,3032.0,2520.0
3,/app/_data/test/9fab41ffbc39/5b8d372f6714/fa0c...,9fab41ffbc39,fa0c84ee4577,CR,F,CHEST,MONOCHROME1,4240.0,3480.0
4,/app/_data/test/4e4ee0341fab/70ae5f9ad500/dc68...,4e4ee0341fab,dc6834a1efa6,CR,F,CHEST,MONOCHROME2,2621.0,2597.0
5,/app/_data/test/5a74a91d9877/25998b541988/eff9...,5a74a91d9877,eff9f15c7e9b,DX,M,TORAX,MONOCHROME2,3712.0,3395.0
6,/app/_data/test/e43ccd956545/64ed4d9be391/87d8...,e43ccd956545,87d8baf120a6,CR,M,CHEST,MONOCHROME2,4240.0,3480.0
7,/app/_data/test/a8fe3043e449/73453ff7181d/64ea...,a8fe3043e449,64ea82b1343f,DX,M,CHEST,MONOCHROME2,4256.0,3488.0
8,/app/_data/test/1d8b4a15135f/cfe99f0be9cb/acc3...,1d8b4a15135f,acc3e6eff7c8,DX,M,CHEST,MONOCHROME2,2979.0,2837.0
9,/app/_data/test/994aa93b88d6/2138dd9d65e4/36ba...,994aa93b88d6,36ba388a18df,CR,M,CHEST,MONOCHROME2,4248.0,3480.0


In [10]:
# Batch-size-1 generator over test_df. With the default img_from_folder=True it
# tries the JPEGs in jpg_path first and falls back to the DICOM file.
gen = Generator(
    df=test_df,
    img_size=600,
    jpg_path="/app/_data/tmp/",
    metadata_path="/app/_data/dict_metadata.json",
    batch_size=1,
)

In [11]:
# Fetch batch 7 through the indexing protocol instead of calling the dunder directly.
a = gen[7]

InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run ExpandDims: Dst tensor is not initialized. [Op:ExpandDims]

In [48]:
# Show the fetched batch dict (keys "img" and "data").
a

{'img': array([[[[  3,   3,   3],
          [  3,   3,   3],
          [  3,   3,   3],
          ...,
          [  3,   3,   3],
          [  3,   3,   3],
          [  3,   3,   3]],
 
         [[  3,   3,   3],
          [  3,   3,   3],
          [  3,   3,   3],
          ...,
          [  3,   3,   3],
          [  3,   3,   3],
          [  3,   3,   3]],
 
         [[  3,   3,   3],
          [  3,   3,   3],
          [  3,   3,   3],
          ...,
          [  3,   3,   3],
          [  3,   3,   3],
          [  3,   3,   3]],
 
         ...,
 
         [[137, 137, 137],
          [145, 145, 145],
          [151, 151, 151],
          ...,
          [ 27,  27,  27],
          [ 20,  20,  20],
          [ 13,  13,  13]],
 
         [[139, 139, 139],
          [146, 146, 146],
          [151, 151, 151],
          ...,
          [ 29,  29,  29],
          [ 22,  22,  22],
          [ 15,  15,  15]],
 
         [[ 98,  98,  98],
          [105, 105, 105],
          [106, 106, 10