In [None]:
!pip install -q timm==0.6.5 --no-index --find-links=kaggle/input/rsna-bc-pip-requirements
!pip install -q albumentations==1.2.1 --no-index --find-links=kaggle/input/rsna-bc-pip-requirements
!pip install -q pylibjpeg-libjpeg==1.3.1 --no-index --find-links=/aggle/input/rsna-bc-pip-requirements
!pip install -q pydicom==2.0.0 --no-index --find-links=kaggle/input/rsna-bc-pip-requirements
!pip install -q python-gdcm==3.0.20 --no-index --find-links=kaggle/input/rsna-bc-pip-requirements
!pip install -q dicomsdl==0.109.1 --no-index --find-links=kaggle/input/rsna-bc-pip-requirements

!pip install -q kaggle/input/nvidia-dali-nightly-cuda110-1230dev/nvidia_dali_nightly_cuda110-1.23.0.dev20230203-7187866-py3-none-manylinux2014_x86_64.whl

In [2]:
from types import SimpleNamespace
import pickle
import os

import pandas as pd

# Central configuration namespace: target image size plus every input/output
# location used by the cells below.
cfg = SimpleNamespace(
    IMG_SIZE=(850, 1300),
    DATA_DIR="/home/pawel/Projects/rsna-breast-cancer-detection/data/",
)

# Output folders, all rooted at DATA_DIR: full-frame PNGs, ROI crops, and the
# ROI crops resized to IMG_SIZE (plain resize and letterboxed "_LB" variant).
cfg.SAVE_DIR_TRAIN = cfg.DATA_DIR + "DALI_VOI_train"
cfg.SAVE_DIR_TRAIN_ROI = cfg.DATA_DIR + "DALI_VOI_train_roi"
cfg.SAVE_DIR_TRAIN_ROI_RESIZED = cfg.DATA_DIR + f"DALI_VOI_train_roi_{cfg.IMG_SIZE[0]}x{cfg.IMG_SIZE[1]}"
cfg.SAVE_DIR_TRAIN_ROI_RESIZED_LB = cfg.DATA_DIR + f"DALI_VOI_train_roi_{cfg.IMG_SIZE[0]}x{cfg.IMG_SIZE[1]}_LB"

# Inputs: DICOM tree, metadata CSV, and the scratch folder for extracted streams.
cfg.IMAGES_DIR = cfg.DATA_DIR + "/train_images"
cfg.DF_PATH = cfg.DATA_DIR + "/train.csv"
cfg.JPG_DIR = "/home/pawel/Projects/rsna-breast-cancer-detection/train_tmp/jpg/"

# ROI detector weights and the local YOLOv5 source tree they are loaded with.
cfg.ROI_MODEL_PATH = '/home/pawel/Projects/rsna-breast-cancer-detection/ROI_detection/roi_detector.pt'
cfg.YOLO_DIR = '/home/pawel/Projects/rsna-breast-cancer-detection/ROI_detection/yolov5'

# Create every output folder up front; existing folders are left untouched.
for _save_dir in (
    cfg.SAVE_DIR_TRAIN,
    cfg.SAVE_DIR_TRAIN_ROI,
    cfg.SAVE_DIR_TRAIN_ROI_RESIZED,
    cfg.SAVE_DIR_TRAIN_ROI_RESIZED_LB,
):
    os.makedirs(_save_dir, exist_ok=True)

# Per-image metadata table; later cells read its patient_id / image_id columns.
df = pd.read_csv(cfg.DF_PATH)


In [3]:
import timm
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from copy import copy
import gc
import shutil 

import glob
from scipy.special import expit

import cv2
cv2.setNumThreads(0)

import dicomsdl
import pydicom
import random

from pydicom.filebase import DicomBytesIO

from os.path import join

from tqdm import tqdm

from joblib import Parallel, delayed
import multiprocessing as mp

from types import SimpleNamespace
from typing import Any, Dict

import torch
import torch.nn.functional as F
from torch import nn
from torch.nn.parameter import Parameter
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import GradScaler, autocast


import nvidia.dali.fn as fn
import nvidia.dali.types as types
from nvidia.dali import pipeline_def
from nvidia.dali.types import DALIDataType

# Torch device string: 'cuda' when a CUDA device is available, otherwise 'cpu'.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [4]:
def seed_everything(seed=3407):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and current CUDA
    device), exports ``PYTHONHASHSEED``, and switches cuDNN to deterministic
    mode.

    Parameters
    ----------
    seed : int
        Seed value shared by all generators (default ``3407``).
    """
    # Interpreter-level settings.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)

    # Numerical libraries.
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # Ask cuDNN for deterministic kernels.
    torch.backends.cudnn.deterministic = True


seed_everything()

In [5]:
#we need to patch DALI for Int16 support


from nvidia.dali.backend import TensorGPU, TensorListGPU
from nvidia.dali.pipeline import Pipeline
import nvidia.dali.ops as ops
from nvidia.dali import types
from nvidia.dali.plugin.base_iterator import _DaliBaseIterator
from nvidia.dali.plugin.base_iterator import LastBatchPolicy
import torch
import torch.utils.dlpack as torch_dlpack
import ctypes
import numpy as np
import torch.nn.functional as F
import pydicom

# DALI element type -> torch dtype used for the destination tensor.
# UINT16 is deliberately paired with torch.int16 (same element width), so
# 16-bit unsigned DALI output can be copied into an int16 torch tensor.
to_torch_type = {
    getattr(types.DALIDataType, dali_name): torch_dtype
    for dali_name, torch_dtype in (
        ("FLOAT", torch.float32),
        ("FLOAT64", torch.float64),
        ("FLOAT16", torch.float16),
        ("UINT8", torch.uint8),
        ("INT8", torch.int8),
        ("UINT16", torch.int16),
        ("INT16", torch.int16),
        ("INT32", torch.int32),
        ("INT64", torch.int64),
    )
}


def feed_ndarray(dali_tensor, arr, cuda_stream=None):
    """
    Copy the contents of a DALI tensor into an existing PyTorch tensor.

    The destination must already have the element type that `to_torch_type`
    assigns to the DALI tensor's dtype, and exactly the same shape; both are
    checked with assertions before the copy.

    Parameters
    ----------
    `dali_tensor` : nvidia.dali.backend.TensorCPU or nvidia.dali.backend.TensorGPU
                    Source of the copy
    `arr` : torch.Tensor
            Pre-allocated destination of the copy
    `cuda_stream` : torch.cuda.Stream, cudaStream_t or any value that can be cast to cudaStream_t.
                    CUDA stream used for a GPU copy (if omitted, an internal
                    user stream is selected). Normally PyTorch's current stream.

    Returns
    -------
    torch.Tensor
        `arr`, after the copy has been issued.
    """
    expected_dtype = to_torch_type[dali_tensor.dtype]

    assert expected_dtype == arr.dtype, ("The element type of DALI Tensor/TensorList"
                                         " doesn't match the element type of the target PyTorch Tensor: "
                                         "{} vs {}".format(expected_dtype, arr.dtype))
    assert dali_tensor.shape() == list(arr.size()), \
        ("Shapes do not match: DALI tensor has size {0}, but PyTorch Tensor has size {1}".
            format(dali_tensor.shape(), list(arr.size())))
    raw_stream = types._raw_cuda_stream(cuda_stream)

    # Wrap the destination's raw address so it can be handed to DALI.
    destination = ctypes.c_void_p(arr.data_ptr())

    # CPU-side tensors: plain copy, no stream involved.
    if not isinstance(dali_tensor, (TensorGPU, TensorListGPU)):
        dali_tensor.copy_to_external(destination)
        return arr

    # GPU-side tensors: issue a non-blocking copy on the requested stream.
    stream_handle = ctypes.c_void_p(raw_stream) if raw_stream is not None else None
    dali_tensor.copy_to_external(destination, stream_handle, non_blocking=True)
    return arr

In [6]:
# Sanity check on the metadata table: print its row count, then display the
# number of distinct patient_id values as the cell output.
print(f'Len df : {len(df)}')
df['patient_id'].nunique()

Len df : 54706


11913

In [7]:
# Relative DICOM path of every row, formatted as "<patient_id>/<image_id>.dcm".
df["fns"] = (
    df['patient_id'].astype(str).str.cat(df['image_id'].astype(str), sep='/')
    + '.dcm'
)

In [8]:
from typing import (
    Dict, Optional, Union, List, Tuple, TYPE_CHECKING, cast, Iterable,
    ByteString
)

def apply_voi_lut(
    arr: "np.ndarray",
    ds: "Dataset",
    index: int = 0,
    prefer_lut: bool = True
) -> "np.ndarray":
    """Dispatch `arr` to the VOI LUT or the windowing transform described by `ds`.

    Adapted from ``pydicom.pixel_data_handlers.util.apply_voi_lut``; the two
    transforms it delegates to are the ``apply_voi`` and ``apply_windowing``
    functions defined in this notebook.

    Parameters
    ----------
    arr
        Pixel values to transform. Passed through untouched to the selected
        transform.
    ds : dataset.Dataset
        Dataset queried for (0028,3010) *VOI LUT Sequence* and for
        (0028,1050) *Window Center* / (0028,1051) *Window Width*.
    index : int, optional
        Which alternative view to use when the dataset offers several
        (default ``0``).
    prefer_lut : bool
        Only relevant when both a usable VOI LUT and a window are present:
        a true value (default) selects the LUT, a false value selects the
        windowing operation.

    Returns
    -------
    The result of ``apply_voi`` or ``apply_windowing``, or `arr` itself when
    the dataset carries neither a usable VOI LUT nor a complete window.

    Notes
    -----
    A VOI LUT counts as usable only when the first item of the sequence has
    both *LUT Descriptor* and *LUT Data*. A window counts as complete only
    when both *Window Center* and *Window Width* are present.
    """
    has_lut = False
    if ds.get('VOILUTSequence'):
        ds.VOILUTSequence = cast(List["Dataset"], ds.VOILUTSequence)
        first_item = ds.VOILUTSequence[0]
        lut_parts = (
            first_item.get('LUTDescriptor', None),
            first_item.get('LUTData', None),
        )
        has_lut = None not in lut_parts

    window_parts = (
        ds.get('WindowCenter', None),
        ds.get('WindowWidth', None),
    )
    has_window = None not in window_parts

    # The LUT wins when it is the only option, or when both exist and the
    # caller prefers it.
    if has_lut and (prefer_lut or not has_window):
        return apply_voi(arr, ds, index)

    if has_window:
        return apply_windowing(arr, ds, index)

    # Nothing to apply.
    return arr


def apply_voi(
    arr: "np.ndarray", ds: "Dataset", index: int = 0
) -> "np.ndarray":
    """Apply a VOI lookup table to `arr` using torch.

    Torch port of ``pydicom.pixel_data_handlers.util.apply_voi``. The lookup
    is executed on the device that holds `arr`.

    Parameters
    ----------
    arr : torch.Tensor (anything ``torch.as_tensor`` accepts also works)
        Pixel values to map through the VOI LUT.
    ds : dataset.Dataset
        Dataset containing a (0028,3010) *VOI LUT Sequence*. Without one,
        `arr` is returned unchanged.
    index : int, optional
        Item of the *VOI LUT Sequence* to use when there are several
        alternative views (default ``0``).

    Returns
    -------
    torch.Tensor
        Tensor shaped like `arr` holding the LUT output. The dtype is
        ``torch.uint8`` for 8-bit LUT entries and ``torch.int32`` for 10-16
        bit entries (int32 is used because it can hold the full unsigned
        16-bit range).

    Raises
    ------
    NotImplementedError
        If the LUT entries are neither 8 nor 10-16 bits wide.

    References
    ----------
    * DICOM Standard, Part 3, Annex C.11.2 and Annex C.8.11.3.1.5
    """
    # Imported here because the notebook's import cells do not provide them.
    import warnings
    from struct import unpack_from

    if not ds.get('VOILUTSequence'):
        return arr

    pixels = torch.as_tensor(arr)
    if pixels.is_floating_point() or pixels.is_complex():
        warnings.warn(
            "Applying a VOI LUT on a float input array may give "
            "incorrect results"
        )

    # VOI LUT Sequence contains one or more items
    item = cast(List["Dataset"], ds.VOILUTSequence)[index]
    lut_descriptor = cast(List[int], item.LUTDescriptor)
    # A stored entry count of 0 stands for 2**16 entries.
    nr_entries = lut_descriptor[0] or 2**16
    first_map = lut_descriptor[1]

    # PS3.3 C.8.11.3.1.5: may be 8, 10-16
    nominal_depth = lut_descriptor[2]
    if nominal_depth in range(10, 17):
        lut_dtype = torch.int32
    elif nominal_depth == 8:
        lut_dtype = torch.uint8
    else:
        raise NotImplementedError(
            f"'{nominal_depth}' bits per LUT entry is not supported"
        )

    # Ambiguous VR, US or OW. Comparing against the plain string works both
    # when VR is a str and when it is a str-based enum.
    unc_data: Iterable[int]
    if item['LUTData'].VR == 'OW':
        endianness = '<' if ds.is_little_endian else '>'
        unpack_fmt = f'{endianness}{nr_entries}H'
        unc_data = unpack_from(unpack_fmt, cast(bytes, item.LUTData))
    else:
        unc_data = cast(List[int], item.LUTData)

    lut_data = torch.as_tensor(
        list(unc_data), dtype=lut_dtype, device=pixels.device
    )

    # Turn every input value into a LUT position:
    #   values below `first_map` clamp to the first entry,
    #   values past the table clamp to the last entry.
    # `first_map` may be negative, positive or 0.
    lut_index = (pixels - first_map).clamp(0, nr_entries - 1).to(torch.int64)

    return lut_data[lut_index]


def apply_windowing(
    arr: "np.ndarray", ds: "Dataset", index: int = 0
) -> "np.ndarray":
    """Apply the dataset's window center/width to `arr` using torch.

    Adapted from ``pydicom.pixel_data_handlers.util.apply_windowing``; the
    array operations are expressed with torch, so `arr` is expected to be a
    ``torch.Tensor``.

    Parameters
    ----------
    arr
        Pixel values to window. Converted to ``torch.float64`` before the
        operation.
    ds : dataset.Dataset
        Dataset providing (0028,1050) *Window Center*, (0028,1051) *Window
        Width* and the attributes that define the output range
        (*Bits Stored*, *Pixel Representation*, optional *Modality LUT
        Sequence* and *Rescale Slope* / *Rescale Intercept*).
    index : int, optional
        Which value to use when center/width are multi-valued
        (default ``0``).

    Returns
    -------
    The windowed values as ``torch.float64``, or `arr` unchanged when the
    dataset contains neither *Window Width* nor *Window Center*.

    Raises
    ------
    ValueError
        For a photometric interpretation other than MONOCHROME1/MONOCHROME2,
        a window width that is invalid for the selected function, or an
        unsupported (0028,1056) *VOI LUT Function*.

    Notes
    -----
    Any modality LUT or rescale operation required by the dataset must be
    applied before windowing.

    References
    ----------
    * DICOM Standard, Part 3, Annex C.11.2 and Annex C.8.11.3.1.5
    """
    if not ("WindowWidth" in ds or "WindowCenter" in ds):
        return arr

    if ds.PhotometricInterpretation not in ('MONOCHROME1', 'MONOCHROME2'):
        raise ValueError(
            "When performing a windowing operation only 'MONOCHROME1' and "
            "'MONOCHROME2' are allowed for (0028,0004) Photometric "
            "Interpretation"
        )

    # May be LINEAR (default), LINEAR_EXACT, SIGMOID or not present, VM 1
    voi_func = cast(str, getattr(ds, 'VOILUTFunction', 'LINEAR')).upper()

    def _select(keyword):
        # VR DS, VM 1-n: pick the requested view from multi-valued elements.
        elem = ds[keyword]
        return elem.value[index] if elem.VM > 1 else elem.value

    center = cast(float, _select('WindowCenter'))
    width = cast(float, _select('WindowWidth'))

    # Output range: depends on whether a modality LUT is present and on the
    # signedness of the stored pixels.
    ds.BitsStored = cast(int, ds.BitsStored)
    if ds.get('ModalityLUTSequence'):
        # Unsigned - see PS3.3 C.11.1.1.1
        modality_item = cast(List["Dataset"], ds.ModalityLUTSequence)[0]
        lut_bits = cast(List[int], modality_item.LUTDescriptor)[2]
        y_min, y_max = 0, 2**lut_bits - 1
    elif ds.PixelRepresentation == 0:
        # Unsigned
        y_min, y_max = 0, 2**ds.BitsStored - 1
    else:
        # Signed
        half_span = 2**(ds.BitsStored - 1)
        y_min, y_max = -half_span, half_span - 1

    # With a rescale defined, the range is expressed in rescaled units.
    if ds.get('RescaleSlope', None) is not None and \
            ds.get('RescaleIntercept', None) is not None:
        ds.RescaleSlope = cast(float, ds.RescaleSlope)
        ds.RescaleIntercept = cast(float, ds.RescaleIntercept)
        y_min = y_min * ds.RescaleSlope + ds.RescaleIntercept
        y_max = y_max * ds.RescaleSlope + ds.RescaleIntercept

    y_range = y_max - y_min
    arr = arr.to(torch.float64)

    if voi_func == 'SIGMOID':
        # PS3.3 C.11.2.1.3.1
        if width <= 0:
            raise ValueError(
                "The (0028,1051) Window Width must be greater than 0 "
                "for a 'SIGMOID' windowing operation"
            )
        return y_range / (1 + torch.exp(-4 * (arr - center) / width)) + y_min

    if voi_func not in ('LINEAR', 'LINEAR_EXACT'):
        raise ValueError(
            f"Unsupported (0028,1056) VOI LUT Function value '{voi_func}'"
        )

    # PS3.3 C.11.2.1.2.1 and C.11.2.1.3.2
    if voi_func == 'LINEAR':
        if width < 1:
            raise ValueError(
                "The (0028,1051) Window Width must be greater than or "
                "equal to 1 for a 'LINEAR' windowing operation"
            )
        center -= 0.5
        width -= 1
    elif width <= 0:
        raise ValueError(
            "The (0028,1051) Window Width must be greater than 0 "
            "for a 'LINEAR_EXACT' windowing operation"
        )

    # Saturate outside the window, scale linearly inside it.
    below = arr <= (center - width / 2)
    above = arr > (center + width / 2)
    inside = ~(below | above)

    arr[below] = y_min
    arr[above] = y_max
    if inside.any():
        arr[inside] = (
            ((arr[inside] - center) / width + 0.5) * y_range + y_min
        )

    return arr

def convert_dicom_to_jpg(file, save_folder=""):
    """Extract the compressed image stream of a DICOM file into a ``.jpg`` file.

    The pixel data is not decoded: the embedded codestream is located by its
    header bytes and written verbatim to
    ``save_folder + "<patient>_<image>.jpg"``.

    Parameters
    ----------
    file : str
        Path to the DICOM file, using ``/`` separators and laid out as
        ``.../<patient>/<image>.dcm``.
    save_folder : str
        Prefix prepended to the output file name (include the trailing
        separator).

    Notes
    -----
    Only transfer syntaxes ``1.2.840.10008.1.2.4.90`` and
    ``1.2.840.10008.1.2.4.70`` are handled. For any other syntax, or when the
    expected header bytes are missing from the pixel data, a warning is
    issued and no file is written.
    """
    # Imported here because the notebook's import cells do not provide it.
    import warnings

    # Transfer syntax UID -> byte sequence marking the start of the stream.
    stream_headers = {
        '1.2.840.10008.1.2.4.90': b"\x00\x00\x00\x0C",  # jpeg2000 header
        '1.2.840.10008.1.2.4.70': b"\xff\xd8\xff\xe0",  # jpeg lossless header
    }

    patient = file.split('/')[-2]
    image = file.split('/')[-1][:-4]
    dcmfile = pydicom.dcmread(file)

    syntax = str(dcmfile.file_meta.TransferSyntaxUID)
    header = stream_headers.get(syntax)
    if header is None:
        warnings.warn(f"{file}: transfer syntax {syntax} is not handled, skipping")
        return

    pixel_data = dcmfile.PixelData
    offset = pixel_data.find(header)
    if offset < 0:
        # bytes.find returns -1 when the header is absent; slicing from -1
        # would write a one-byte garbage file.
        warnings.warn(f"{file}: stream header not found in pixel data, skipping")
        return

    with open(save_folder + f"{patient}_{image}.jpg", "wb") as binary_file:
        binary_file.write(pixel_data[offset:])

            
@pipeline_def
def jpg_decode_pipeline(jpgfiles):
    """DALI pipeline that reads the given files and decodes them as images.

    Parameters
    ----------
    jpgfiles : list of str
        Paths handed to the file reader.

    Returns
    -------
    The decoder output: images decoded with ``device='mixed'``, requested as
    ``UINT16`` with the source's own colour layout (``types.ANY_DATA``).
    """
    # The reader also yields a label per file, which is not needed here.
    encoded, _labels = fn.readers.file(files=jpgfiles)
    return fn.experimental.decoders.image(
        encoded,
        device='mixed',
        output_type=types.ANY_DATA,
        dtype=DALIDataType.UINT16,
    )

def parse_window_element(elem):
    """Convert a window center/width value to a single ``float``.

    Parameters
    ----------
    elem
        One of: a list/tuple (its first item is used), a string or number,
        or a ``pydicom`` ``DataElement`` (first item of its value if it is
        indexable, otherwise the value itself).

    Returns
    -------
    float or None
        The parsed number, or ``None`` when `elem` is of an unsupported type.
    """
    if isinstance(elem, (list, tuple)):
        return float(elem[0])
    if isinstance(elem, (str, int, float)):
        return float(elem)
    if isinstance(elem, pydicom.dataelem.DataElement):
        try:
            # Multi-valued element: take the first value.
            return float(elem[0])
        except (TypeError, IndexError, KeyError):
            # Single-valued element: the value is not indexable.
            return float(elem.value)
    return None

def linear_window(data, center, width):
    """Clamp `data` to the window ``center`` +/- ``width // 2``.

    Parameters
    ----------
    data : torch.Tensor
        Values to clamp.
    center, width : number
        Window center and width; half the width is computed with floor
        division.

    Returns
    -------
    torch.Tensor
        `data` limited to the window bounds.
    """
    half_width = width // 2
    return torch.clamp(data, min=center - half_width, max=center + half_width)


def process_dicom(img, dicom):
    """Apply the DICOM display transform and rescale the image to ``[0, 1]``.

    Steps: VOI LUT / windowing via ``apply_voi_lut``, min-max normalisation,
    and inversion when the photometric interpretation is ``MONOCHROME1``.

    Parameters
    ----------
    img : torch.Tensor
        Decoded pixel values.
    dicom : pydicom dataset
        Dataset the image was taken from; supplies the VOI / window tags and
        the photometric interpretation.

    Returns
    -------
    torch.Tensor
        Floating point image with values in ``[0, 1]``. A constant image
        (max equals min) yields all zeros before the optional inversion
        instead of NaNs.
    """
    try:
        invert = getattr(dicom, "PhotometricInterpretation", None) == "MONOCHROME1"
    except Exception:
        # Best effort: an unreadable tag is treated as "no inversion".
        invert = False

    img = apply_voi_lut(img, dicom)

    # The VOI LUT path may return an integer tensor; normalise in floating point.
    if not torch.is_floating_point(img):
        img = img.to(torch.float32)

    lowest, highest = img.min(), img.max()
    if highest > lowest:
        img = (img - lowest) / (highest - lowest)
    else:
        # Constant image: the plain formula would divide by zero.
        img = torch.zeros_like(img)

    if invert:
        img = 1 - img
    return img

In [9]:
# Split the file list into N_CHUNKS contiguous [start, end) index ranges of
# roughly 2000 files each (a single range when there are 2000 files or fewer).
# Boundaries use exact integer arithmetic so the ranges always tile
# 0..len(df) with no gap, overlap or rounding drift.
N_FILES = len(df["fns"])
N_CHUNKS = N_FILES // 2000 if N_FILES > 2000 else 1
CHUNKS = [
    (N_FILES * k // N_CHUNKS, N_FILES * (k + 1) // N_CHUNKS)
    for k in range(N_CHUNKS)
]
CHUNKS = np.array(CHUNKS).astype(int)

In [10]:
import matplotlib
import matplotlib.pyplot as plt


import sys
import timm
# Print the timm version that was actually imported.
print('timm',timm.__version__)
#print(timm.__file__)

print('import ok!')

# Put cfg.YOLO_DIR on the module search path before importing from `utils`
# (presumably the YOLOv5 checkout's own `utils` package — verify if another
# `utils` module is importable in this environment).
sys.path.append(cfg.YOLO_DIR)
from utils.general import non_max_suppression



def get_yolo():
    """Load the ROI detector through ``torch.hub`` from the local YOLOv5 tree.

    The matplotlib backend that is active before the load is re-selected
    afterwards, so loading the model leaves the notebook's plotting backend
    as it was.

    Returns
    -------
    The model object returned by ``torch.hub.load`` for the custom weights at
    ``cfg.ROI_MODEL_PATH``.
    """
    backend_before = plt.get_backend()
    detector = torch.hub.load(
        cfg.YOLO_DIR,
        'custom',
        path=cfg.ROI_MODEL_PATH,
        source='local',
        force_reload=True,
    )
    matplotlib.use(backend_before)
    return detector


# ROI detector used by the processing loop, moved to the GPU.
model = get_yolo().to('cuda')

timm 0.6.12
import ok!


YOLOv5 🚀 2023-1-6 Python-3.8.13 torch-1.13.1+cu116 CUDA:0 (NVIDIA RTX A4000, 16117MiB)

Fusing layers... 
Model summary: 206 layers, 3087256 parameters, 0 gradients, 4.2 GFLOPs
Adding AutoShape... 


In [11]:
def _p(d):
    if len(d) > 0 and len(d[0]) >= 1:
        return d[0].numpy()
    return np.array([0, 0, 1024, 1024, 1, 0])

def letterbox(img, input_shape):
    """Resize `img` to fit inside `input_shape` keeping its aspect ratio.

    The resized image is placed in the top-left corner of a zero-filled
    ``uint8`` canvas; the remaining area stays black.

    Parameters
    ----------
    img : numpy.ndarray
        Single-channel image; its first two dimensions are (height, width).
    input_shape : sequence of int
        Target size as ``(width, height)``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(input_shape[1], input_shape[0])``.
    """
    img_h, img_w = img.shape[:2]
    target_w, target_h = input_shape[0], input_shape[1]

    # Scale by whichever side is the tighter constraint.
    if (target_w / img_w) <= (target_h / img_h):
        new_w = target_w
        new_h = int(img_h * target_w / img_w)
    else:
        new_h = target_h
        new_w = int(img_w * target_h / img_h)

    # Keep the size valid for cv2.resize and inside the canvas.
    new_w = min(max(new_w, 1), target_w)
    new_h = min(max(new_h, 1), target_h)

    # `interpolation` must be passed by keyword: the third positional
    # parameter of cv2.resize is `dst`, not the interpolation flag.
    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_NEAREST)

    canvas = np.full((target_h, target_w), 0, dtype=np.uint8)
    canvas[0:new_h, 0:new_w] = resized

    return canvas

def resize_image(img, resize=None, do_letterbox=False):
    """Resize `img` to `resize`, optionally preserving its aspect ratio.

    Parameters
    ----------
    img : numpy.ndarray
        Image to resize.
    resize : sequence of int
        Target size as ``(width, height)``.
    do_letterbox : bool
        When true, delegate to ``letterbox`` (aspect ratio kept, padded);
        otherwise stretch to the target size with nearest-neighbour
        interpolation.

    Returns
    -------
    numpy.ndarray
        The resized image.
    """
    if not do_letterbox:
        return cv2.resize(img, resize, interpolation=cv2.INTER_NEAREST)

    return letterbox(img, resize)

In [12]:
# Main conversion loop. For every chunk of the file list:
#   1. extract the compressed image streams of the chunk's DICOMs to JPG_DIR,
#   2. decode them one by one with DALI,
#   3. apply the DICOM display transform, orient the image, save it as PNG,
#   4. detect the ROI with the YOLO model and save the crop plus its two
#      resized variants,
#   5. delete the temporary streams.
for ttt, chunk in enumerate(CHUNKS):
    print(f'chunk {ttt} of {len(CHUNKS)} chunks')
    os.makedirs(cfg.JPG_DIR, exist_ok=True)

    _ = Parallel(n_jobs=2)(
        delayed(convert_dicom_to_jpg)(f'{cfg.IMAGES_DIR}/{img}', save_folder=cfg.JPG_DIR)
        for img in df["fns"].tolist()[chunk[0]: chunk[1]]
    )

    jpgfiles = glob.glob(cfg.JPG_DIR + "*.jpg")

    # The pipeline yields files in list order, one per run() call, so it stays
    # aligned with the loop below. Only build it when there is something to read.
    if jpgfiles:
        pipe = jpg_decode_pipeline(jpgfiles, batch_size=1, num_threads=12, device_id=0)
        pipe.build()

    for i, f in enumerate(tqdm(jpgfiles)):
        patient, dicom_id = f.split('/')[-1][:-4].split('_')
        try:
            # Inside the try so that one unreadable DICOM only skips this image.
            dicom = pydicom.dcmread(cfg.IMAGES_DIR + f"/{patient}/{dicom_id}.dcm")

            out = pipe.run()
            # Dali -> Torch
            img = out[0][0]
            img_torch = torch.empty(img.shape(), dtype=torch.int16, device="cuda")
            feed_ndarray(img, img_torch, cuda_stream=torch.cuda.current_stream(device=0))

            if getattr(dicom, "PixelRepresentation", 0) == 0:
                # The decoder emits UINT16 but the copy lands in an int16 tensor,
                # so unsigned values above 32767 show up negative. Widen and mask
                # to recover the original unsigned range.
                img = (img_torch.to(torch.int32) & 0xFFFF).float()
            else:
                img = img_torch.float()

            #apply dicom preprocessing
            img = process_dicom(img, dicom)
            original_h, original_w = img.shape[:2]

            # Mirror the image when the right-most 10% of columns is brighter
            # than the left-most 10%.
            if img[:,int(-original_w * 0.10):].sum() > img[:,:int(original_w * 0.10)].sum():
                img = torch.flip(img, dims=[1])

            # Detector input: 1024x1024, single channel repeated three times.
            image_roi = F.interpolate(img.view(1, 1, img.size(0), img.size(1)), (1024, 1024), mode="nearest")[0, 0]
            image_roi_tensor = torch.stack([image_roi, image_roi, image_roi], axis=0).unsqueeze(0)

            img = (img * 255).clip(0,255).to(torch.uint8).cpu().numpy()

            out_file_train = os.path.join(cfg.SAVE_DIR_TRAIN, f"{patient}_{dicom_id}.png")
            cv2.imwrite(out_file_train, img)

            with torch.no_grad():
                y = model(image_roi_tensor).cpu()

            dets = non_max_suppression(y, conf_thres = 0.1, classes = [0, 1])

            # keep best (batch size is 1, so only the first entry exists)
            best_det = _p(dets[0])

            # Box corners as fractions of the 1024x1024 detector input.
            xmin, ymin, xmax, ymax = np.clip(best_det[:4] / 1024, 0, 1)

            # Back to pixel coordinates of the full-resolution image.
            xmin = int(xmin * original_w)
            xmax = int(xmax * original_w)
            ymin = int(ymin * original_h)
            ymax = int(ymax * original_h)

            roi = img[ymin:ymax, xmin:xmax]
            if roi.size == 0:
                # Degenerate box: keep the whole image, like the no-detection
                # fallback of _p, instead of failing in cv2.resize.
                roi = img
            img = roi

            img_resized = resize_image(img, resize=cfg.IMG_SIZE, do_letterbox=False)
            img_lb = resize_image(img, resize=cfg.IMG_SIZE, do_letterbox=True)

            out_file_train_roi = os.path.join(cfg.SAVE_DIR_TRAIN_ROI, f"{patient}_{dicom_id}.png")
            out_file_name = os.path.join(cfg.SAVE_DIR_TRAIN_ROI_RESIZED, f"{patient}_{dicom_id}.png")
            out_file_name_lb = os.path.join(cfg.SAVE_DIR_TRAIN_ROI_RESIZED_LB, f"{patient}_{dicom_id}.png")

            cv2.imwrite(out_file_train_roi, img)
            cv2.imwrite(out_file_name, img_resized)
            cv2.imwrite(out_file_name_lb, img_lb)

        except Exception as e:
            print(i, e)
            # Restart decoding at the next file so the pipeline and the loop
            # stay aligned wherever the failure happened. Nothing to rebuild
            # when the failing file was the last one.
            remaining = jpgfiles[i+1:]
            if remaining:
                pipe = jpg_decode_pipeline(remaining, batch_size=1, num_threads=2, device_id=0)
                pipe.build()
            continue

    shutil.rmtree(cfg.JPG_DIR)
print(f'DALI Raw image load complete')

chunk 0 of 27 chunks


100%|██████████| 2026/2026 [06:38<00:00,  5.08it/s]


chunk 1 of 27 chunks


100%|██████████| 2026/2026 [06:51<00:00,  4.92it/s]


chunk 2 of 27 chunks


100%|██████████| 2026/2026 [06:29<00:00,  5.20it/s]


chunk 3 of 27 chunks


100%|██████████| 2026/2026 [06:28<00:00,  5.22it/s]


chunk 4 of 27 chunks


100%|██████████| 2026/2026 [06:37<00:00,  5.10it/s]


chunk 5 of 27 chunks


100%|██████████| 2026/2026 [06:41<00:00,  5.04it/s]


chunk 6 of 27 chunks


100%|██████████| 2027/2027 [06:45<00:00,  5.00it/s]


chunk 7 of 27 chunks


100%|██████████| 2026/2026 [06:31<00:00,  5.17it/s]


chunk 8 of 27 chunks


100%|██████████| 2026/2026 [06:45<00:00,  4.99it/s]


chunk 9 of 27 chunks


100%|██████████| 2026/2026 [06:23<00:00,  5.28it/s]


chunk 10 of 27 chunks


100%|██████████| 2026/2026 [06:35<00:00,  5.12it/s]


chunk 11 of 27 chunks


100%|██████████| 2026/2026 [06:27<00:00,  5.23it/s]


chunk 12 of 27 chunks


100%|██████████| 2026/2026 [06:24<00:00,  5.27it/s]


chunk 13 of 27 chunks


100%|██████████| 2027/2027 [06:40<00:00,  5.07it/s]


chunk 14 of 27 chunks


100%|██████████| 2026/2026 [06:27<00:00,  5.23it/s]


chunk 15 of 27 chunks


100%|██████████| 2026/2026 [06:42<00:00,  5.04it/s]


chunk 16 of 27 chunks


100%|██████████| 2026/2026 [06:26<00:00,  5.24it/s]


chunk 17 of 27 chunks


100%|██████████| 2026/2026 [06:18<00:00,  5.36it/s]


chunk 18 of 27 chunks


100%|██████████| 2026/2026 [06:24<00:00,  5.27it/s]


chunk 19 of 27 chunks


100%|██████████| 2026/2026 [06:41<00:00,  5.05it/s]


chunk 20 of 27 chunks


100%|██████████| 2027/2027 [06:33<00:00,  5.15it/s]


chunk 21 of 27 chunks


100%|██████████| 2026/2026 [06:28<00:00,  5.22it/s]


chunk 22 of 27 chunks


100%|██████████| 2026/2026 [06:43<00:00,  5.02it/s]


chunk 23 of 27 chunks


100%|██████████| 2026/2026 [06:40<00:00,  5.05it/s]


chunk 24 of 27 chunks


100%|██████████| 2026/2026 [06:19<00:00,  5.33it/s]


chunk 25 of 27 chunks


100%|██████████| 2026/2026 [06:26<00:00,  5.24it/s]


chunk 26 of 27 chunks


100%|██████████| 2027/2027 [06:13<00:00,  5.43it/s]


DALI Raw image load complete
