Adapted from: 
* https://www.kaggle.com/phunghieu/gwd-resize-images-bboxes
* https://www.kaggle.com/muhammadimran112233/siim-a-data-science-approach-starter

In [None]:
! conda install -c conda-forge gdcm -y

In [None]:
import numpy as np
import pandas as pd 
import os
from PIL import Image
from tqdm.auto import tqdm
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import seaborn as sns
from pandas_profiling import ProfileReport
from glob import glob
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from skimage import exposure
import cv2
import warnings
import albumentations as A

In [None]:
# config params
class CFG:
    """Notebook-wide settings, read as plain class attributes."""

    # root folder of the competition data (note the trailing slash)
    data_path = "../input/siim-covid19-detection/"
    # side length, in pixels, of the square output images
    size = 512
    # seed value kept alongside the other settings
    seed = 2021
    # folder the converted images are written under (note the trailing slash)
    working_dir = "/kaggle/working/"

# Functions

In [None]:
def dicom2array(path, voi_lut=True, fix_monochrome=True):
    """Read a DICOM file and return its pixel data as an 8-bit array.

    Args:
        path: Location of the DICOM file; handed to ``pydicom.dcmread``.
        voi_lut: When true, pass the raw pixels through ``apply_voi_lut``
            using the dataset's own metadata; otherwise use the raw pixels.
        fix_monochrome: When true, invert the pixel values of files whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``.

    Returns:
        ``np.uint8`` array, min-max scaled so the smallest pixel maps to 0
        and the largest to 255.  An image whose pixels are all equal comes
        back as all zeros.
    """
    # dcmread is the supported reader; read_file was only a deprecated alias.
    dicom = pydicom.dcmread(path)
    pixels = dicom.pixel_array
    data = apply_voi_lut(pixels, dicom) if voi_lut else pixels

    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # Min-max scale to [0, 1].  A constant image has a zero range, so the
    # division is skipped to avoid NaNs and an undefined uint8 cast.
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    return (data * 255).astype(np.uint8)
        
    
def plot_img(img, size=(7, 7), is_rgb=True, title="", cmap='gray'):
    """Display ``img`` in a fresh matplotlib figure.

    Args:
        img: Image data handed straight to ``plt.imshow``.
        size: Figure size passed as ``figsize``.
        is_rgb: Accepted for interface compatibility; not used by the body.
        title: Text placed as the figure's super-title.
        cmap: Colormap forwarded to ``plt.imshow``.
    """
    figure = plt.figure(figsize=size)
    plt.imshow(img, cmap=cmap)
    # the figure just created is the current one, so title it directly
    figure.suptitle(title)
    plt.show()


# Data

In [None]:
# storage directory for the transformed images
!mkdir train

In [None]:
# prepare a dataframe for producing resized images with bboxes
# Image-level annotations: one row per image, with an 'id' and a 'label' column
# (both are read further down).
xtrain_img = pd.read_csv(os.path.join(CFG.data_path, 'train_image_level.csv'))


path_list = []
image_list = []
splits = []  # not used in this cell; kept in case another cell reads it

# Walk the split folders under the configured data root (instead of repeating
# the path literal) and record every file name together with its full path.
for split in ['train']:
    split_dir = os.path.join(CFG.data_path, split)
    for dirname, _, filenames in tqdm(os.walk(split_dir)):
        for file in filenames:
            path_list.append(os.path.join(dirname, file))
            image_list.append(file)

df = pd.DataFrame({'image_id': image_list, 'image_path': path_list})

# Annotation ids carry an '_image' marker while files on disk are named
# '<id>.dcm'; derive the file name so the two tables can be joined on it.
xtrain_img['image_id'] = xtrain_img['id'].str.replace('_image', '', regex=False) + '.dcm'
# Default (inner) merge: only annotation rows with a matching file are kept.
xtrain_img = pd.merge(left=xtrain_img, right=df, on='image_id')


# Generation

In [None]:
# common transformation we shall use
# Single always-applied resize to a CFG.size x CFG.size square.  Boxes are
# supplied in 'pascal_voc' format and travel with a 'labels' field; the zero
# thresholds for min_area / min_visibility are passed explicitly.
transform = A.Compose(
    [A.Resize(height=CFG.size, width=CFG.size, p=1)],
    p=1.0,
    bbox_params=A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    ),
)

img_list = []
label_list = []

# For every annotated image: decode the DICOM, resize it together with its
# boxes, write it out as a JPEG and remember the rescaled label string.
for _, row in xtrain_img.iterrows():
    # get the image
    img_path = row['image_path']
    img = dicom2array(path=img_path)
    # Swap only the file extension, so a 'dcm' substring elsewhere in the
    # file name cannot be rewritten by accident.
    newname = os.path.splitext(os.path.basename(img_path))[0] + '.jpg'
    img_list.append(newname)

    # The label is a flat, space-separated string made of 6-token groups:
    # class name, confidence, then four box coordinates.
    tokens = row['label'].split(' ')
    labels = []
    confidences = []
    bboxes = []
    for start in range(0, len(tokens) - 5, 6):
        name, conf, *coords = tokens[start:start + 6]
        labels.append(name)
        confidences.append(conf)
        # negative coordinates are clamped to 0 before the transform
        bboxes.append([max(float(c), 0.0) for c in coords])

    # transform both
    result = transform(image=img, bboxes=bboxes, labels=np.ones(len(bboxes)))
    new_image = result['image']
    new_bboxes = np.array(result['bboxes']).tolist()

    # format the output
    if labels[0] == 'none':
        newlabel = 'none 1 0 0 1 1'
    else:
        # Every label must keep its own box; refuse to continue if the
        # transform returned a different number of boxes than went in.
        if len(new_bboxes) != len(labels):
            raise ValueError(
                f'{newname}: {len(labels)} boxes before the transform, '
                f'{len(new_bboxes)} after'
            )
        parts = []
        for name, conf, box in zip(labels, confidences, new_bboxes):
            # use this box's coordinates (previously the first box was
            # repeated for every entry)
            coords = ' '.join(str(np.round(f, 5)) for f in box)
            # the trailing space after each group matches the existing format
            parts.append(f'{name} {conf} {coords} ')
        newlabel = ''.join(parts)
    label_list.append(newlabel)

    # store the new image; imwrite reports failure only through its return
    # value, so check it rather than silently producing an incomplete set
    out_path = CFG.working_dir + 'train/' + newname
    if not cv2.imwrite(out_path, new_image):
        raise OSError(f'could not write {out_path}')

In [None]:
# store the new boxes with image_ids
# One row per converted image: the JPEG file name and its rescaled label string.
xmeta = pd.DataFrame({'image_id': img_list, 'label': label_list})
xmeta.to_csv('bounding_boxes.csv', index=False)

In [None]:
# wrap it up
!zip -rm -qq rescaled_with_bb.zip train bounding_boxes.csv