In [None]:
!conda install gdcm -c conda-forge -y

In [None]:
import os

from PIL import Image
import pandas as pd
from tqdm.auto import tqdm
import cv2 as cv
from glob import glob
import matplotlib.pyplot as plt

In [None]:
import numpy as np
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Load a DICOM file and return its pixels as an 8-bit grayscale array.

    Args:
        path: Location of the DICOM file to read.
        voi_lut: When true, pass the pixel data through ``apply_voi_lut``
            (the VOI LUT, if the device stored one, maps raw values to a
            "human-friendly" view).
        fix_monochrome: When true, invert images whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"`` so they do not
            look inverted.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with the pixel values min-max
        scaled to 0..255. An image whose pixels are all equal comes back as
        all zeros.
    """
    # Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way
    # `dcmread` is the supported reader; `read_file` is only a deprecated alias.
    dicom = pydicom.dcmread(path)

    # VOI LUT (if available by DICOM device) is used to transform raw DICOM data to
    # "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    # Work in float64 from here on: the subtractions below would otherwise be
    # carried out in the stored integer dtype, where they can wrap around.
    data = np.asarray(data, dtype=np.float64)

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    # A constant image has peak == 0 after the shift; dividing would produce
    # NaN and an undefined uint8 cast, so leave it as zeros instead.
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)

    return data

In [None]:
def resize(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Histogram-equalise an image array and scale it to ``size`` pixels.

    Args:
        array: Image array handed to ``cv.equalizeHist``.
        size: Edge length, in pixels, of the square target.
        keep_ratio: When true, shrink with ``Image.thumbnail`` so the image
            fits inside ``size`` x ``size``; otherwise resize to exactly
            ``size`` x ``size``.
        resample: PIL resampling filter.

    Returns:
        The equalised, resized ``PIL.Image.Image``.
    """
    # Original from: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image
    equalized = Image.fromarray(cv.equalizeHist(array))
    target = (size, size)

    if not keep_ratio:
        return equalized.resize(target, resample)

    # thumbnail() modifies the image in place and returns nothing.
    equalized.thumbnail(target, resample)
    return equalized

In [None]:
# Load the image-level training CSV into a DataFrame.
# NOTE(review): `train` is not referenced by any other cell visible here - confirm it is still needed.
train = pd.read_csv('../input/siim-covid19-detection/train_image_level.csv')

In [None]:
# Read one sample DICOM file so its dataset can be inspected interactively.
path = '../input/siim-covid19-detection/train/ae3e63d94c13/288554eb6182/e00f9fe0cce5.dcm'
# `dcmread` is the supported reader; `read_file` is only a deprecated alias.
dicom = pydicom.dcmread(path)

In [None]:
import csv

# Read every row of the duplicates file into a list of lists of strings.
# presumably each row holds image ids that are duplicates of one another - verify against the file.
# The `with` block closes the file once it has been read; `newline=''` is
# the mode the csv module asks for when reading.
with open('../input/siimdup/duplicates.csv', "r", newline='') as duplicates_file:
    lists_from_csv = list(csv.reader(duplicates_file))

print(lists_from_csv)

In [None]:
# Image ids that are never added to `remove`, even when they appear in the duplicates file.
keep = ['74077a8e3b7c_image', '9e4824fcee2e_image', '19fc87ff0612_image', '961e67cadcdc_image', '70fbcd6dcc53_image', '6494a03199e5_image', '089bd77c8c10_image', 'b61f3493c551_image', '0842f032a217_image', '830063223a31_image', 'efc93a3917b6_image', '26f643772090_image', 'c8156ae4d6e8_image', '93979c3e3177_image', '9e844dea386a_image', 'b4b931e5ad31_image', 'bb3076795a01_image', 'b6b74c8a97e4_image', '12e97ed89297_image', 'd180fed57716_image', 'c05a1da5efe0_image', '06f6423be3f9_image', '2da1eb17b0b7_image', '9fa8318fb7f6_image', '66712e2fc6a4_image', 'ea516e218fe6_image', '7966f780f27f_image', '93301812b0e7_image', '1a0a148c030f_image', 'd93b8a8335cf_image', 'caa7fd25ee9d_image', 'd342f75ccb55_image', '1dc7459cb081_image', '63c6e1324ac3_image', '04f41a8958f7_image', '41e9a794b342_image', '0d4d6acc9ed3_image', 'fa447a409bd6_image', 'f7edf5c476c4_image', '893fde8abd42_image', 'ea2688741043_image', 'b43de320e7d9_image', 'ac212043ee3c_image', '6f5e70a99d77_image', 'c7925ab50eb0_image', '42f55c3da74f_image', 'abb96ea8a826_image', '7be323e5f816_image', '0c6b440ba98e_image', '19701de5ae89_image', '40e1d57e03be_image', '077c85f226d8_image', 'c64ddad4795a_image', 'f7c7683d6ed9_image', '32ebc75d961e_image', 'df565bcf3504_image', 'a0178e3a0d8f_image', '3e7b2ffc97db_image', '4bb94cd7f2f4_image', '61f3ac249c50_image', '847f48f57169_image', '172a7ed6d7e2_image', '84135cf828e7_image', '7e8966bfec61_image', '7b3e9f13d4e7_image', 'f208dc529d16_image', 'd74ef8961bff_image', 'a2ee4b862182_image', 'd787c9bd4fa8_image', '68ad4b624a6d_image', '2f6019c75d6d_image', 'b0866caa201a_image', '52f97cad2b63_image', 'cbf0a27f993e_image', '3566e20a178e_image', '9108cdfd43dc_image', '33c026e51b02_image', '1c1069c57757_image', 'a414f67f5735_image', '173c23887f9b_image' ]

In [None]:
# Every entry of the duplicates file that is not listed in `keep` is marked
# for removal, in file order (repeated entries are kept as repeats).
remove = [
    entry
    for csv_row in lists_from_csv
    for entry in csv_row
    if entry not in keep
]

In [None]:
# Sanity check: number of ids kept vs. number of entries marked for removal.
print(len(keep), len(remove))

In [None]:
import multiprocessing

IMG_SIZE = 640

# Per-image metadata. These lists are filled in by the parent process from
# the values the workers return (see the pool loop at the bottom).
image_id = []
dim0 = []
dim1 = []
splits = []

# One (file, dirname, save_dir, split) work item per file found under each split.
tuples = []
for split in ['train', 'test']:
    save_dir = f'/kaggle/working/image/{split}/'

    os.makedirs(save_dir, exist_ok=True)

    for dirname, _, filenames in tqdm(os.walk(f'../input/siim-covid19-detection/{split}')):
        for file in filenames:
            tuples.append((file, dirname, save_dir, split))

# Hash-based membership test for the per-file check below; `remove` itself is
# left untouched.
_remove_ids = frozenset(remove)


def resize_and_save(t):
    """Convert one DICOM file to a resized PNG and report its metadata.

    Args:
        t: ``(file, dirname, save_dir, split)`` work item as built above.

    Returns:
        ``(image_id, dim0, dim1, split)`` where ``dim0``/``dim1`` are the
        first two dimensions of the array returned by ``read_xray``. The
        metadata is returned for every file; the PNG is only written when
        the image id is not marked for removal.
    """
    file, dirname, save_dir, split = t
    xray = read_xray(os.path.join(dirname, file))

    png_stem = file[:-4] + '_image'
    if png_stem not in _remove_ids:
        # Only resize when the result is actually saved.
        # set keep_ratio=True to have original aspect ratio
        im = resize(xray, size=IMG_SIZE)
        im.save(os.path.join(save_dir, png_stem + '.png'))

    # Appending to the module-level lists here would only change the worker
    # process's copy, so the metadata is returned to the parent instead.
    return file.replace('.dcm', '') + '_image', xray.shape[0], xray.shape[1], split


# The context manager shuts the pool down once every result has been consumed.
# NOTE(review): relies on workers inheriting the notebook's globals
# (fork start method, as on Linux) - confirm for other platforms.
with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
    results = pool.imap_unordered(resize_and_save, tuples)
    for done_id, done_dim0, done_dim1, done_split in tqdm(results, total=len(tuples)):
        image_id.append(done_id)
        dim0.append(done_dim0)
        dim1.append(done_dim1)
        splits.append(done_split)


In [None]:
#tuples

In [None]:
# Rebuild the per-image metadata serially in the parent process, one entry per
# work item in `tuples` (same order as `tuples`).
image_id, dim0, dim1, splits = [], [], [], []

for dcm_name, dcm_dir, _png_dir, split_name in tqdm(tuples):
    image_id.append(dcm_name.replace('.dcm', '') + '_image')

    # Only the array shape is needed here; the pixels themselves are discarded.
    pixels = read_xray(os.path.join(dcm_dir, dcm_name))
    dim0.append(pixels.shape[0])
    dim1.append(pixels.shape[1])

    splits.append(split_name)

In [None]:
%%time
!tar -zcf train.tar.gz -C "/kaggle/working/image/train/" .
!tar -zcf test.tar.gz -C "/kaggle/working/image/test/" .

In [None]:
# Assemble the collected per-image metadata column-wise and write it next to
# the archives as meta.csv (without the index column).
meta_columns = {
    'image_id': image_id,
    'dim0': dim0,
    'dim1': dim1,
    'split': splits,
}
df = pd.DataFrame(meta_columns)
df.to_csv('meta.csv', index=False)

In [None]:
from IPython.display import FileLink
# Show a link to the train archive as this cell's output.
FileLink(r'./train.tar.gz')

In [None]:
from IPython.display import FileLink
# Show a link to the test archive as this cell's output.
FileLink(r'./test.tar.gz')

In [None]:
# Walk the PNG directory and load, as a single-channel image, every file whose
# path ends with the wanted image id followed by a 4-character extension.
# If several files match, the last one read is the one left in `im`.
for png_path in tqdm(glob('../input/siimcovid19-512-img-png-600-study-png/image/*')):
    id_part = png_path[-22:-4]
    if id_part != '0a990c89256a_image':
        continue
    im = cv.imread(png_path, 0)  # flag 0: read as grayscale

In [None]:
# Contrast-enhance the loaded image: histogram equalisation, then add the
# top-hat response and subtract the black-hat response.
# Note: this cell rebinds `im`, so re-running it enhances the already
# enhanced image again.
hist_im = cv.equalizeHist(im)
kernel = cv.getStructuringElement(cv.MORPH_RECT, (15, 15)) # MORPH_ELLIPSE

tophat_img = cv.morphologyEx(hist_im, cv.MORPH_TOPHAT, kernel)
bothat_img = cv.morphologyEx(hist_im, cv.MORPH_BLACKHAT, kernel) # Black --> Bottom

# uint8 arithmetic wraps modulo 256, which turns over-bright pixels dark and
# vice versa. Do the sum in a wider signed type and clip back to 0..255.
enhanced = hist_im.astype(np.int16) + tophat_img - bothat_img
# `im` stays a NumPy array (not a PIL Image): the following cells pass it to
# OpenCV functions and to array arithmetic, which need an array.
im = np.clip(enhanced, 0, 255).astype(np.uint8)

In [None]:
# `im` may be a PIL Image at this point; OpenCV needs a NumPy array, so
# convert first (a plain copy when `im` already is an array).
hist_test = cv.equalizeHist(np.array(im))

In [None]:
# Display `im` with the grayscale colormap.
plt.imshow(im, cmap='gray')

In [None]:
# Display the histogram-equalised version of `im` with the grayscale colormap.
plt.imshow(hist_test, cmap='gray')

In [None]:
# Top-hat / black-hat contrast boost applied directly to `im` (no equalisation).
kernel = cv.getStructuringElement(cv.MORPH_RECT, (15, 15)) # MORPH_ELLIPSE

# `im` may be a PIL Image at this point; OpenCV and the arithmetic below need
# a NumPy array (a plain copy when `im` already is an array).
base_img = np.array(im)

tophat_img = cv.morphologyEx(base_img, cv.MORPH_TOPHAT, kernel)
bothat_img = cv.morphologyEx(base_img, cv.MORPH_BLACKHAT, kernel) # Black --> Bottom

# uint8 arithmetic wraps modulo 256; sum in a wider signed type and clip back
# to 0..255 so over- and under-shoots saturate instead of wrapping.
img = np.clip(base_img.astype(np.int16) + tophat_img - bothat_img, 0, 255).astype(np.uint8)

In [None]:
# Display the top-hat / black-hat enhanced image with the grayscale colormap.
plt.imshow(img, cmap='gray')