In [None]:
#!conda install '/kaggle/input/libs/libjpeg-turbo-2.1.0-h7f98852_0.tar.bz2' -c conda-forge -y
#!conda install '/kaggle/input/pydicom-conda-helper/libgcc-ng-9.3.0-h2828fa1_19.tar.bz2' -c conda-forge -y
#!conda install '/kaggle/input/pydicom-conda-helper/gdcm-2.8.9-py37h500ead1_1.tar.bz2' -c conda-forge -y
#!conda install '/kaggle/input/pydicom-conda-helper/conda-4.10.1-py37h89c1867_0.tar.bz2' -c conda-forge -y
#!conda install '/kaggle/input/pydicom-conda-helper/certifi-2020.12.5-py37h89c1867_1.tar.bz2' -c conda-forge -y
#!conda install '/kaggle/input/pydicom-conda-helper/openssl-1.1.1k-h7f98852_0.tar.bz2' -c conda-forge -y


In [None]:
import itertools
import os
import pathlib
import shutil

import cv2
import IPython
import numpy as np
import pandas as pd
import pydicom
#import gdcm

In [None]:
from pydicom.datadict import DicomDictionary, keyword_dict
from pydicom.pixel_data_handlers.util import apply_voi_lut
from pydicom.dataset import Dataset
from tqdm.notebook import tqdm
from IPython.display import Image

#from fmi.fmi.explore import *
#from fmi.fmi.preprocessing import *
#from fmi.fmi.pipeline import *
from fastai.vision.all import *
from fastai.medical.imaging import *

from torchvision.utils import save_image
from skimage import exposure

In [None]:
#system_info()

# Constants & Paths & Configs

In [None]:
# --- Input locations -------------------------------------------------------
SIIM_COVID19_DETECTION_DIR = '/kaggle/input/siim-covid19-detection/'
PART0_RESIZED_DIR = '../input/siim-covid19-resized-to-512px-jpg'
# Recursive glob pattern matching every .dcm file below the competition dir.
META_DIR = SIIM_COVID19_DETECTION_DIR + '**/*.dcm'
TEMP_DIR = '/kaggle/temp/'

# --- Derived directories ---------------------------------------------------
INPUT_DIR = PART0_RESIZED_DIR + '/train/'
# TEMP_DIR already ends with '/', so no leading slash is added here
# (the previous form produced '/kaggle/temp//train/').
OUTPUT_DIR = DATASET_DIR = TEMP_DIR + 'train/'
TRAIN_DIR = DATASET_DIR + 'train/'
# One sub-directory per two-letter class code.
TA_DIR = TRAIN_DIR + 'ta/'
IA_DIR = TRAIN_DIR + 'ia/'
AA_DIR = TRAIN_DIR + 'aa/'
NP_DIR = TRAIN_DIR + 'np/'

WORKING_DIR = '/kaggle/working/'

# --- Experiment tracking ---------------------------------------------------
WANDB_PROJECT_NAME = 'project8-kaggle-covid19'
WANDB_ENTITY_NAME = ''

# --- Label / metadata files ------------------------------------------------
TRAIN_IMAGE_LEVEL_PATH = SIIM_COVID19_DETECTION_DIR + 'train_image_level.csv'
TRAIN_STUDY_LEVEL_PATH = SIIM_COVID19_DETECTION_DIR + 'train_study_level.csv'
# PART0_RESIZED_DIR has no trailing slash, so the separator must be explicit;
# without it the path collapsed to '...-512px-jpgmeta.csv'.
META_PATH = PART0_RESIZED_DIR + '/meta.csv'

# --- Training hyper-parameters ---------------------------------------------
BATCH_SIZE = 32
EPOCHS = 25
IMG_SIZE = WIDTH = HEIGHT = 224
LEARNING_RATE = 0.00008

INTERPOLATION = cv2.INTER_LANCZOS4

# Sanity check: list what the competition dataset directory contains.
print(os.listdir(SIIM_COVID19_DETECTION_DIR))

# Path object for the competition directory, used for globbing DICOM files.
root = pathlib.Path(SIIM_COVID19_DETECTION_DIR)


# Learning Objectives
1. Learn how to handle DICOM (Digital Imaging and Communications in Medicine) via pydicom https://dicom.innolitics.com/ciods/segmentation/general-image/00080008
1. Reuse a pretrained network (transfer learning), e.g. DenseNet: https://keras.io/api/applications/densenet/
1. Write a basic network yourself

# Pre-process study, image files

In [None]:
# Load the image-level and study-level label tables.
df_train_image_level = pd.read_csv(TRAIN_IMAGE_LEVEL_PATH)
df_train_study_level = pd.read_csv(TRAIN_STUDY_LEVEL_PATH)

# Vectorised string operations replace the row-wise DataFrame.apply(axis=1)
# calls; the resulting values are the same for string columns.
# Keep only the part of the id before the first underscore.
df_train_image_level['id'] = df_train_image_level['id'].str.split('_').str[0]
# Location of the pre-resized JPEG for each image id.
df_train_image_level['path'] = INPUT_DIR + df_train_image_level['id'] + '.jpg'
# First whitespace-separated token of the label string.
df_train_image_level['image_level'] = df_train_image_level['label'].str.split(' ').str[0]

df_train_study_level['id'] = df_train_study_level['id'].str.split('_').str[0]
# Positional rename: the first column becomes the merge key 'StudyInstanceUID'.
# NOTE(review): this assumes the CSV has exactly these five columns in this order.
df_train_study_level.columns = ['StudyInstanceUID', 'Negative for Pneumonia', 'Typical Appearance', 'Indeterminate Appearance', 'Atypical Appearance']
df_train_study_level.head()

In [None]:
# Attach the study-level class columns to every image row, then keep a single
# fully-labelled image per study.
study_label_columns = [
    'Negative for Pneumonia',
    'Typical Appearance',
    'Indeterminate Appearance',
    'Atypical Appearance',
]
df_train_image_level = (
    df_train_image_level
    .merge(df_train_study_level, on='StudyInstanceUID', how="left")
    .loc[:, ['id', 'StudyInstanceUID', 'path'] + study_label_columns]
    .dropna()                                                    # drop rows with any missing value
    .drop_duplicates(subset=['StudyInstanceUID'], keep='first')  # first image of each study
    .reset_index(drop=True)
)
df_train_image_level.head()

# Extract metadata from DICOM files
* https://www.kaggle.com/avirdee/understanding-dicoms

In [None]:

# Render a remote illustration by URL (linked, not downloaded by the kernel).
dicom_illustration_url = 'https://asvcode.github.io/MedicalImaging/images/dicom_.PNG'
IPython.display.Image(url=dicom_illustration_url)

In [None]:
# credit @raddar
def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Read a DICOM file and return its pixels as an 8-bit image.

    Args:
        path: Location of the DICOM file, passed to ``pydicom.dcmread``.
        voi_lut: When true, pass the pixel array through ``apply_voi_lut``
            (VOI LUT, if provided by the DICOM device, transforms raw data
            to a "human-friendly" view).
        fix_monochrome: When true and the file's PhotometricInterpretation
            is "MONOCHROME1", invert the intensities so the X-ray does not
            look inverted.

    Returns:
        ``np.uint8`` array with the same shape as the pixel data, min-max
        scaled to 0..255. A constant image yields an all-zero array.
    """
    # dcmread is the supported reader; read_file is a deprecated alias.
    dicom = pydicom.dcmread(path)

    pixels = dicom.pixel_array
    data = apply_voi_lut(pixels, dicom) if voi_lut else pixels

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # Shift so the minimum is 0, then scale the maximum to 255.
    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Constant image: dividing by the zero range would produce NaNs.
        return np.zeros(data.shape, dtype=np.uint8)
    data = data / peak
    return (data * 255).astype(np.uint8)

def resize_and_save(file_path, dim=512, aspect_ratio=True):
    """Convert one DICOM file to a resized JPEG under /kaggle/working.

    Args:
        file_path: Path of the DICOM file. If it contains 'train' the image
            is written to the 'train' sub-directory, otherwise to 'test'.
        dim: Target size in pixels. With ``aspect_ratio`` the longer side is
            scaled to ``dim``; otherwise the image becomes ``dim`` x ``dim``.
            Previously read from an undefined global of the same name.
        aspect_ratio: Keep the original aspect ratio when true. Previously
            read from an undefined global of the same name.

    Returns:
        Tuple ``(image_id, width, height)`` where ``image_id`` is the file
        stem with '_image' appended and width/height are the ORIGINAL pixel
        dimensions before resizing.

    Raises:
        OSError: If OpenCV reports that the JPEG could not be written.
    """
    file_path = str(file_path)  # accept pathlib.Path as well as str
    split = 'train' if 'train' in file_path else 'test'
    base_dir = f'/kaggle/working/{split}'
    # cv2.imwrite does not create missing directories.
    os.makedirs(base_dir, exist_ok=True)

    img = read_xray(file_path)
    h, w = img.shape[:2]  # orig hw
    if aspect_ratio:
        r = dim / max(h, w)  # scale factor bringing the longer side to dim
        if r != 1:
            # INTER_AREA when shrinking, INTER_LINEAR when enlarging.
            interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
            img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=interp)
    else:
        # interpolation must be a keyword: the third positional parameter of
        # cv2.resize is `dst`, so the flag was previously not applied.
        img = cv2.resize(img, (dim, dim), interpolation=cv2.INTER_AREA)

    filename = file_path.split('/')[-1].split('.')[0]
    out_path = os.path.join(base_dir, f'{filename}.jpg')
    if not cv2.imwrite(out_path, img):
        raise OSError(f'could not write {out_path}')
    return filename.replace('dcm','')+'_image', w, h

In [None]:
#filepaths = df_train_image_level.filepath.iloc[:100 if debug else test_df.shape[0]]
#metadata = []
#for filepath in tqdm(filepaths):
#    metadata.append(resize_and_save(filepath))

In [None]:
# Walk every DICOM file under `root` and flatten its header into one record
# per data element; pixel data is skipped to keep the read cheap.
records = []
for file in tqdm(root.glob('**/*.dcm')):
    dcm = pydicom.dcmread(file, stop_before_pixels=True)
    # Dataset elements first, then the file-meta group.
    for element in itertools.chain(dcm.iterall(), dcm.file_meta.iterall()):
        is_sequence = element.VR == 'SQ'
        records.append(dict(
            VM=element.VM,
            VR=element.VR,
            tag=element.tag,
            name=element.name,
            keyword=element.keyword,
            # SQ values are redundant with other elements
            data=None if is_sequence else element.value,
            value=element.repval,
            filename=file.name,
        ))
metadata = pd.DataFrame(records)
metadata.head(10)


In [None]:
# Show the dataset left in `dcm` by the metadata loop, i.e. the last file
# read (header only, since it was read with stop_before_pixels=True).
dcm

In [None]:
# Column dtypes, non-null counts and memory footprint of the metadata table.
metadata.info()

In [None]:
# Peek at the first rows of the per-element metadata table.
metadata.head()

In [None]:
from pandas_profiling import ProfileReport

# Build the profiling report for the metadata table and show it inline.
metadata_profile = ProfileReport(metadata, title='Metadata Report', infer_dtypes=True)
#metadata_profile.to_file("profileMarchant.html")
metadata_profile.to_notebook_iframe()

In [None]:
# Pass the address as `url=` like the other image cells in this notebook.
# Given positionally, the URL is downloaded and embedded when the cell runs,
# which fails when the kernel has no internet access; with `url=` the
# notebook only stores a link to the image.
IPython.display.Image(url="https://www.googleapis.com/download/storage/v1/b/kaggle-forum-message-attachments/o/inbox%2F1723677%2F3f6e6f2e073f25f6db8f77b5a3409b43%2Fimg5.png?generation=1602506639545491&alt=media")

# Transfer Learning with DenseNet-169


## References
* https://towardsdatascience.com/understanding-and-visualizing-densenets-7f688092391a
* https://www.kaggle.com/pytorch/densenet169
## Segmentation models
* https://github.com/qubvel/segmentation_models.pytorch
* https://www.tensorflow.org/datasets/catalog/cassava

In [None]:
# Render a remote illustration by URL (linked, not downloaded by the kernel).
first_figure_url = 'https://imgur.com/wWHWbQt.jpg'
IPython.display.Image(url=first_figure_url)

In [None]:
# Render a remote illustration by URL (linked, not downloaded by the kernel).
second_figure_url = "https://imgur.com/oiTdqJL.jpg"
IPython.display.Image(url=second_figure_url)

# Submission
Utilities to create the submission file
* https://www.kaggle.com/farhanhaikhan/random-easy-submission-demo/notebook

In [None]:
def CreateSub(testid, tst_preds, sample_submission_path='../input/siim-covid19-detection/sample_submission.csv'):
    """Fill the sample submission with study-level class scores.

    Args:
        testid: DataFrame with an 'id' column. The part of each id before
            the first '.' is matched against the 'id' column of the sample
            submission. (The previous version ignored this parameter and
            read a global named ``test`` instead.)
        tst_preds: Indexable of per-row scores aligned positionally with
            ``testid``; entries 0..3 of each row are used as the negative,
            typical, indeterminate and atypical scores.
        sample_submission_path: CSV providing the 'id' and
            'PredictionString' columns to fill in.

    Returns:
        The sample-submission DataFrame with 'PredictionString' replaced for
        every id found in ``testid``; all other rows are left unchanged.
    """
    sub_df = pd.read_csv(sample_submission_path)

    # Index the submission rows by id once, instead of rescanning the frame
    # for every test row. Every submission row is considered, not only the
    # first len(testid) of them.
    rows_by_id = {}
    for row_label, sub_id in sub_df['id'].items():
        rows_by_id.setdefault(sub_id, []).append(row_label)

    for i, image_name in enumerate(testid['id']):
        matching_rows = rows_by_id.get(image_name.split('.')[0])
        if not matching_rows:
            continue
        scores = tst_preds[i]
        negative, typical, indeterminate, atypical = str(scores[0]), str(scores[1]), str(scores[2]), str(scores[3])
        # Each class is emitted with a fixed "0 0 1 1" box.
        sub_df.loc[matching_rows, 'PredictionString'] = f'negative {negative} 0 0 1 1 typical {typical} 0 0 1 1 indeterminate {indeterminate} 0 0 1 1 atypical {atypical} 0 0 1 1'
    return sub_df

# NOTE(review): `test` and `tst_preds` are not defined anywhere in the visible
# notebook - confirm they are produced by an inference step before this cell.
sumfile = CreateSub(test, tst_preds)
sumfile.to_csv('./submission.csv',index=False)

# Remove the ./test/study/ directory tree. Pure Python instead of a `!rm -r`
# shell escape, and a missing directory is silently ignored.
shutil.rmtree('./test/study/', ignore_errors=True)

# References
* https://www.kaggle.com/freaxmind/extract-metadata-from-dicom-files-efficiently
* https://github.com/pydicom/pydicom/blob/master/examples/input_output/plot_read_fileset.py