In [None]:
import plotly.offline as pyo
pyo.init_notebook_mode()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import math
import cv2 
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from sklearn import preprocessing
import random
import tensorflow as tf
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import warnings
warnings.filterwarnings("ignore")
import pydicom as dicom
!pip install visualkeras

# EDA

In [None]:
# Root directory of the competition files (Kaggle input mount); later cells
# build every file path by string-concatenating onto it, so it ends with '/'.
path = '/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/'
# Last expression of the cell: the directory listing is shown as the cell output.
os.listdir(path)

In [None]:
# Load the training table and the sample-submission table from the data root.
train_data, samp_subm = (
    pd.read_csv(path + csv_name)
    for csv_name in ('train.csv', 'sample_submission.csv')
)
# Row counts of both tables.
print('Number train samples:', train_data.shape[0])
print('Number test samples:', samp_subm.shape[0])

In [None]:
# Count the rows per class once and reuse the result for both axes.
class_counts = train_data['class_name'].value_counts()
x = class_counts.keys()
y = class_counts.values

fig, ax = plt.subplots(1, 1, figsize=(12, 4))
ax.bar(x, y)
# Rotate the tick labels that bar() already created instead of overwriting
# them with set_xticklabels, which expects the tick positions to be fixed first.
ax.tick_params(axis='x', labelrotation=90)
ax.set_xlabel('class_name')
ax.set_ylabel('Number of rows')
ax.set_title('Distribution of the labels')
ax.grid()
plt.show()

In [None]:
# Preview the first rows of the training table.
train_data.head()

# Let's Visualize

1. Read DICOM data
2. Visualize all classes in a grid
3. Visualize one input image per call 

Converting DICOM data to png/jpg may look straightforward, and there are going to be many notebooks doing it the simple way - just rescaling it.

However, you must consider that raw DICOM data is not actually linearly convertible to "human-friendly" png/jpg. In fact, most DICOMs store pixel values in exponential scale, which is resolved by standard DICOM viewers.

So in order to get the jpg/png that radiologists would initially see in their workspace, you need to apply some transformations. DICOM metadata stores information on how to make such "human-friendly" transformations.

An example code I use daily can be found below:
~ [Please Upvote Here](https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way)

In [None]:
def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Load a DICOM file and return its pixels as an 8-bit grayscale array.

    Parameters
    ----------
    path : str or path-like
        Location of the DICOM file.
    voi_lut : bool, default True
        Apply the VOI LUT (if made available by the DICOM device) so the data
        is transformed to the "human-friendly" view; otherwise the raw pixel
        array is used.
    fix_monochrome : bool, default True
        Invert images whose ``PhotometricInterpretation`` is ``"MONOCHROME1"``,
        which would otherwise look inverted.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array, min-max scaled to 0..255. An image whose pixels are all
        equal comes back as all zeros.
    """
    # dcmread is the supported reader; the read_file alias is deprecated and
    # no longer exists in recent pydicom releases.
    ds = pydicom.dcmread(path)
    pixels = ds.pixel_array
    data = apply_voi_lut(pixels, ds) if voi_lut else pixels
    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and ds.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    # Min-max scale into 0..255.
    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Constant image: dividing by the zero range would yield NaNs.
        return np.zeros(data.shape, dtype=np.uint8)
    data = data / peak
    return (data * 255).astype(np.uint8)

In [None]:
# Show one randomly sampled X-ray per class in a grid with five columns.
class_names = train_data['class_name'].unique()
n_cols = 5
# Derive the row count from the number of classes so add_subplot never runs
# out of slots (a fixed 3 x 5 grid only fits 15 classes).
n_rows = max(1, math.ceil(len(class_names) / n_cols))
f = plt.figure(figsize=(50, 20))
for count, class_name in enumerate(class_names, start=1):
    seg = train_data[train_data['class_name'] == class_name]
    image_id = seg.sample().iloc[0]['image_id']
    img = read_xray(path + 'train/' + image_id + '.dicom')
    # Draw through the Axes object rather than the pyplot "current axes" state.
    ax = f.add_subplot(n_rows, n_cols, count)
    ax.imshow(img)
    ax.set_title(class_name, fontsize=30)
f.suptitle("Chest X ray", size=32)
plt.show()

In [None]:
# Set the default size for matplotlib figures created from here on.
plt.rcParams["figure.figsize"] = (20,10)
def look(id):
    """Fetch the X-ray and bounding box for one row of ``train_data``.

    Parameters
    ----------
    id : row label
        Label handed to ``train_data.loc`` to select the row.

    Returns
    -------
    tuple
        ``(img, (x_min, y_min, x_max, y_max))`` where ``img`` is whatever
        ``read_xray`` returns for the row's DICOM file and the corners are
        truncated to ``int``. If any corner is NaN the box is
        ``(-1, -1, -1, -1)``.
    """
    file_stem = train_data.loc[id, 'image_id']
    img = read_xray(path+'train/'+file_stem+'.dicom')
    corners = [
        train_data.loc[id, column]
        for column in ('x_min', 'y_min', 'x_max', 'y_max')
    ]
    # A row without a complete box gets the -1 sentinel.
    if any(math.isnan(value) for value in corners):
        return img , (-1,-1,-1,-1)
    return img, tuple(int(value) for value in corners)
    
# Example call: image and bounding box for row 825 of train_data.
look(825)

# Make all images the same size and rescale the bounding boxes

In [None]:
# Show the first rows of the training table again for reference.
train_data.head()

In [None]:
# Set the default size for matplotlib figures created from here on.
plt.rcParams["figure.figsize"] = (20,10)
def show(img , bbox, title,resized = False):
    """Display ``img`` with ``bbox`` outlined in black, without modifying ``img``.

    Parameters
    ----------
    img : numpy.ndarray
        Image to display.
    bbox : sequence
        ``(x_min, y_min, x_max, y_max)`` corners of the rectangle.
    title : str
        Title placed above the image.
    resized : bool, default False
        When True the outline is 1 px thick; otherwise it is 30 px thick.
    """
    start_point = (bbox[0], bbox[1])
    end_point = (bbox[2], bbox[3])
    color = (0, 0, 0)
    thickness = 1 if resized else 30
    # cv2.rectangle draws in place, so draw on a copy; otherwise the outline
    # stays burned into the caller's array and leaks into later processing.
    canvas = cv2.rectangle(img.copy(), start_point, end_point, color, thickness)
    plt.imshow(canvas)
    plt.title(title, fontsize=30)

In [None]:
# Walk through a single row: load its image and box, then display them.
idnum = 2
image_id = train_data.loc[idnum, 'image_id']
title = train_data.loc[idnum, 'class_name']
# Reuse look() rather than repeating its DICOM read and corner extraction here.
img, (x1, y1, x2, y2) = look(idnum)
print(img.shape)
print(x1, y1, x2, y2)
show(img, [x1, y1, x2, y2], title)

In [None]:
# Resize the image loaded above to targetSize x targetSize and map its box
# corners into the resized frame.
targetSize = 224
y_, x_ = img.shape[0], img.shape[1]
x_scale, y_scale = targetSize / x_, targetSize / y_
print(x_scale, y_scale)
img = cv2.resize(img, (targetSize, targetSize)).reshape(targetSize, targetSize, 1)
print(img.shape)
origLeft, origTop, origRight, origBottom = x1, y1, x2, y2
# Horizontal coordinates scale with x_scale, vertical ones with y_scale.
x, xmax = (int(np.round(edge * x_scale)) for edge in (origLeft, origRight))
y, ymax = (int(np.round(edge * y_scale)) for edge in (origTop, origBottom))
print(x, y, xmax, ymax)
show(img.reshape(targetSize, targetSize), [x, y, xmax, ymax], title, True)

# Now let's create a function
1. Args: image and bounding box
2. Output: the image resized to 224 x 224 and the rescaled bounding box

In [None]:
def create_image(img , bbox, target_size=224):
    """Resize ``img`` to a square and rescale ``bbox`` into the resized frame.

    Parameters
    ----------
    img : numpy.ndarray
        Single-channel image; ``img.shape[0]`` is its height and
        ``img.shape[1]`` its width.
    bbox : sequence
        ``(x_min, y_min, x_max, y_max)`` in original pixel coordinates, or a
        sequence starting with ``-1`` to signal "no box".
    target_size : int, default 224
        Side length of the square output image.

    Returns
    -------
    tuple
        ``(resized, box)`` where ``resized`` has shape
        ``(target_size, target_size, 1)``. ``box`` is the rescaled corners as a
        tuple of ints, or ``[-1, -1, -1, -1]`` when the input carried the
        "no box" sentinel.
    """
    height, width = img.shape[0], img.shape[1]
    x_scale = target_size / width
    y_scale = target_size / height
    resized = cv2.resize(img, (target_size, target_size))
    # Add an explicit channel axis.
    resized = resized.reshape(target_size, target_size, 1)
    if bbox[0] == -1:
        return resized, [-1, -1, -1, -1]
    x_min, y_min, x_max, y_max = bbox[0], bbox[1], bbox[2], bbox[3]
    scaled_box = (
        int(np.round(x_min * x_scale)),
        int(np.round(y_min * y_scale)),
        int(np.round(x_max * x_scale)),
        int(np.round(y_max * y_scale)),
    )
    return resized, scaled_box

Let's test with rows 2 and 0

In [None]:
# Row 2: print the image shape and box as loaded...
img , bbox = look(2)
print(img.shape , bbox)
# ...and again after create_image has resized the image and rescaled the box.
img , bbox = create_image(img , bbox)
print(img.shape , bbox)

In [None]:
# Row 0: print the image shape and box as loaded...
img , bbox = look(0)
print(img.shape , bbox)
# ...and again after create_image has resized the image and rescaled the box.
img , bbox = create_image(img , bbox)
print(img.shape , bbox)

# Load Data

Here we create `X` for the training images, `y_bb` for the bounding boxes and `y_class` for the one-hot class labels.
From here we can proceed to model building.

In [None]:
from tqdm import tqdm
from sklearn.preprocessing import OneHotEncoder

# Build one resized image and one rescaled box per row of train_data.
train_image = []
train_bb = []
# look() selects rows by label via .loc, so iterate over the index labels
# themselves; this also keeps the loop from overwriting the image_id string
# used by earlier cells with an integer.
for row_label in tqdm(train_data.index):
    img, bbox = look(row_label)
    img, bbox = create_image(img, bbox)
    train_image.append(img)
    train_bb.append(bbox)
X = np.array(train_image)
y_bb = np.array(train_bb)

# One-hot encode the class names; the encoder expects a 2-D column.
y_class = train_data['class_name'].to_numpy().reshape(-1, 1)
enc = OneHotEncoder(handle_unknown='ignore')
enc.fit(y_class)
print(enc.categories_)
y_class = enc.transform(y_class).toarray()

print('Data   :   '+str(X.shape))
print('Output :   '+str(y_class.shape))
# This line reports the bounding-box array, so label it as such.
print('BBoxes :   '+str(y_bb.shape))

In [None]:
# Display the bounding-box array built above.
y_bb

# Transfer Learning

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the images and class labels, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_class, test_size=0.1, random_state=42
)
print(f'Train data    :{X_train.shape}')
print(f'Test data     :{X_test.shape}')
print(f'Train Output  :{y_train.shape}')
print(f'Test Output   :{y_test.shape}')

In [None]:
import visualkeras
from keras.applications.vgg16 import VGG16
# Instantiate VGG16 with the library's default arguments and render its layer stack.
model = VGG16()
visualkeras.layered_view(model)

In [None]:
from keras.utils import plot_model
# Write a diagram of the model, including layer shapes, to model.png.
plot_model(model, to_file='model.png',show_shapes=True)

In [None]:
Training_Output_Results = pd.DataFrame(columns=['Epochs','Learning Rate','Train_Loss','Train_Accuracy','Train_Precision','Val_Loss','Val_Accuracy','Val_Precision'])

METRICS = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision')
]

# The stock VGG16 takes 3-channel images and ends in the 1000-way ImageNet
# classifier, so it cannot be fit on the single-channel X_train and the one-hot
# y_train built above. Build a transfer-learning model instead: a frozen
# ImageNet-pretrained convolutional base with a new softmax head sized to
# this dataset's classes.
n_classes = y_train.shape[1]
base_model = tf.keras.applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(X_train.shape[1], X_train.shape[2], 3),
)
base_model.trainable = False  # only the new head is trained

inputs = tf.keras.Input(shape=X_train.shape[1:])
# Repeat the grayscale channel three times to match the RGB input of the base.
net = tf.keras.layers.Concatenate(axis=-1)([inputs, inputs, inputs])
# Apply the same input preprocessing VGG16 was trained with.
net = tf.keras.layers.Lambda(tf.keras.applications.vgg16.preprocess_input)(net)
net = base_model(net, training=False)
net = tf.keras.layers.GlobalAveragePooling2D()(net)
outputs = tf.keras.layers.Dense(n_classes, activation='softmax')(net)
model = tf.keras.Model(inputs, outputs)

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=METRICS
)
history = model.fit(X_train, y_train, epochs=200, validation_split=0.3, batch_size=15, verbose=1, shuffle=True)