In [None]:
import numpy as np
import pandas as pd
import os
import sys
from matplotlib import pyplot
import matplotlib.patches as patches
from collections import Counter
from keras.preprocessing.image import ImageDataGenerator
import pydicom as dcm
from pydicom import dcmread
import glob 
import pylab
import seaborn as sns
# !pip install -q pydicom

# After installing pydicom. This is needed to load .dcm files
import pydicom

# **Setting up project path**

In [None]:
# Load train set image metadata
dataDirPath = '../input/rsna-pneumonia-detection-challenge/'

Train_Image_path = dataDirPath + 'stage_2_train_images'

**Creating a function to load the metadata from images**

In [None]:
class ImageMetadata():
    """Pairs a data-set directory with the name of one image file inside it."""

    def __init__(self, setName, file):
        # setName: data set directory (train or test); file: image file name
        self.setName, self.file = setName, file

    def imagePath(self):
        """Return the data-set directory and the file name joined into one path."""
        location = os.path.join(self.setName, self.file)
        return location

    def __repr__(self):
        # The printable form of an instance is simply its image path.
        return self.imagePath()
    

# function to load image metadata
def loadImageMetadata(dataSetName):
    """Collect one ImageMetadata entry for every ``.dcm`` file in a directory.

    Parameters
    ----------
    dataSetName : str
        Directory to scan (not recursive).

    Returns
    -------
    numpy.ndarray
        1-D object array of ImageMetadata instances, ordered by file name.
    """
    imageMetadata = [
        ImageMetadata(dataSetName, f)
        # os.listdir() yields names in arbitrary order; sorting keeps positional
        # indexing into the result reproducible between runs and machines.
        for f in sorted(os.listdir(dataSetName))
        # Check file extension. Allow only .dcm files.
        if os.path.splitext(f)[1] == '.dcm'
    ]
    # dtype=object keeps the element type stable even when no file matched
    # (an empty list would otherwise become a float64 array).
    return np.array(imageMetadata, dtype=object)

**Getting Metadata Information**

In [None]:
# Metadata entries for every DICOM file in the training image folder
trainSetImageMetadata = loadImageMetadata(Train_Image_path)

# Report how many entries were found and what one of them looks like
print("trainSetImageMetadata.shape : ", trainSetImageMetadata.shape)
print("Sample image path : ", trainSetImageMetadata[0])

**Insights from Metadata**

There are 26684 images in the training data.
These are DICOM images, which hold pixel data as well as several tags such as patient id, age, gender, etc.

**Create a function to load an image and load a sample image**

In [None]:
def loadImage(path):
    """Read the DICOM file at ``path`` with pydicom and return the parsed result."""
    return pydicom.dcmread(path)

# Pick one training image by position, load it and show its pixel data
imgIndex = 4
imgPath = trainSetImageMetadata[imgIndex].imagePath()
imgData = loadImage(imgPath)

pyplot.imshow(imgData.pixel_array, cmap=pyplot.cm.bone)

**Preparing Dataset with patient id and respective image paths**

In [None]:
# One-column frame holding the ImageMetadata entries
trainSetImageMetadata_df = pd.DataFrame({"Path": trainSetImageMetadata})
trainSetImageMetadata_df.head(2)

def getImgId(_imgData) :
    """Return the image id of a path: its file name without the extension.

    ``_imgData`` is converted with ``str()`` first, so any object whose string
    form is a path can be passed. The id is taken from the last path component
    rather than from a fixed position, so it does not depend on how deeply the
    data directory is nested.
    """
    fileName = os.path.basename(str(_imgData))
    return os.path.splitext(fileName)[0]

# Map every patient id (derived from the file name) to its image path entry
imageIdPaths = pd.DataFrame({
    "patientId": trainSetImageMetadata_df["Path"].apply(getImgId),
    "imgPath": trainSetImageMetadata_df["Path"],
})

print("imageIdPaths", imageIdPaths.shape)
imageIdPaths.head(2)

# Exploratory Data Analysis on Train Labels and Detailed Class Info CSV data sets

**Analyzing Detailed Classes CSV file**

Step 1 -> Define and read the Detail_Info CSV File 

In [None]:
# CSV with one detailed class label per row
classesPath = '{}stage_2_detailed_class_info.csv'.format(dataDirPath)
detailedClasses = pd.read_csv(classesPath)

detailedClasses.head(2)

Step 2 -> Check for Missing Values


In [None]:
# Per column: how many cells are missing (True) and how many are present (False).
# Series.value_counts replaces the deprecated top-level pd.value_counts.
detailedClasses.isna().apply(pd.Series.value_counts)

No Missing Values found

Step 3 -> Checking the shape of data frame

In [None]:
# File has 30227 rows and 2 columns - PatientID & Class
print("detailedClasses.shape : ", detailedClasses.shape)

Step 3 -> Checking Unique Patients

In [None]:
# Number of distinct patient ids in the detailed class file
print("Unique patientIds : ", detailedClasses["patientId"].nunique())

Total number of unique patients in data - 26684

**Observation - As we have total 30227 records and out of that 26684 are unique records, this shows presence of multiple records for some patients**

Step 4 -> Checking unique Classes

In [None]:
# Number of distinct class labels (the label text now names the column that is counted)
print("Unique classes : ", detailedClasses['class'].nunique())

# unique must be called; without the parentheses the bound method itself is printed
print(detailedClasses['class'].unique())

3 Unique classes observed 
1 - No Lung Opacity/Not Normal,
2 - Normal,
3 - Lung Opacity

In [None]:
# Bar chart with the number of rows per class
sns.countplot(data=detailedClasses, x="class", hue="class")

**Analyzing Train Labels Dataset**

Step 1 -> Reading the data 

In [None]:
# CSV with the target flag and bounding-box columns per row
labelsPath = '{}stage_2_train_labels.csv'.format(dataDirPath)
trainLabels = pd.read_csv(labelsPath)

trainLabels.head(2)

Step 2 -> Checking the missing values if any

In [None]:
# Per column: how many cells are missing (True) and how many are present (False).
# Series.value_counts replaces the deprecated top-level pd.value_counts.
trainLabels.isna().apply(pd.Series.value_counts)

**Observation - For around 20672 records the bounding box coordinates are not available, whereas for 9555 records they are available**

In [None]:
# First two rows whose Target is 0
trainLabels.loc[trainLabels["Target"] == 0].head(2)

A few records have missing values in the x, y, width and height columns, but no missing values are observed in patientId and Target.

It is also observed that these missing values occur only in records with Target '0'.

The x, y, width and height columns hold the bounding-box information for images where pneumonia is detected.

**Explanation of missing values - These are not really missing values: bounding-box coordinates are expected to be absent for images where pneumonia is not detected (Target - '0')**

Hence we conclude that there are no missing values in this dataset either

Step 3 -> Checking unique Patients

In [None]:
# Number of distinct patient ids in the train labels file
print("Unique patientIds : ", trainLabels["patientId"].nunique())

Total unique patients found - 26684
This is the same as the number of patients in the detailed class info CSV, hence both files share the information for the same patients

Step 4 -> Checking unique Targets

In [None]:
# The labels now describe the Target column that is actually inspected
# (they previously read "Unique patientIds").
print("Unique target count : ", trainLabels['Target'].nunique())

print("Unique targets : ", trainLabels['Target'].unique())

Train labels have only 2 target values [0 & 1]

**Conclusion - In Train labels only two target variables are present 0 & 1, where as in Detailed_Info sheet we have 3 classes.**

**As we have 3 Classes in Detailed_Info dataset and 2 Target Variables in Train_Labels, concatenating to get better insight into the data**

Step 1 -> Sorting both the datasets based on patientId

In [None]:
# Order both frames by patient id
trainLabels = trainLabels.sort_values("patientId")
detailedClasses = detailedClasses.sort_values("patientId")

Step 2 -> Concatenating the data

In [None]:
# Attach the class of each patient to every label row by joining on patientId.
# pd.concat(axis=1) pairs rows by index label, which is only right while both CSV
# files list their rows in exactly the same order; joining on the key does not
# depend on row order.
# The class file is reduced to one row per patient first so that patients with several
# label rows are not multiplied; validate= raises if that reduction ever fails.
Combined_Data = trainLabels.merge(
    detailedClasses.drop_duplicates(subset="patientId")[["patientId", "class"]],
    on="patientId",
    how="left",  # keep every label row, in the order of trainLabels
    validate="many_to_one",
)
Combined_Data.head(5)

**Validating the concatenation results**

In [None]:
# Row and column counts of the combined frame
Combined_Data.shape

In [None]:
# Missing / present cell counts per column of the combined frame.
# Series.value_counts replaces the deprecated top-level pd.value_counts.
Combined_Data.isna().apply(pd.Series.value_counts)

In [None]:
# Missing / present cell counts per column, restricted to rows with Target == 1.
# Series.value_counts replaces the deprecated top-level pd.value_counts.
Combined_Data[Combined_Data["Target"] == 1].isna().apply(pd.Series.value_counts)

In [None]:
# Missing / present cell counts per column, restricted to rows with Target == 0.
# Series.value_counts replaces the deprecated top-level pd.value_counts.
Combined_Data[Combined_Data["Target"] == 0].isna().apply(pd.Series.value_counts)

In [None]:
# Missing / present cell counts per column, restricted to rows of class "Lung Opacity".
# Series.value_counts replaces the deprecated top-level pd.value_counts.
Combined_Data[Combined_Data["class"] == "Lung Opacity"].isna().apply(pd.Series.value_counts)

From Above Analysis our concatenated data is correct

## Prepare data for training


Step 1 -> 
* Convert data to only two classes, 'Normal' and 'Lung Opacity'
* Splitting the data in two parts, train and validation sets.


In [None]:
# Convert data to only two classes, 'Normal' and 'Lung Opacity'.
# The result is assigned back to the column: an in-place replace on the selected
# column is chained assignment, which recent pandas versions warn about and which
# does not update the frame under copy-on-write.
Combined_Data["class"] = Combined_Data["class"].replace("No Lung Opacity / Not Normal", "Normal")
Combined_Data.head(5)

In [None]:
# Number of leading rows of Combined_Data that form the training split;
# every remaining row goes to the validation split.
TRAIN_ROW_COUNT = 25000

imageIdPaths = imageIdPaths.sort_values("patientId")

# Set all NaN values to 0 in train and validate data sets. While training NaN will not have any meaning.
#    * x, y, width and height values as zero(0) means no bounding box.
# fillna() returns new frames, so neither split is a view on Combined_Data and no
# in-place write on a slice (SettingWithCopyWarning) takes place.
train_CombinedData = Combined_Data.iloc[:TRAIN_ROW_COUNT].fillna(0)
validate_CombinedData = Combined_Data.iloc[TRAIN_ROW_COUNT:].fillna(0)

print("train_CombinedData.shape : ", train_CombinedData.shape)
print("validate_CombinedData.shape : ", validate_CombinedData.shape)

print("\nunique train patients : ", train_CombinedData["patientId"].nunique())
print("unique validate patients : ", validate_CombinedData["patientId"].nunique())

# If the two unique counts add up to more than the total, a patient's rows were
# cut across the train/validate boundary.
print("\nTotal unique patients : ", imageIdPaths["patientId"].nunique())
print("Total of unique train and test : ", train_CombinedData["patientId"].nunique() + validate_CombinedData["patientId"].nunique())

print("\nLast from train set : ", train_CombinedData.iloc[-1]["patientId"])
print("First from validate set : ", validate_CombinedData.iloc[0]["patientId"])



In [None]:
# Split the image paths by membership in the label splits, not at a hand-tuned
# row number: each split then contains a path for exactly the patients whose label
# rows it has to serve, whatever the size of the data set.
train_imageIdPaths = imageIdPaths[imageIdPaths["patientId"].isin(train_CombinedData["patientId"])]
validate_imageIdPaths = imageIdPaths[imageIdPaths["patientId"].isin(validate_CombinedData["patientId"])]

print("train_imageIdPaths.shape : ", train_imageIdPaths.shape)
print("validate_imageIdPaths.shape : ", validate_imageIdPaths.shape)

print("\nunique train patients : ", train_imageIdPaths["patientId"].nunique())
print("unique validate patients : ", validate_imageIdPaths["patientId"].nunique())

print("\nTotal unique patients : ", imageIdPaths["patientId"].nunique())
print("Total of unique train and test : ", train_imageIdPaths["patientId"].nunique() + validate_imageIdPaths["patientId"].nunique())

print("\nLast from train set : ", train_imageIdPaths.iloc[-1]["patientId"])
print("First from validate set : ", validate_imageIdPaths.iloc[0]["patientId"])


In [None]:
# NOTE(review): the statement below is not valid Python, so this cell fails with a
# SyntaxError and halts a "Run All" here. Presumably a manual stop marker - confirm
# the intent and remove the cell or replace it with an explicit guard.
wer = wer r

# Build UNet

In [None]:
from tensorflow.keras.layers import Layer, Convolution2D, Flatten, Dense
from tensorflow.keras.layers import Concatenate, UpSampling2D, Conv2D, Reshape, GlobalAveragePooling2D
from tensorflow.keras.models import Model

import cv2

from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications.mobilenet import preprocess_input 

import tensorflow.keras.utils as pltUtil
from tensorflow.keras.utils import Sequence

import math

from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.applications.resnet import preprocess_input as resnetProcess_input


In [None]:
# Side length in pixels that images are resized to; also the model input size
IMAGE_SIZE = 224

# Reference size used when rescaling bounding-box coordinates
# (presumably the resolution of the source DICOM images - confirm)
IMG_WIDTH = IMG_HEIGHT = 1024

In [None]:
BATCH_SIZE = 10

class UNetTrainGenerator(Sequence):
    """Keras Sequence that yields (images, masks) batches for the segmentation model.

    One sample is produced per row of ``_CombinedData``: the image of that row's
    patient, resized to IMAGE_SIZE x IMAGE_SIZE and replicated to three channels,
    together with a mask that is 1 inside the row's bounding box and 0 elsewhere.

    Parameters
    ----------
    _imageIdPaths : DataFrame with the columns "patientId" and "imgPath".
    _CombinedData : DataFrame with the columns "patientId", "x", "y", "width", "height".
    **kwargs : forwarded unchanged to the Sequence base class.
    """

    def __init__(self, _imageIdPaths, _CombinedData, **kwargs):
        # Initialise the Sequence base class as well; newer Keras versions warn
        # when a subclass skips this.
        super().__init__(**kwargs)
        self.pids = _CombinedData["patientId"].to_numpy()
        self.imgIdPaths = _imageIdPaths
        # patientId -> image path, built once so that fetching a batch does not filter
        # the whole frame for every single image. The first path of a patient wins.
        firstPaths = _imageIdPaths.drop_duplicates(subset="patientId")
        self._pathByPid = dict(zip(firstPaths["patientId"], firstPaths["imgPath"]))
        # Resize Bounding box
        self.coords = _CombinedData[["x", "y", "width", "height"]].to_numpy() * IMAGE_SIZE / IMG_WIDTH

    def __len__(self):
        """Number of batches per epoch; the last batch may be smaller than BATCH_SIZE."""
        return math.ceil(len(self.coords) / BATCH_SIZE)

    def __getitem__(self, idx): # Get a batch
        """Return the tuple (batch_images, batch_masks) for batch number ``idx``.

        Raises KeyError when a patient id has no entry in the image paths.
        """
        rows = slice(idx * BATCH_SIZE, (idx + 1) * BATCH_SIZE)
        batch_coords = self.coords[rows] # Image coords
        batch_pids = self.pids[rows] # Image pids

        batch_images = np.zeros((len(batch_pids), IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)
        batch_masks = np.zeros((len(batch_pids), IMAGE_SIZE, IMAGE_SIZE))
        for _indx, _pid in enumerate(batch_pids):
            _imgData = loadImage(str(self._pathByPid[_pid])) # Read image

            # Resize image
            resized_img = cv2.resize(_imgData.pixel_array, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_AREA)

            # Preprocess the single-channel image once and broadcast it to all three
            # channels; every channel carries the same values.
            processed = preprocess_input(np.array(resized_img, dtype=np.float32))
            batch_images[_indx] = processed[:, :, np.newaxis]

            x, y, width, height = (int(value) for value in batch_coords[_indx])
            # A row without a box has width == height == 0, which selects nothing.
            batch_masks[_indx][y:y+height, x:x+width] = 1

        return batch_images, batch_masks

In [None]:
# Batch generators for the training and the validation split
trainUNetDataGen = UNetTrainGenerator(_imageIdPaths=train_imageIdPaths, _CombinedData=train_CombinedData)
validateUNetDataGen = UNetTrainGenerator(_imageIdPaths=validate_imageIdPaths, _CombinedData=validate_CombinedData)

# len() of a generator is its number of batches
print(len(trainUNetDataGen), "# of iterations in one train epoch")
print(len(validateUNetDataGen), "# of iterations in one validate epoch")

In [None]:
# To show image with mask
def showMaskedImage(_imageSet, _maskSet, _index) :
    """Display image ``_index`` of ``_imageSet`` multiplied by mask ``_index`` of ``_maskSet``.

    Only the first channel is drawn. The product is computed into a new array, so
    neither ``_imageSet`` nor ``_maskSet`` is modified and the same image batch can
    afterwards still be passed to a model or shown with a different mask.
    """
    maskedChannel = _maskSet[_index] * _imageSet[_index][:, :, 0]

    pyplot.imshow(maskedChannel, cmap=pyplot.cm.bone)


In [None]:
# Fetch the first batch once; indexing the generator separately for images and
# masks would read and preprocess the same images from disk twice.
imageSet0, maskSet0 = trainUNetDataGen[0]
showMaskedImage(imageSet0, maskSet0, 5)

In [None]:

ALPHA = 1.0

def create_UNetModel(trainable=True):
    """Build a U-Net style segmentation model on top of an ImageNet MobileNet.

    ``trainable`` is written to the ``trainable`` flag of every MobileNet layer.
    The returned model maps (IMAGE_SIZE, IMAGE_SIZE, 3) images to
    (IMAGE_SIZE, IMAGE_SIZE) sigmoid masks.
    """
    encoder = MobileNet(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3), include_top=False, alpha=ALPHA, weights="imagenet")

    for encoderLayer in encoder.layers:
        encoderLayer.trainable = trainable

    # Decoder: start from the deepest MobileNet activation and repeatedly upsample,
    # joining the encoder activation of matching resolution after each step.
    x = encoder.get_layer("conv_pw_13_relu").output  # 7x7
    skipLayerNames = (
        "conv_pw_11_relu",  # 14x14
        "conv_pw_5_relu",   # 28x28
        "conv_pw_3_relu",   # 56x56
        "conv_pw_1_relu",   # 112x112
    )
    for skipName in skipLayerNames:
        skip = encoder.get_layer(skipName).output
        x = Concatenate()([UpSampling2D()(x), skip])
    x = UpSampling2D()(x)  # upsample to 224x224

    # One sigmoid value per pixel, then drop the channel axis.
    # (To feed this output into ResNet50 the convolution would need 3 filters and
    # the Reshape below would have to be removed.)
    x = Conv2D(1, kernel_size=1, activation="sigmoid", name="masks")(x)
    x = Reshape((IMAGE_SIZE, IMAGE_SIZE))(x)

    return Model(inputs=encoder.input, outputs=x)

In [None]:
import tensorflow as tf
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.backend import log, epsilon

def dice_coefficient(y_true, y_pred):
    """Dice score 2*sum(y_true*y_pred) / (sum(y_true + y_pred) + epsilon).

    Both sums run over every element of the tensors passed in.
    """
    overlap = tf.reduce_sum(y_true * y_pred)
    total = tf.reduce_sum(y_true + y_pred)

    # epsilon keeps the division defined when both tensors are all zero
    return (2 * overlap) / (total + tf.keras.backend.epsilon())



def losses(y_true, y_pred):
    """Binary cross-entropy minus the log of the (epsilon-shifted) dice coefficient."""
    logDice = log(dice_coefficient(y_true, y_pred) + epsilon())
    return binary_crossentropy(y_true, y_pred) - logDice

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# `learning_rate` replaces the deprecated `lr` alias. `epsilon=None` and `decay=0.0`
# only restated the defaults and are rejected by newer Keras releases, so they are dropped.
adamOptimizer = Adam(learning_rate=1e-6, beta_1=0.9, beta_2=0.999, amsgrad=False)

# Weights are saved after an epoch whenever the training loss improved.
# The deprecated `period=1` is omitted: checking once per epoch is the default.
checkpoint = ModelCheckpoint("model-{loss:.2f}.h5", monitor="loss", verbose=1, save_best_only=True,
                             save_weights_only=True, mode="min")
stop = EarlyStopping(monitor="loss", patience=5, mode="min")
# NOTE(review): this patience equals the early-stopping patience, so the learning rate
# is presumably never reduced before training stops - confirm the intended values.
reduce_lr = ReduceLROnPlateau(monitor="loss", factor=0.2, patience=5, min_lr=1e-8, verbose=1, mode="min")

In [None]:
# Set to False to build and compile the model without training it
trainUnetModel = True

# Number of training epochs (upper bound; early stopping may end training sooner)
EPOCHS = 10

#WEIGHTS_FILE = "../input/unwets/model-2.56.h5"
# create_UNetModel() leaves every layer trainable by default, so no extra loop over
# the layers is needed after compiling.
UNetModel = create_UNetModel()
UNetModel.compile(loss=losses, optimizer=adamOptimizer, metrics=[dice_coefficient])
#UNetModel.load_weights(WEIGHTS_FILE)

if trainUnetModel:
    # Model.fit accepts Sequence objects directly; fit_generator is deprecated.
    # The epoch count comes from EPOCHS, which was previously declared but ignored.
    history = UNetModel.fit(trainUNetDataGen,
                            epochs=EPOCHS,
                            validation_data=validateUNetDataGen,
                            callbacks=[checkpoint, reduce_lr, stop],
                            shuffle=True,
                            verbose=1)
    # history.history is a dict; it is stored as a pickled 0-d object array
    unet_history = np.array(history.history)
    np.save("unetTrain_hist3", unet_history, allow_pickle=True)

In [None]:
# Check sample ground truth masked image and predicted masked image
imageSet0, maskSet0 = trainUNetDataGen[0]

# Predict before anything is displayed, and hand copies to the display helper, so
# that the images fed to the model can never be altered by plotting.
predMasks = UNetModel.predict(imageSet0)

# One figure per image; two imshow calls on the same axes would leave only the last one visible.
pyplot.figure()
showMaskedImage(imageSet0.copy(), maskSet0, 5)

pyplot.figure()
showMaskedImage(imageSet0.copy(), predMasks, 5)

In [None]:
# NOTE(review): `drtery` is not defined in the visible part of this notebook, so this
# cell raises a NameError and halts a "Run All" here. Presumably a manual stop
# marker - confirm the intent and remove the cell or replace it with an explicit guard.
qwrv =drtery 

In [None]:
def create_resNetModel(trainable=True) :
    """Build a two-class softmax classifier on top of an ImageNet ResNet50.

    ``trainable`` is written to the ``trainable`` flag of every backbone layer except
    the last N, where N is the position of the layer "conv2_block3_out" counted from
    the start of the backbone. The last N layers keep their existing flag.
    """
    # Pre-trained ResNet50 without its classification top
    backbone = ResNet50(weights='imagenet', input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3), include_top=False)

    # Position of "conv2_block3_out" from the first layer; the same number is then
    # used to count layers back from the END of the backbone.
    cutoff = [layer.name for layer in backbone.layers].index("conv2_block3_out")
    for backboneLayer in backbone.layers[:-cutoff]:
        backboneLayer.trainable = trainable

    # Classifier head: global average pooling -> 1024 ReLU units -> 2-way softmax
    features = GlobalAveragePooling2D()(backbone.layers[-1].output)
    hidden = Dense(1024, activation='relu')(features)
    classProbabilities = Dense(2, activation='softmax', kernel_initializer='zero', name='dense_class_{}'.format(2))(hidden)

    return Model(inputs=backbone.input, outputs=classProbabilities)

In [None]:

# Weights are saved after an epoch whenever the training loss improved.
# The deprecated `period=1` is omitted: checking once per epoch is the default.
clsCheckpoint = ModelCheckpoint("nemoResnetModel-{loss:.2f}.h5", monitor="loss", verbose=1, save_best_only=True,
                                 save_weights_only=True, mode="min")
clsStop = EarlyStopping(monitor="loss", patience=5, mode="min")
# NOTE(review): this patience equals the early-stopping patience, so the learning rate
# is presumably never reduced before training stops - confirm the intended values.
clsReduce_lr = ReduceLROnPlateau(monitor="loss", factor=0.2, patience=5, min_lr=1e-6, verbose=1, mode="min")

In [None]:
# Set to False to skip building and training the classifier
trainResNetClassifierModel = True

# Number of training epochs for the classifier
CLSI_EPOCHS = 1


# This is for training the model.
if trainResNetClassifierModel:
    resNetClassifierModel = create_resNetModel()
    # NOTE(review): the model ends in a 2-unit softmax; 'categorical_crossentropy' is the
    # usual loss for that head - confirm against the label format the generators produce.
    resNetClassifierModel.compile(optimizer='SGD', loss='binary_crossentropy', metrics = ['accuracy'])

    print("Training model... ")
    # NOTE(review): trainClasiDataGen and validateClasiDataGen are not defined in the
    # visible part of this notebook; they must exist before this cell can run.
    # Model.fit accepts generators directly; fit_generator is deprecated.
    resnet_history = resNetClassifierModel.fit(trainClasiDataGen,
                                               epochs=CLSI_EPOCHS,
                                               validation_data=validateClasiDataGen,
                                               callbacks=[clsCheckpoint, clsReduce_lr, clsStop],
                                               verbose=1)




In [None]:
# Render the classifier architecture to a PNG file.
# (Pass UNetModel in place of resNetClassifierModel to draw the segmentation model.)
pltUtil.plot_model(
    resNetClassifierModel,
    to_file="resNetClassifierModel.png",
    show_layer_names=True,
    show_shapes=True,
    expand_nested=False,
    dpi=70,
)

In [None]:
# Chain UNetModel and resNetClassifierModel to get UNetResNetModel:
# the mask predicted by the UNet becomes the input image of the classifier.
#
# The UNet emits one (IMAGE_SIZE, IMAGE_SIZE) mask per image while the classifier
# takes 3-channel images, so the mask gets a channel axis and is repeated three times.
# Concatenating UNetModel.output with resNetClassifierModel.input cannot work: the two
# tensors differ in rank, and the second one is the input placeholder of a separate
# graph that is not reachable from UNetModel.input.
# NOTE(review): the mask values lie in [0, 1]; confirm that this is the input range the
# classifier is meant to be trained on.
maskChannel = Reshape((IMAGE_SIZE, IMAGE_SIZE, 1))(UNetModel.output)
maskAsImage = Concatenate()([maskChannel, maskChannel, maskChannel])
UNet_ResNet_Layers = resNetClassifierModel(maskAsImage)

UNetResNetModel = Model(inputs=UNetModel.input, outputs=UNet_ResNet_Layers)

In [None]:

class JoinModels(Layer):
    """Trainable linear projection: multiplies a 2-D input by a weight matrix.

    The weight matrix has the shape (input_dim, output_dim), where input_dim is the
    size of axis 1 of the input, and starts with values drawn uniformly from [0, 1).

    Parameters
    ----------
    output_dim : int
        Size of axis 1 of the layer output.
    **kwargs : forwarded to the Layer base class.
    """

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        # super() must name this class (it previously named an undefined `MyLayer`)
        super(JoinModels, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the weight matrix once the input shape is known."""
        from tensorflow.keras.initializers import RandomUniform

        input_dim = input_shape[1]
        # add_weight registers the variable with the layer; `trainable_weights` is a
        # read-only property in tf.keras and cannot be assigned directly.
        self.W = self.add_weight(name="W",
                                 shape=(input_dim, self.output_dim),
                                 initializer=RandomUniform(minval=0.0, maxval=1.0),
                                 trainable=True)
        super(JoinModels, self).build(input_shape)

    def call(self, x, mask=None):
        """Return the matrix product of ``x`` and the weight matrix."""
        from tensorflow.keras import backend as K

        return K.dot(x, self.W)

    def compute_output_shape(self, input_shape):
        """Output shape: the batch axis of the input followed by ``output_dim``."""
        return (input_shape[0], self.output_dim)

    # Name of the same hook in Keras 1, kept so existing callers still work
    get_output_shape_for = compute_output_shape

    def get_config(self):
        """Include ``output_dim`` so the layer can be re-created from its config."""
        config = super(JoinModels, self).get_config()
        config["output_dim"] = self.output_dim
        return config