<a href="https://colab.research.google.com/github/kool7/Medical_Computer_Vision_2020/blob/master/Segmentation/Segmentation_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import pandas as pd
import os
import math
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import Sequence
from sklearn.utils import shuffle
import shutil

import tensorflow as tf
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import Input, AveragePooling2D, Flatten, Dense, Dropout, Lambda, Multiply, AveragePooling2D, BatchNormalization
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras import backend as K
from time import time

from matplotlib import pyplot as plt
from sklearn import metrics
import seaborn as sn

import numpy as np
from PIL import Image, ImageOps
from tqdm import tqdm

# Data Preprocessing

In [4]:
# directories
WORKING_DIR = ('/content/drive/My Drive/deepiotic/')
INPUT_DIR = ('/content/drive/My Drive/deepiotic/Classes/')

# Classes
imgClasses = ['COVID', 'OTHERS', 'NORMAL', 'PNEUMONIA']

# DIR STRUCTURE
os.listdir(INPUT_DIR)

['NORMAL', 'OTHERS', 'PNEUMONIA', 'COVID']

## Creating Dataframe

In [5]:
# create a dataframe.

def createDF(imgClasses, force = False):
  # check if saved copy exists. If not, create
  if os.path.isfile(f'{WORKING_DIR}imgDFnormal.csv') == False or force:
    # creating 3 lists that contain: image name, location, and one hot encoded label.
    imgName = []
    imgLocation = []
    imgLabel = []

    for dir in imgClasses:
      if dir == imgClasses[0]:
        print(f"{dir}: {len( os.listdir(f'{INPUT_DIR}{dir}') )}")

        # fetching img names and adding it main list
        imgName = os.listdir(f'{INPUT_DIR}{dir}')

        # adding locs to main list, after Cropped_data.
        imgLocation = [dir] * len( os.listdir(f'{INPUT_DIR}{dir}') )
          
        # adding labels to main list
        imgLabel = [imgClasses.index(dir)] * len( os.listdir(f'{INPUT_DIR}{dir}') )
      else:
        print(f"{dir}: {len( os.listdir(f'{INPUT_DIR}{dir}') )}")

        # fetching img names and adding it main list
        tempimgName = os.listdir(f'{INPUT_DIR}{dir}')
        imgName = imgName + tempimgName

        # adding locs to main list, after Cropped_data.
        tempimgLocation = [dir] * len( os.listdir(f'{INPUT_DIR}{dir}') )
        imgLocation = imgLocation + tempimgLocation

        # adding labels to main list
        tempimgLabel = [imgClasses.index(dir)] * len( os.listdir(f'{INPUT_DIR}{dir}') )
        imgLabel = imgLabel + tempimgLabel


    # now making dataframe and saving for later use
    dfDict = {'name' : imgName, 'location' : imgLocation, 'label' : imgLabel}
    # df = pd.DataFrame(list(zip(imgName, imgLocation, imgLabel)), columns = ['name', 'location', 'label'])
    df = pd.DataFrame(dfDict)
    # df.drop_duplicates(subset = 'name', keep = False, inplace = True)
    # df.to_csv(f'{WORKING_DIR}imgDFnormal.csv', index = False)

    return df

  # load saved copy and return
  else:
    df = pd.read_csv(f'{WORKING_DIR}imgDFnormal.csv')
    return df

# load df
df = createDF(imgClasses, force = True)

COVID: 1053
OTHERS: 725
NORMAL: 1190
PNEUMONIA: 962


In [None]:
# dataframe
df

Unnamed: 0,name,location,label
0,A7E260CE-8A00-4C5F-A7F5-27336527A981.jpeg,COVID,0
1,all14238-fig-0001-m-c.jpg,COVID,0
2,all14238-fig-0002-m-d.jpg,COVID,0
3,all14238-fig-0002-m-e.jpg,COVID,0
4,all14238-fig-0002-m-f.jpg,COVID,0
...,...,...,...
3925,c4c780ea-c3f3-488b-94d3-82301a1160c4_jpg.rf.b4...,PNEUMONIA,3
3926,c4b25d82-38e6-4734-9e53-7c1d73912027_jpg.rf.af...,PNEUMONIA,3
3927,69017cf1-c8b2-4b96-bce8-7823f7fa9dd7_jpg.rf.ac...,PNEUMONIA,3
3928,84f07d83-e987-4a16-a083-5e8a87218532_jpg.rf.a9...,PNEUMONIA,3


## Train-Val Split 

In [15]:
# dataset loader class

# split dataset into train and test
coviddf = df[df.location == 'COVID']
othersdf = df[df.location == 'OTHERS']
normaldf = df[df.location == 'NORMAL']
pneumoniadf = df[df.location == 'PNEUMONIA']

print('Covid: ', len(coviddf))
print('Others: ', len(othersdf))
print('Normal: ', len(normaldf))
print(f'Pneumonia: {len(pneumoniadf)}')

# Creating Train-Val Split for all classes
ctrainset, cvalset = train_test_split(coviddf, test_size=0.1)
ntrainset, nvalset = train_test_split(normaldf, test_size=0.1)
otrainset, ovalset = train_test_split(othersdf, test_size=0.1)
ptrainset, pvalset = train_test_split(pneumoniadf, test_size=0.1)

# recombine
trainset = pd.concat([ctrainset, ntrainset, otrainset, ptrainset])
valset = pd.concat([cvalset, nvalset, ovalset, pvalset])
# testset = pd.concat([ctestset, ntestset, otestset])

# shuffle
RANDOM_STATE = 7
trainset = shuffle(trainset, random_state = RANDOM_STATE)
valset = shuffle(valset, random_state = RANDOM_STATE)

# total number of examples in train-val split
print('------------------------------------------------------------')
print(f'Total number of examples in Training set: {len(trainset)}')
print(f'Total number of examples in Validation set: {len(valset)}')

Covid:  1053
Others:  725
Normal:  1190
Pneumonia: 962
------------------------------------------------------------
Total number of examples in Training set: 3535
Total number of examples in Validation set: 395


## Image Data Generator

In [16]:
# Loading Pretrained Unet Model
unetModel = load_model('/content/drive/My Drive/deepiotic/Unet.h5')

In [17]:
# Custom Image Data Generator
class DataGenerator(Sequence):
    def __init__(self, df, batch_size=8, dim=(224, 224), dim1=(64, 64), shuffle=True, to_fit=True, classesNo = 4):
        self.itemList = df.values.tolist()
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.dim = dim
        self.dim1 = dim1
        self.to_fit = to_fit
        self.classesNo = classesNo
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.itemList) / self.batch_size))

    def __getitem__(self, index):
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # Find list of IDs
        itemListTemp = [self.itemList[k] for k in indexes]

        # Generate data
        X = self._generate_X(itemListTemp)
        # X = tf.keras.applications.densenet.preprocess_input(X)

        if self.to_fit:
            y = self._generate_y(itemListTemp)
            return X, y
        else:
            return X

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.itemList))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def _generate_X(self, itemListTemp):
      for i in itemListTemp:
        originalImage = Image.open(f'{INPUT_DIR}{i[1]}/{i[0]}').convert('L').convert('RGB') # open up
        originalShape = originalImage.size
        
        # predict mask and cal bounding box
        ## pre proc input
        maskResize = originalImage.resize((64, 64))
        maskArray = np.array(maskResize)
        maskArray = (maskArray - np.amin(maskArray)) / (np.amax(maskArray) - np.amin(maskArray) + 1e-8)
        maskInput = maskArray[np.newaxis, :, :, :]

        ## predict
        maskArray = unetModel.predict(maskInput)
        maskArray = maskArray[0, :, :, 0]

        ## gen mask and cal min bounding rec
        maskArray = np.array(Image.fromarray(np.uint8(maskArray * 255)).resize(originalShape))
        maskArray = (maskArray - np.amin(maskArray)) / (np.amax(maskArray) - np.amin(maskArray) + 1e-8)
        maskArray[maskArray >= .6] = 1
        maskArray[maskArray < .6] = 0
        where = np.array(np.where(maskArray))
        x1, y1 = np.amin(where, axis=1)
        x2, y2 = np.amax(where, axis=1)

        # height and width
        cropped_h = np.abs(x1 - x2)
        cropped_w = np.abs(y1 - y2)

        # increase perc
        perc = 0.05
        x2 = np.clip(x2 + (cropped_h * perc * 2), 0, originalShape[1])
        y1 = np.clip(y1 - (cropped_w * perc), 0, originalShape[0])
        y2 = np.clip(y2 + (cropped_w * perc), 0, originalShape[0])
        
        # convert to int
        x2 = math.floor(x2)
        y1 = math.ceil(y1)
        y2 = math.floor(y2)

        # create img
        arrayImage = np.array(originalImage)
        arrayImage = arrayImage[x1:x2, y1:y2]
        arrayImage = np.array(Image.fromarray(arrayImage).resize((224, 224)))
        # save
        arrayImage = (arrayImage - np.amin(arrayImage)) / (np.amax(arrayImage) - np.amin(arrayImage) + 1e-8)

        
        if i == itemListTemp[0]:
          imgs = arrayImage[np.newaxis, :, :, :]
        else:
          imgs = np.concatenate((imgs, arrayImage[np.newaxis, :, :, :]), axis = 0)

      arg1 = imgs

      return arg1#[arg1, arg2]
        

    def _generate_y(self, itemListTemp):
      # one hot encoder
      onehotArr = np.eye(self.classesNo)

      for i in itemListTemp:
        if i == itemListTemp[0]:
          t = onehotArr[int(i[2])]
          labels = t[np.newaxis, :]
        else:
          t = onehotArr[int(i[2])]
          labels = np.concatenate((labels, t[np.newaxis, :]), axis = 0)

      return labels

# Model

In [19]:
# build model
def getModel(finalClasses = len(imgClasses), topTrain = 20, usedrop = True, droprate = .5, dense = None):
  
  ## load densenet121 model
  dense121Input = Input(shape = (224, 224, 3), name = 'densenet121_input')
  dense121Model = tf.keras.applications.DenseNet121(weights = 'imagenet', include_top = False, input_shape = (224, 224, 3), input_tensor = dense121Input)
  for layer in dense121Model.layers[:-1 * topTrain]:
    layer.trainable = False

  # combine models as one
  ## model input layer
  inputLayer = Input(shape = (224, 224, 3), name = 'model_input')

  # load chexnet Model
  dense121Output = dense121Model(inputLayer)
  batchNorm = BatchNormalization()(dense121Output)
  chexnetAvg = AveragePooling2D(4)(batchNorm)
  x = Flatten()(chexnetAvg)

  if dense == None:
    if usedrop:
      x = Dropout(droprate)(x)
  else:
    for denseInfo in dense:
      x = Dense(denseInfo)(x)
      if usedrop:
        x = Dropout(droprate)(x)
  
  dense121Output = Dense(finalClasses, activation = 'softmax')(x)  

  model = Model(inputLayer, dense121Output, name = 'combined_model')

  return model

# # load model
model = getModel()