In [1]:
import os
import pandas as pd
import numpy as np
import PIL
import tensorflow as tf
import matplotlib.pyplot as plt
import datetime
import cv2

from sklearn import model_selection
from PIL import Image
from skimage import exposure

from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras import models
from tensorflow.keras import layers
import tensorflow_addons as tfa

In [2]:
# Expose only the GPU with index 2 to CUDA for this kernel.
# NOTE(review): this runs after `import tensorflow` in the cell above; it is normally
# still honored while no GPU has been initialized yet — confirm on this setup.
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

# Datos

In [3]:
# Root directory of the NIH dataset: holds Data_Entry_2017.csv and the subfolders
# (each with an `images/` directory) that charge_ims scans further down.
path = '/home/mr1142/Documents/Data/NIH'

In [4]:
# Load the per-image metadata table; 'Finding Labels' holds the '|'-separated diagnoses.
df = pd.read_csv(os.path.join(path, 'Data_Entry_2017.csv'))
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y],Unnamed: 11
0,00000001_000.png,Cardiomegaly,0,1,58,M,PA,2682,2749,0.143,0.143,
1,00000001_001.png,Cardiomegaly|Emphysema,1,1,58,M,PA,2894,2729,0.143,0.143,
2,00000001_002.png,Cardiomegaly|Effusion,2,1,58,M,PA,2500,2048,0.168,0.168,
3,00000002_000.png,No Finding,0,2,81,M,PA,2500,2048,0.171,0.171,
4,00000003_000.png,Hernia,0,3,81,F,PA,2582,2991,0.143,0.143,


## Labels

In [5]:
# Build the sorted vocabulary of individual findings: glue every distinct
# 'Finding Labels' entry together with '|', split it back apart and de-duplicate.
labels = sorted(set('|'.join(df['Finding Labels'].unique()).split('|')))

In [6]:
# Display the label vocabulary built in the previous cell.
labels

['Atelectasis',
 'Cardiomegaly',
 'Consolidation',
 'Edema',
 'Effusion',
 'Emphysema',
 'Fibrosis',
 'Hernia',
 'Infiltration',
 'Mass',
 'No Finding',
 'Nodule',
 'Pleural_Thickening',
 'Pneumonia',
 'Pneumothorax']

In [7]:
# Pre-create one column per label, filled with pd.NA as a placeholder.
# These columns are overwritten with 0/1 values two cells below.
for lab in labels:
    df[lab] = pd.NA

In [8]:
def fill_label(information, label):
    """Return 1 if `label` is one of the findings listed in `information`, else 0.

    Args:
        information: '|'-separated string of findings (one 'Finding Labels' entry).
        label: the finding name to look for; it must match a whole entry exactly.

    Returns:
        1 when `label` is present, 0 otherwise.
    """
    return 1 if label in information.split('|') else 0

In [9]:
# One-hot encode the findings: each label column gets 1 where the row's
# 'Finding Labels' string contains that label and 0 elsewhere.
for lab in labels:
    df[lab] = [fill_label(finding, lab) for finding in df['Finding Labels'].tolist()]

In [10]:
# Shuffle the rows before sub-sampling. The seed is fixed so the rows picked
# for the balanced subset below are the same on every Restart & Run All.
df = df.sample(frac=1, random_state=42).reset_index(drop = True)
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,...,Emphysema,Fibrosis,Hernia,Infiltration,Mass,No Finding,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax
0,00011514_007.png,No Finding,7,11514,46,M,AP,2500,2048,0.168,...,0,0,0,0,0,1,0,0,0,0
1,00020076_000.png,Atelectasis|Infiltration,0,20076,65,M,PA,2992,2991,0.143,...,0,0,0,1,0,0,0,0,0,0
2,00027848_000.png,No Finding,0,27848,46,F,PA,2544,3056,0.139,...,0,0,0,0,0,1,0,0,0,0
3,00026209_001.png,No Finding,1,26209,37,F,PA,2021,2021,0.194311,...,0,0,0,0,0,1,0,0,0,0
4,00012045_035.png,No Finding,35,12045,46,F,AP,2500,2048,0.168,...,0,0,0,0,0,1,0,0,0,0


In [11]:
# Number of images labelled with pneumonia, then with no finding at all.
print((df['Pneumonia'] == 1).sum())
print((df['No Finding'] == 1).sum())

1431
60361


In [12]:
# Maximum number of images taken from each class (pneumonia / no finding).
n = 1000

In [13]:
# Take the first n rows of each class from the shuffled table.
neumo = df.loc[df['Pneumonia'] == 1].head(n)
normal = df.loc[df['No Finding'] == 1].head(n)

In [14]:
# Merge both classes into one table and shuffle it. The seed is fixed so the
# row order (and therefore the image array built later) is reproducible.
df = pd.concat([neumo, normal]).reset_index(drop = True)
df = df.sample(frac=1, random_state=42).reset_index(drop = True)
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,...,Emphysema,Fibrosis,Hernia,Infiltration,Mass,No Finding,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax
0,00014402_000.png,No Finding,0,14402,59,F,AP,2500,2048,0.168,...,0,0,0,0,0,1,0,0,0,0
1,00010434_000.png,No Finding,0,10434,21,M,PA,2992,2991,0.143,...,0,0,0,0,0,1,0,0,0,0
2,00027725_031.png,Edema|Infiltration|Pneumonia,31,27725,21,M,AP,3056,2544,0.139,...,0,0,0,1,0,0,0,0,1,0
3,00027320_010.png,Pneumonia,10,27320,52,M,PA,2992,2991,0.143,...,0,0,0,0,0,0,0,0,1,0
4,00013615_007.png,No Finding,7,13615,9,F,AP,2048,2500,0.168,...,0,0,0,0,0,1,0,0,0,0


## Imágenes

### Cargamos el modelo de segmentación

In [15]:
from tensorflow.keras import backend as K

def dice_coef(y_true, y_pred, smooth=100):
    """Smoothed Dice coefficient between two tensors.

    Both inputs are flattened; the result is
    (2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth).

    Args:
        y_true: ground-truth tensor.
        y_pred: predicted tensor.
        smooth: constant added to numerator and denominator.

    Returns:
        Scalar tensor with the Dice coefficient.
    """
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    total = K.sum(truth) + K.sum(prediction)
    return (2. * overlap + smooth) / (total + smooth)

def dice_coef_loss(y_true, y_pred):
    """Dice loss: one minus the smoothed Dice coefficient of the two tensors."""
    dice = dice_coef(y_true, y_pred)
    return 1 - dice

In [16]:
def loss_mask(y_true, y_pred):
    """Absolute difference between `sub_mask` applied to both tensors.

    NOTE(review): `sub_mask` is not defined in the visible notebook; this
    function raises NameError if it is ever evaluated — confirm where the
    helper is supposed to come from.
    """
    pred_sub = sub_mask(y_pred)
    true_sub = sub_mask(y_true)
    return abs(true_sub - pred_sub)


def MyLoss(y_true, y_pred):
    """Combined segmentation loss: Dice loss plus half of the mask loss.

    Uses the notebook's own `dice_coef_loss`; the previous `ex.dice_coef_loss`
    referenced a module `ex` that is never imported here.

    Args:
        y_true: ground-truth mask tensor.
        y_pred: predicted mask tensor.

    Returns:
        dice_coef_loss(y_true, y_pred) + 0.5 * loss_mask(y_true, y_pred).
    """
    # Loss 1: Dice loss
    loss1 = dice_coef_loss(y_true, y_pred)
    # Loss 2: mask loss
    loss2 = loss_mask(y_true, y_pred)
    return loss1 + 0.5*loss2

In [None]:
# Load the trained segmentation U-Net. The custom loss/metric functions must be
# passed in `custom_objects` so Keras can deserialize the saved model.
new_model = tf.keras.models.load_model('/home/mr1142/Documents/Data/models/unet_final_renacimiento_validation_6.h5', 
                                     custom_objects={"MyLoss": MyLoss, 
                                                     "loss_mask": loss_mask, 
                                                     "dice_coef_loss": dice_coef_loss,
                                                     "dice_coef": dice_coef})

### Funciones para aplicar la máscara

In [None]:
from skimage import measure
from scipy import ndimage

In [None]:
def normalize(img):
    """Standardize an image to zero mean and unit standard deviation.

    Args:
        img: numeric array.

    Returns:
        Float array (img - mean) / std. For a constant image (std == 0) the
        centered image (all zeros) is returned instead of dividing by zero,
        which would yield NaN/inf values.
    """
    centered = img - np.mean(img)
    std = np.std(img)
    if std == 0:
        return centered
    return centered / std

In [None]:
def quitar_trozos(mask):
    """Keep only the largest connected component of a binary mask and fill its holes.

    Args:
        mask: binary mask array (apply_mask passes a 2D boolean array).

    Returns:
        Boolean array with the largest component, holes filled using a 5x5
        structuring element. If the mask has no foreground at all, an
        all-False mask of the same shape is returned (previously this case
        raised ValueError from np.argmax on an empty list).
    """
    labelled = measure.label(mask)
    # Index 0 of the bincount is the background label, so drop it.
    sizes = np.bincount(labelled.ravel())[1:]
    if sizes.size == 0:
        return np.zeros(labelled.shape, dtype=bool)
    # +1 because the background entry was removed; argmax keeps the first
    # (lowest) label on ties, as before.
    largest = np.argmax(sizes) + 1
    component = labelled == largest
    return ndimage.binary_fill_holes(component, structure=np.ones((5,5)))


def recolor(img):
    """Convert a BGR image to grayscale, leaving it untouched if OpenCV cannot.

    Args:
        img: image array as read by OpenCV.

    Returns:
        The grayscale image, or `img` unchanged when cv2.cvtColor rejects it
        (e.g. it is already single-channel).
    """
    try:
        return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    except cv2.error:
        # Only OpenCV conversion failures are tolerated; anything else
        # (wrong type, interrupts, ...) now propagates instead of being hidden.
        return img


def recolor_resize(img, pix=256):
    """Convert an image to grayscale, resize it to a square and add a channel axis.

    Args:
        img: image array as read by OpenCV (color or already grayscale).
        pix: side length, in pixels, of the square output.

    Returns:
        Array with a trailing axis added after resizing to (pix, pix).
    """
    # Reuse the shared grayscale helper instead of duplicating its try/except.
    gray = recolor(img)
    resized = cv2.resize(gray, (pix, pix))
    return np.expand_dims(resized, axis=-1)


def des_normalize(img):
    """Min-max rescale an image to the 0-255 range, stored as 16-bit unsigned."""
    rescaled = cv2.normalize(
        img,
        None,
        alpha = 0,
        beta = 255,
        norm_type = cv2.NORM_MINMAX,
        dtype = cv2.CV_16UC1,
    )
    return rescaled


def apply_mask(img, model):
    """Segment an image with `model` and return the image multiplied by the mask.

    Args:
        img: image array as read by OpenCV.
        model: segmentation model whose `predict` takes a (1, 256, 256, 1) batch.

    Returns:
        The grayscale image with everything outside the cleaned mask set to 0.
    """
    height, width = img.shape[:2]
    # Work on the grayscale version of the image
    gray = recolor(img)
    # Build the model input: 256x256, standardized, with a leading batch axis
    model_input = normalize(recolor_resize(gray, 256))
    batch = model_input[np.newaxis, ...]
    # Predict the mask and drop the batch axis
    predicted = model.predict(batch)[0, ...]
    # Scale the mask back to the size of the input image
    full_size = cv2.resize(predicted, (width, height))
    # Threshold at 0.5 and keep only the largest component, holes filled
    clean_mask = quitar_trozos(full_size > 0.5)
    return gray * clean_mask

### Funciones para preparar las imágenes

In [None]:
# Side length, in pixels, of the square images fed to the classifier.
pix = 512

In [None]:
def clahe(img):
    """Apply CLAHE (contrast-limited adaptive histogram equalization, clip limit 20)."""
    equalizer = cv2.createCLAHE(clipLimit = 20)
    return equalizer.apply(img)

def get_prepared_img(path, model, pix):
    """Read an image from disk and run the full preprocessing pipeline on it.

    Steps: segment with `model` (apply_mask), rescale to 0-255 (des_normalize),
    resize to (pix, pix) with a channel axis (recolor_resize), then CLAHE.

    Args:
        path: path of the image file.
        model: segmentation model passed on to apply_mask.
        pix: side length, in pixels, of the square output.

    Returns:
        The preprocessed image.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None
        # rather than raising, so fail here with a message that names the file.
        raise FileNotFoundError('Could not read image: ' + str(path))
    segmented = des_normalize(apply_mask(img, model))
    segmented = recolor_resize(segmented, pix)
    segmented = clahe(segmented)
    return segmented

Cargo todas las imágenes y les aplico todos los filtros.

In [None]:
def charge_ims(img_list, pix):
    """Load and preprocess every requested image found under the dataset folders.

    Scans each subfolder of the module-level `path` for an `images/` directory
    and preprocesses the files whose names appear in `img_list`, using the
    module-level segmentation model `new_model`.

    Args:
        img_list: iterable of image file names to load.
        pix: side length, in pixels, of the square preprocessed images.

    Returns:
        Dict with 'name' (file names) and 'imgs' (preprocessed images), both in
        the same order. That order follows the folder scan, not `img_list`.
    """
    # Build the lookup set once instead of once per folder.
    wanted = set(img_list)
    names = []
    imgs = []
    # The context manager closes the scandir iterator deterministically.
    with os.scandir(path) as entries:
        subfolders = [entry.path for entry in entries if entry.is_dir()]
    for folder in subfolders:
        images_dir = os.path.join(folder, 'images')
        if not os.path.isdir(images_dir):
            # Skip unrelated directories (no images/ inside) instead of crashing.
            continue
        # Load only the requested images that live in this folder.
        for im in wanted.intersection(os.listdir(images_dir)):
            names.append(im)
            imgs.append(get_prepared_img(os.path.join(images_dir, im), new_model, pix))
    return {'name': names, 'imgs': imgs}

In [None]:
# Load and preprocess every image of the balanced subset at the classifier
# resolution. (`pixels` was never defined; the size variable is `pix`.)
imgs_dict = charge_ims(df['Image Index'], pix)

Reordeno las imágenes en un array, en el mismo orden que las filas de `df`.

In [None]:
# Map each file name to its position in imgs_dict (first occurrence wins), so
# every row is resolved with one dict lookup instead of a scan over all names.
name_to_pos = {}
for pos, name in enumerate(imgs_dict['name']):
    name_to_pos.setdefault(name, pos)

# Stack the images in the same order as the rows of df so that images[i]
# matches df's i-th label. Use `pix` (not a literal 512) so the resize always
# agrees with the allocated array shape.
images = np.zeros((len(df), pix, pix, 1))
for i, name in enumerate(df['Image Index']):
    im = recolor_resize(imgs_dict['imgs'][name_to_pos[name]], pix)
    images[i,...] = im

In [None]:
# Sanity check: should be (len(df), pix, pix, 1), matching the np.zeros allocation above.
images.shape

## X e Y

In [None]:
# Binary target taken from the 'Pneumonia' column (1 = pneumonia, 0 = not).
Y = np.array(df['Pneumonia'])
# Model inputs: the preprocessed image stack, row-aligned with Y.
X = images

In [None]:
# Split the image stack by class for visual inspection.
# NOTE: `neumo` previously held a DataFrame; from here on it is an image array.
# Boolean indexing copies the data, so this roughly doubles the memory in use.
neumo = X[Y == 1]
norm = X[Y == 0]

In [None]:
# Show one random example of each class side by side.
f, ax = plt.subplots(1,2, figsize=(15,15))
# Draw an independent index per class: reusing one index for both arrays goes
# out of bounds whenever the two classes have different sizes.
i = np.random.randint(0, len(neumo))
j = np.random.randint(0, len(norm))
ax[0].imshow(neumo[i], cmap = 'gray')
ax[0].set_title('Pneumonia')
ax[1].imshow(norm[j], cmap = 'gray')
ax[1].set_title('No Finding')
# plt.show() renders with the inline backend; Figure.show() needs a GUI backend.
plt.show()

In [None]:
# Stratified 70/30 split with a fixed seed so the partition is reproducible.
# NOTE(review): the split is per image, not per patient; the metadata has a
# 'Patient ID' column, so images of one patient may land on both sides — confirm
# whether a grouped split is needed.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=0.3, shuffle=True, stratify=Y, random_state=42)

# Modelo

## Red preentrenada EfficientNet B3

In [None]:
# ImageNet-pretrained EfficientNetB3 backbone without its classification head.
# It takes 3-channel input; the single-channel images are mapped to 3 channels
# by the 'conv_inicial' layer of the model built below.
input_shape = (pix,pix,3)
conv_base = EfficientNetB3(weights="imagenet", include_top=False, input_shape=input_shape)

In [None]:
# Number of layers and of trainable variables in the backbone.
print(len(conv_base.layers))
print(len(conv_base.trainable_variables))

## Completo el modelo

In [None]:
# Classifier: 1->3 channel adapter conv, EfficientNetB3 backbone, small conv head,
# global max pooling, dropout and a single sigmoid output unit.
model = models.Sequential()
# Only the first layer declares an input shape; Sequential infers the rest.
model.add(layers.Conv2D(3,3,padding="same", input_shape=(pix,pix,1), activation='elu', name = 'conv_inicial'))
model.add(conv_base)
# The former input_shape=(8,8,1280) / (4,4,1280) arguments on the two layers below
# were ignored by Sequential (they are not the first layer) and did not describe
# the real tensors. NOTE(review): for 512x512 input the backbone presumably emits
# 16x16x1536 — confirm with model.summary().
model.add(layers.Conv2D(32,3, padding='same', activation='selu', name = 'conv_posterior'))
model.add(layers.MaxPool2D(pool_size = (2,2), padding='same', name = 'first_pooling'))
model.add(layers.Conv2D(64,3, padding='same', activation='selu', name = 'last_convolution'))
model.add(layers.GlobalMaxPooling2D(name="general_max_pooling"))
model.add(layers.Dropout(0.2, name="dropout_out"))
model.add(layers.Dense(1, activation="sigmoid", name="fc_out"))

In [None]:
# Print the layer-by-layer summary to check output shapes and parameter counts.
model.summary()

## Callback

In [31]:
# TensorBoard logging into a timestamped run directory, written after every
# batch, with weight histograms every epoch.
# NOTE: the callback is currently not passed to model.fit (it is commented out there).
log_dir = "/home/mr1142/Documents/Data/logs/fit/image_class_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                      update_freq='batch',
                                                      histogram_freq=1)

# Entrenamiento

## Hiperparámetros

In [None]:
# Optimizer, loss and metrics for the binary pneumonia classifier.
lr = 1e-4
opt = tf.keras.optimizers.Adam(learning_rate = lr)
# Removed the duplicated `loss = loss = ...` assignment.
loss = 'binary_crossentropy'
met = ['BinaryAccuracy', 'Precision', 'AUC']

In [None]:
# Mini-batch size and number of training epochs passed to model.fit.
batch = 8
epoch = 500

### Especificación por capas

In [None]:
# Unfreeze the whole backbone first; the next cell re-freezes its first layers.
conv_base.trainable = True

Entrenamos desde la capa 300

In [None]:
# Index of the first backbone layer that stays trainable.
fine_tune_at = 300

# Freeze every backbone layer before that index; layers from it onwards are fine-tuned.
for layer in conv_base.layers[:fine_tune_at]:
    layer.trainable = False

In [None]:
# Number of top-level layers and of trainable variables after partial freezing.
print(len(model.layers))
print(len(model.trainable_variables))

### Compilación

In [None]:
# Compile after setting the trainable flags so the freezing above takes effect.
model.compile(optimizer=opt, loss = loss , metrics = met)

### Entrenamiento

In [None]:
# Train the classifier. `validation_split` was dropped: Keras ignores it when
# `validation_data` is supplied, so it only suggested a split that never happened.
# NOTE(review): the held-out test set doubles as validation data here; confirm
# whether a separate validation split of the training data is wanted instead.
history = model.fit(X_train, Y_train,
                    batch_size = batch,
                    epochs = epoch,
                    # callbacks = [tensorboard_callback],
                    validation_data = (X_test, Y_test),
                    shuffle = True)