In [1]:
import os
import pandas as pd
import numpy as np
import PIL
import tensorflow as tf
import matplotlib.pyplot as plt
import datetime
import cv2

from sklearn import model_selection
from PIL import Image
from skimage import exposure

from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import models
from tensorflow.keras import layers
import tensorflow_addons as tfa

In [2]:
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Datos

In [3]:
path = '/home/mr1142/Documents/Data/NIH'

In [4]:
df = pd.read_csv(os.path.join(path, 'Data_Entry_2017.csv'))
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y],Unnamed: 11
0,00000001_000.png,Cardiomegaly,0,1,58,M,PA,2682,2749,0.143,0.143,
1,00000001_001.png,Cardiomegaly|Emphysema,1,1,58,M,PA,2894,2729,0.143,0.143,
2,00000001_002.png,Cardiomegaly|Effusion,2,1,58,M,PA,2500,2048,0.168,0.168,
3,00000002_000.png,No Finding,0,2,81,M,PA,2500,2048,0.171,0.171,
4,00000003_000.png,Hernia,0,3,81,F,PA,2582,2991,0.143,0.143,


## Labels

In [5]:
labels = np.unique(df['Finding Labels'])
labels = '|'.join(labels)
labels = labels.split('|')
labels = np.unique(labels).tolist()

In [6]:
labels

['Atelectasis',
 'Cardiomegaly',
 'Consolidation',
 'Edema',
 'Effusion',
 'Emphysema',
 'Fibrosis',
 'Hernia',
 'Infiltration',
 'Mass',
 'No Finding',
 'Nodule',
 'Pleural_Thickening',
 'Pneumonia',
 'Pneumothorax']

In [7]:
for lab in labels:
    df[lab] = pd.NA

In [8]:
def fill_label(information, label):
    positive_labels = information.split('|')
    if label in positive_labels:
        return 1
    return 0

In [9]:
for lab in labels:
    df[lab] = list(map(lambda x: fill_label(x, lab), df['Finding Labels'].tolist()))

In [10]:
df = df.sample(frac=1).reset_index(drop = True)
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,...,Emphysema,Fibrosis,Hernia,Infiltration,Mass,No Finding,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax
0,00003384_001.png,Infiltration,1,3384,70,F,PA,2048,2500,0.168,...,0,0,0,1,0,0,0,0,0,0
1,00017000_002.png,No Finding,2,17000,36,M,AP,2500,2048,0.168,...,0,0,0,0,0,1,0,0,0,0
2,00012229_000.png,Atelectasis,0,12229,48,F,PA,2992,2845,0.143,...,0,0,0,0,0,0,0,0,0,0
3,00021201_084.png,Atelectasis|Effusion|Infiltration,84,21201,67,M,AP,2500,2048,0.168,...,0,0,0,1,0,0,0,0,0,0
4,00012957_004.png,No Finding,4,12957,37,M,AP,3056,2544,0.139,...,0,0,0,0,0,1,0,0,0,0


In [11]:
print(sum(df.Pneumonia == 1))
print(sum(df['No Finding'] == 1))

1431
60361


In [12]:
n = 100

In [13]:
neumo = df[df.Pneumonia == 1][0:n]
normal = df[df['No Finding'] == 1][0:n]

In [14]:
df = pd.concat([neumo, normal]).reset_index(drop = True)
df = df.sample(frac=1).reset_index(drop = True)
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,...,Emphysema,Fibrosis,Hernia,Infiltration,Mass,No Finding,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax
0,00018382_004.png,Edema|Effusion|Pneumonia,4,18382,72,M,AP,2500,2048,0.168,...,0,0,0,0,0,0,0,0,1,0
1,00021289_000.png,Consolidation|Mass|Nodule|Pneumonia,0,21289,40,F,AP,3056,2544,0.139,...,0,0,0,0,1,0,1,0,1,0
2,00009043_000.png,No Finding,0,9043,48,M,PA,2500,2048,0.168,...,0,0,0,0,0,1,0,0,0,0
3,00025331_000.png,No Finding,0,25331,61,F,PA,2992,2991,0.143,...,0,0,0,0,0,1,0,0,0,0
4,00007197_000.png,No Finding,0,7197,46,M,PA,2500,2048,0.168,...,0,0,0,0,0,1,0,0,0,0


## Imagenes

In [15]:
pixels = 256

In [16]:
def charge_specific_img(img_name, pix):
    subfolders = [f.path for f in os.scandir(path) if f.is_dir()]
    for folder in subfolders:
        try: 
            img = cv2.imread(os.path.join(folder, 'images', img_name))
            # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # img = clahe(img)
            img = downsample(img, pix)
            return img
        except:
            print('', end = '')
    img = np.zeros((pix,pix,1))
    return img

In [17]:
def clahe(img):
    clahe = cv2.createCLAHE(clipLimit = 20)
    final_img = clahe.apply(img)
    return final_img

In [18]:
def downsample(img, pix):
    img = Image.fromarray(img)
    img = img.resize((pix,pix))
    img = np.array(img)
    img = img/255
    # r = np.expand_dims(img, axis=-1)
    return r

Leo un numero concreto de imagenes con y sin neumonia

In [19]:
images = np.zeros((len(df), pixels, pixels, 3))
for i in range(len(df)):
    images[i,...] = charge_specific_img(df['Image Index'][i], pixels)

In [20]:
Y = np.array(df['Pneumonia'])
X = images

In [21]:
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=0.3, shuffle=True, stratify=Y )

# Model

In [22]:
pixels = 256

In [23]:
input_shape = (pixels,pixels,3)
conv_base = EfficientNetB0(weights=None, include_top=False, input_shape=input_shape)

In [24]:
conv_base.trainable = False

In [25]:
inputs = tf.keras.Input(shape=input_shape)
x = conv_base(inputs, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(inputs, outputs)

In [26]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 256, 256, 3)]     0         
_________________________________________________________________
efficientnetb0 (Functional)  (None, 8, 8, 1280)        4049571   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1280)              0         
_________________________________________________________________
dropout (Dropout)            (None, 1280)              0         
_________________________________________________________________
dense (Dense)                (None, 1)                 1281      
Total params: 4,050,852
Trainable params: 1,281
Non-trainable params: 4,049,571
_________________________________________________________________


In [27]:
# model = models.Sequential()
# model.add(layers.Conv2D(3,3,padding="same", input_shape=(pixels,pixels,1), activation='elu', name = 'conv_inicial'))
# model.add(conv_base)
# model.add(layers.GlobalMaxPooling2D(name="general_max_pooling"))
# model.add(layers.Dropout(0.2, name="dropout_out"))
# model.add(layers.Dense(1, activation="sigmoid", name="fc_out"))

In [28]:
# lr = 0.5
# opt = tf.keras.optimizers.Adam()
# loss = loss = 'binary_crossentropy'
# met = ['BinaryAccuracy', 'Precision', 'AUC']

In [33]:
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [34]:
print(len(model.layers))
print(len(model.trainable_variables))

5
2


In [35]:
# model.compile(optimizer=opt, loss = loss , metrics = met)

In [36]:
history = model.fit(X_train,Y_train,
                    batch_size = 16,
                    epochs = 10,
                    shuffle = True,
                    validation_split = 0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [39]:
conv_base.trainable = True

# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(conv_base.layers))

# Fine-tune from this layer onwards
fine_tune_at = 100

# Freeze all the layers before the `fine_tune_at` layer
for layer in conv_base.layers[:fine_tune_at]:
    layer.trainable = False

Number of layers in the base model:  237


In [40]:
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer = tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate/10),
              metrics=['accuracy'])

In [43]:
fine_tune_epochs = 10
total_epochs =  10 + fine_tune_epochs

history_fine = model.fit(X_train,Y_train,
                         epochs=total_epochs,
                         initial_epoch=history.epoch[-1],
                         validation_split = 0.2)

Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
