In [1]:
import os
import pandas as pd
import numpy as np
import PIL
import tensorflow as tf
import matplotlib.pyplot as plt
import datetime
import cv2

from sklearn import model_selection
from PIL import Image
from skimage import exposure

from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import models
from tensorflow.keras import layers
import tensorflow_addons as tfa

In [2]:
# Restrict CUDA to device 0 for this process.
# NOTE(review): this is set after `import tensorflow`; confirm it still takes
# effect, or move it above the TensorFlow import.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Datos

In [3]:
# Root directory of the NIH data: the metadata CSV is read from here and its
# sub-directories are scanned for '<subfolder>/images/<file>' when loading images.
# NOTE(review): hard-coded absolute local path; adjust it to run on another machine.
path = '/home/mr1142/Documents/Data/NIH'

In [4]:
# Load the per-image metadata table (one row per image file) and preview it.
df = pd.read_csv(os.path.join(path, 'Data_Entry_2017.csv'))
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y],Unnamed: 11
0,00000001_000.png,Cardiomegaly,0,1,58,M,PA,2682,2749,0.143,0.143,
1,00000001_001.png,Cardiomegaly|Emphysema,1,1,58,M,PA,2894,2729,0.143,0.143,
2,00000001_002.png,Cardiomegaly|Effusion,2,1,58,M,PA,2500,2048,0.168,0.168,
3,00000002_000.png,No Finding,0,2,81,M,PA,2500,2048,0.171,0.171,
4,00000003_000.png,Hernia,0,3,81,F,PA,2582,2991,0.143,0.143,


## Labels

In [5]:
# Each 'Finding Labels' entry is a '|'-separated list of findings. Glue the
# distinct entries together, split them back into single names and keep the
# sorted set of names as a plain Python list.
joined_findings = '|'.join(df['Finding Labels'].unique())
labels = sorted(set(joined_findings.split('|')))

In [6]:
# Display the list of individual finding names extracted above.
labels

['Atelectasis',
 'Cardiomegaly',
 'Consolidation',
 'Edema',
 'Effusion',
 'Emphysema',
 'Fibrosis',
 'Hernia',
 'Infiltration',
 'Mass',
 'No Finding',
 'Nodule',
 'Pleural_Thickening',
 'Pneumonia',
 'Pneumothorax']

In [7]:
# Create one placeholder column per finding name, filled with pd.NA.
# These columns are overwritten with 0/1 values by the later loop that
# calls fill_label for every label.
for lab in labels:
    df[lab] = pd.NA

In [8]:
def fill_label(information, label):
    """Tell whether `label` is one of the findings listed in `information`.

    Parameters
    ----------
    information : str
        Findings of one image, separated by '|'.
    label : str
        Finding name to look for (exact match against a whole entry).

    Returns
    -------
    int
        1 when `label` is one of the '|'-separated entries, 0 otherwise.
    """
    return int(label in information.split('|'))

In [9]:
# Turn every finding name into a 0/1 indicator column computed from the
# '|'-separated 'Finding Labels' text of each row.
finding_texts = df['Finding Labels'].tolist()
for lab in labels:
    df[lab] = [fill_label(text, lab) for text in finding_texts]

In [10]:
# Shuffle the rows before the per-class subsets are taken with [0:n].
# A fixed random_state makes the shuffle (and therefore which images end up
# in the subsets) reproducible from one run to the next.
df = df.sample(frac=1, random_state=42).reset_index(drop = True)
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,...,Emphysema,Fibrosis,Hernia,Infiltration,Mass,No Finding,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax
0,00027661_000.png,No Finding,0,27661,58,F,PA,2498,2991,0.143,...,0,0,0,0,0,1,0,0,0,0
1,00016243_002.png,No Finding,2,16243,61,M,PA,2992,2991,0.143,...,0,0,0,0,0,1,0,0,0,0
2,00019766_021.png,Atelectasis|Cardiomegaly|Effusion|Infiltration,21,19766,41,M,PA,2021,2021,0.194311,...,0,0,0,1,0,0,0,0,0,0
3,00022141_019.png,Infiltration|Pneumothorax,19,22141,32,M,AP,3056,2544,0.139,...,0,0,0,1,0,0,0,0,0,1
4,00016466_000.png,No Finding,0,16466,31,M,PA,2822,2889,0.143,...,0,0,0,0,0,1,0,0,0,0


In [11]:
# Print how many rows are flagged as Pneumonia, then how many as 'No Finding'.
for column in ('Pneumonia', 'No Finding'):
    print(sum(df[column] == 1))

1431
60361


In [12]:
# Maximum number of rows kept from each class (Pneumonia / No Finding).
n = 100

In [13]:
# First n rows (in the current row order) of the pneumonia-positive images
# and of the images without any finding.
neumo = df.loc[df['Pneumonia'] == 1].head(n)
normal = df.loc[df['No Finding'] == 1].head(n)

In [14]:
# Stack both class subsets into the working table and shuffle it so the two
# classes are interleaved. The fixed random_state keeps the row order
# reproducible between runs; the index is rebuilt as 0..len-1 afterwards.
df = (
    pd.concat([neumo, normal])
    .sample(frac=1, random_state=42)
    .reset_index(drop = True)
)
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,...,Emphysema,Fibrosis,Hernia,Infiltration,Mass,No Finding,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax
0,00011837_009.png,No Finding,9,11837,44,M,AP,2500,2048,0.168,...,0,0,0,0,0,1,0,0,0,0
1,00008095_003.png,No Finding,3,8095,63,M,AP,2500,2048,0.168,...,0,0,0,0,0,1,0,0,0,0
2,00028183_000.png,No Finding,0,28183,73,M,PA,2021,2021,0.194311,...,0,0,0,0,0,1,0,0,0,0
3,00023271_019.png,No Finding,19,23271,40,F,AP,3056,2544,0.139,...,0,0,0,0,0,1,0,0,0,0
4,00004808_101.png,Edema|Infiltration|Pneumonia,101,4808,38,M,AP,2500,2048,0.168,...,0,0,0,1,0,0,0,0,1,0


## Imágenes

In [15]:
# Side length (in pixels) of the square images loaded into memory.
pixels = 256

In [16]:
def charge_specific_img(img_name, pix):
    """Load one image by file name and return it as a (pix, pix, 1) array.

    Every sub-directory of the module-level `path` is checked for
    '<subfolder>/images/<img_name>'. The first file that can be read is
    converted to grayscale and passed through `downsample`.

    Parameters
    ----------
    img_name : str
        File name of the image to load.
    pix : int
        Side length, in pixels, of the returned square image.

    Returns
    -------
    numpy.ndarray
        The result of `downsample(img, pix)`, or an all-zero array of shape
        (pix, pix, 1) when the image is not readable in any sub-folder. A
        warning is emitted in that case so missing data is not silent.
    """
    import warnings

    subfolders = [f.path for f in os.scandir(path) if f.is_dir()]
    for folder in subfolders:
        # cv2.imread does not raise when the file is missing or unreadable;
        # it returns None, so test for that instead of catching every error.
        img = cv2.imread(os.path.join(folder, 'images', img_name))
        if img is None:
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Optional contrast enhancement, currently disabled:
        # img = clahe(img)
        return downsample(img, pix)
    # Best-effort fallback kept from the original behaviour: a blank image.
    warnings.warn(
        "Image %r not found under %r; using an all-zero image instead" % (img_name, path)
    )
    return np.zeros((pix, pix, 1))

In [17]:
def clahe(img):
    """Run OpenCV's CLAHE (created with clipLimit=20) on `img` and return the result."""
    equalizer = cv2.createCLAHE(clipLimit = 20)
    return equalizer.apply(img)

In [18]:
def downsample(img, pix):
    """Resize an image array to pix x pix, divide by 255 and add a channel axis.

    Parameters
    ----------
    img : numpy.ndarray
        Image array accepted by `PIL.Image.fromarray`.
    pix : int
        Target width and height in pixels.

    Returns
    -------
    numpy.ndarray
        The resized image divided by 255, with one extra trailing axis.
    """
    resized = Image.fromarray(img).resize((pix, pix))
    scaled = np.array(resized) / 255
    return scaled[..., np.newaxis]

Leo un número concreto de imágenes con y sin neumonía

In [19]:
# Pre-allocate the image tensor and fill it row by row, following the order
# of the 'Image Index' column of the (re-indexed) working table.
images = np.zeros((len(df), pixels, pixels, 1))
for row, img_name in enumerate(df['Image Index']):
    images[row] = charge_specific_img(img_name, pixels)

In [20]:
# Targets: the 0/1 'Pneumonia' indicator column; inputs: the loaded image array.
Y = np.array(df['Pneumonia'])
X = images

In [21]:
# 70/30 train/test split, stratified on the label so both parts keep the same
# class proportions. The fixed random_state makes the split reproducible.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y, test_size=0.3, shuffle=True, stratify=Y, random_state=42
)

# Model

In [22]:
# Image side length used for the model input shapes (same value as the one
# used when the images were loaded).
pixels = 256

In [23]:
# EfficientNetB0 backbone without its classification head, taking 3-channel
# pixels x pixels inputs. weights=None: no pretrained weights are loaded.
input_shape = (pixels,pixels,3)
conv_base = EfficientNetB0(weights=None, include_top=False, input_shape=input_shape)

In [35]:
# Classifier: an initial convolution maps the 1-channel input to the 3 channels
# the backbone expects, followed by the backbone, global max pooling, dropout
# and a single sigmoid output unit.
model = models.Sequential([
    layers.Conv2D(
        3,
        3,
        padding="same",
        input_shape=(pixels, pixels, 1),
        activation='elu',
        name='conv_inicial',
    ),
    conv_base,
    layers.GlobalMaxPooling2D(name="general_max_pooling"),
    layers.Dropout(0.2, name="dropout_out"),
    layers.Dense(1, activation="sigmoid", name="fc_out"),
])

In [36]:
# Training configuration.
# The learning rate is now actually handed to the optimizer. Previously `lr`
# was defined (as 0.5) but never used, so Adam silently ran with its default
# of 1e-3; that effective value is kept here so training is unchanged.
lr = 1e-3
opt = tf.keras.optimizers.Adam(learning_rate=lr)
loss = 'binary_crossentropy'
# Metrics reported during training, given by name.
met = ['BinaryAccuracy', 'Precision', 'AUC']

In [37]:
# Quick sanity check: number of top-level layers and of trainable variables.
print(len(model.layers))
print(len(model.trainable_variables))

5
215


In [38]:
# Attach the optimizer, loss and metrics defined above to the model.
model.compile(optimizer=opt, loss = loss , metrics = met)

In [39]:
# Train on the training arrays for 100 epochs in batches of 16, holding out
# 20% of them for validation; the per-epoch results are kept in `history`.
history = model.fit(X_train,Y_train,
                    batch_size = 16,
                    epochs = 100,
                    shuffle = True,
                    validation_split = 0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100


Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100


Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
