# RADIOGRAFÍAS

### Paquetes

In [1]:
# Paquetes de imagen
import SimpleITK as sitk
from skimage import exposure

# Paquetes de df y arrays
import numpy as np
import pandas as pd
import os
from ipywidgets import interact, fixed

%matplotlib inline
import matplotlib.pyplot as plt

# Del propio paquete
from downloaddata import fetch_data as fdata

# OS
from os import listdir
from os.path import isfile, join

image_viewer = sitk.ImageViewer()

### Lector de imagen

In [2]:
fiji = 'D:/Users/María Rollán/Documents/Fiji.app/ImageJ-win64.exe'
image_viewer.SetApplication(fiji)
ctFolder = 'Vessel_stencils'

### Logging

In [3]:
import logging
import sys

# Configura el logging
log_format = '[%(process)d]\t%(asctime)s %(levelname)s: %(message)s'
logging.basicConfig(format=log_format, level=logging.INFO, datefmt="%H:%M:%S",
                    handlers=[logging.StreamHandler(sys.stdout)])

### Funciones

In [4]:
def histo(img):
    '''
    Create a histogram from a SITK image
    
    Parameters
    ----------
    img (SITK image)
    '''
    arr = sitk.GetArrayFromImage(img)
    fig = plt.figure(figsize = (7,5))
    ax = fig.gca()
    ax.hist(arr.flatten(), bins = 255)
    plt.show(fig)

def plotImg(img, color = 'gray'):
    '''
    Plot a SITK image
    
    Parameters
    ----------
    img (SITK image)
    color (string): the colour for image representation
    '''
    arr = sitk.GetArrayFromImage(img)
    plt.imshow(arr, cmap = color)

## Dataset

In [5]:
df = pd.read_csv('CXR8/Data_Entry_2017_v2020.csv')
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y]
0,00000001_000.png,Cardiomegaly,0,1,57,M,PA,2682,2749,0.143,0.143
1,00000001_001.png,Cardiomegaly|Emphysema,1,1,58,M,PA,2894,2729,0.143,0.143
2,00000001_002.png,Cardiomegaly|Effusion,2,1,58,M,PA,2500,2048,0.168,0.168
3,00000002_000.png,No Finding,0,2,80,M,PA,2500,2048,0.171,0.171
4,00000003_001.png,Hernia,0,3,74,F,PA,2500,2048,0.168,0.168


Sacamos los posibles labels que hay

In [6]:
labels = np.unique(df['Finding Labels'])
labels = '|'.join(labels)
labels = labels.split('|')
labels = np.unique(labels).tolist()

Creamos una nueva columna por cada uno de los labels

In [7]:
for lab in labels:
    df[lab] = pd.NA

Creamos una funcion para rellenar esas columnas por cada paciente

In [8]:
def fill_label(information, label):
    positive_labels = information.split('|')
    if label in positive_labels:
        return 1
    return 0

In [9]:
# map(objeto enumerado x: funcion(x, y), lista de x)
for lab in labels:
    df[lab] = list(map(lambda x: fill_label(x, lab), df['Finding Labels'].tolist()))

## Imagenes

In [10]:
def charge_images(folder):
    path = os.path.join(folder, 'images')
    images_path = [f for f in listdir(path) if isfile(join(path, f))]
    images = [sitk.ReadImage(os.path.join(path, image_path)) for image_path in images_path]
    data = {'path': images_path, 'image': images}
    return data

In [11]:
subfolders = [f.path for f in os.scandir('./CXR8/images/') if f.is_dir()]

No se pueden cargar todas las imagenes así que solo he cargado la carpeta 1 y 2 (subfolders[0:1])

In [13]:
keys = ['path', 'image']
all_images = dict.fromkeys(keys)

for folder in subfolders[0:1]:
    logging.info('[F]\tFolder {}'.format(folder))
    group = folder.split('/')
    folder_info = charge_images(folder)
    try:
        logging.info('[F]\tappending')
        all_images['path'][0].append(folder_info['path'])
        all_images['image'][0].append(folder_info['image'])
    except:
        logging.info('[F]\tstarting')
        all_images['path'] = folder_info['path']
        all_images['image'] = folder_info['image']

[3352]	22:16:02 INFO: [F]	Folder ./CXR8/images/images_001
[3352]	22:17:21 INFO: [F]	appending
[3352]	22:17:21 INFO: [F]	starting


In [14]:
all_images = pd.DataFrame(all_images)

In [15]:
all_images.head()

Unnamed: 0,path,image
0,00000001_000.png,"[202, 199, 195, 193, 195, 194, 193, 192, 184, ..."
1,00000001_001.png,"[208, 205, 206, 205, 207, 205, 207, 202, 204, ..."
2,00000001_002.png,"[7, 10, 9, 8, 8, 8, 7, 7, 7, 6, 6, 7, 7, 6, 6,..."
3,00000002_000.png,"[199, 175, 152, 133, 124, 118, 113, 111, 110, ..."
4,00000003_000.png,"[69, 58, 49, 42, 36, 30, 25, 23, 20, 18, 16, 1..."


Ahora voy a añadir datos a este nuevo dataframe

In [16]:
def get_column(path, column):
    value = df[column][df['Image Index'] == path]
    value = list(value)[0]
    return value

In [17]:
all_images['neumonia'] = list(map(lambda x: get_column(x, 'Pneumonia'), all_images['path']))

In [18]:
all_images.head()

Unnamed: 0,path,image,neumonia
0,00000001_000.png,"[202, 199, 195, 193, 195, 194, 193, 192, 184, ...",0
1,00000001_001.png,"[208, 205, 206, 205, 207, 205, 207, 202, 204, ...",0
2,00000001_002.png,"[7, 10, 9, 8, 8, 8, 7, 7, 7, 6, 6, 7, 7, 6, 6,...",0
3,00000002_000.png,"[199, 175, 152, 133, 124, 118, 113, 111, 110, ...",0
4,00000003_000.png,"[69, 58, 49, 42, 36, 30, 25, 23, 20, 18, 16, 1...",0


In [42]:
all_images['image'][2].GetSize()

(1024, 1024)

## Filtros sobre imágenes

### Equalizar

In [33]:
def equalizar(img):
    '''
    Equalize a SITK image
    
    Parameters
    ----------
    img (SITK image)
    
    Returns
    -------
    img (SITK image)    
    '''
    imgArr = sitk.GetArrayFromImage(img)
    
    # Contrast stretching
    p2, p98 = np.percentile(imgArr, (2, 98))
    img_rescale = exposure.rescale_intensity(imgArr, in_range=(p2, p98))

    # Equalization
    img_eq = exposure.equalize_hist(imgArr)

    # Adaptive Equalization
    img_adapteq = exposure.equalize_adapthist(imgArr, clip_limit=0.03)
    logging.info('[F]\tend equalizar')
        
    return img_adapteq

In [None]:
# all_images['equalizado'] = list(map(lambda x: equalizar(x), all_images['image']))

## Red neuronal

In [50]:
import seaborn as sns

import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout 
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam

from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import train_test_split

import tensorflow as tf

import cv2

Defino el modelo

In [22]:
model = Sequential()
model.add(Conv2D(32,3,padding="same", activation="relu", input_shape=(224,224,3)))
model.add(MaxPool2D())

model.add(Conv2D(32, 3, padding="same", activation="relu"))
model.add(MaxPool2D())

model.add(Conv2D(64, 3, padding="same", activation="relu"))
model.add(MaxPool2D())
model.add(Dropout(0.4))

model.add(Flatten())
model.add(Dense(128,activation="relu"))
model.add(Dense(2, activation="softmax"))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 224, 224, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 112, 112, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 112, 112, 32)      9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 56, 56, 32)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 56, 56, 64)        18496     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 28, 28, 64)       0

In [23]:
opt = Adam(lr=0.000001)
model.compile(optimizer = opt , loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) , metrics = ['accuracy'])

  super(Adam, self).__init__(name, **kwargs)


Cojo X e Y

In [66]:
X = list(map(lambda x: equalizar(x), all_images['image'][1:100]))
Y = list(all_images['neumonia'][1:100])

[3352]	22:53:31 INFO: [F]	end equalizar
[3352]	22:53:31 INFO: [F]	end equalizar
[3352]	22:53:31 INFO: [F]	end equalizar
[3352]	22:53:31 INFO: [F]	end equalizar
[3352]	22:53:32 INFO: [F]	end equalizar
[3352]	22:53:32 INFO: [F]	end equalizar
[3352]	22:53:32 INFO: [F]	end equalizar
[3352]	22:53:32 INFO: [F]	end equalizar
[3352]	22:53:32 INFO: [F]	end equalizar
[3352]	22:53:33 INFO: [F]	end equalizar
[3352]	22:53:33 INFO: [F]	end equalizar
[3352]	22:53:34 INFO: [F]	end equalizar
[3352]	22:53:34 INFO: [F]	end equalizar
[3352]	22:53:34 INFO: [F]	end equalizar
[3352]	22:53:34 INFO: [F]	end equalizar
[3352]	22:53:35 INFO: [F]	end equalizar
[3352]	22:53:35 INFO: [F]	end equalizar
[3352]	22:53:35 INFO: [F]	end equalizar
[3352]	22:53:35 INFO: [F]	end equalizar
[3352]	22:53:36 INFO: [F]	end equalizar
[3352]	22:53:36 INFO: [F]	end equalizar
[3352]	22:53:36 INFO: [F]	end equalizar
[3352]	22:53:36 INFO: [F]	end equalizar
[3352]	22:53:37 INFO: [F]	end equalizar
[3352]	22:53:37 INFO: [F]	end equalizar


Divido en train y test

In [79]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

Normalizo los datos

In [80]:
X_train = X_train / 255
X_test = X_test / 255

X_train.reshape(-1, img_size, img_size, 1)
Y_train = Y_train

X_test.reshape(-1, img_size, img_size, 1)
Y_test = Y_test

TypeError: unsupported operand type(s) for /: 'list' and 'int'

Aumento del train data

datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range = 30,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.2, # Randomly zoom image 
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip = True,  # randomly flip images
        vertical_flip=False)  # randomly flip images


datagen.fit(x_train)

Entreno el modelo

In [81]:
history = model.fit(X_train,Y_train,epochs = 500 , validation_data = (X_test, Y_test))

ValueError: Failed to find data adapter that can handle input: (<class 'list'> containing values of types {"<class 'numpy.ndarray'>"}), (<class 'list'> containing values of types {"<class 'int'>"})

Compruebo el modelo

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(500)

plt.figure(figsize=(15, 15))
plt.subplot(2, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(2, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
predictions = model.predict_classes(X_test)
predictions = predictions.reshape(1,-1)[0]
print(classification_report(Y_test, predictions, target_names = ['No neumonia','Neumonia']))