> # Detección de neumonía en imágenes médicas.

El desafío de la competencia consiste en crear un algoritmo que localice señales visuales de neumonía en imágenes médicas. Concretamente, el algoritmo debe detectar las opacidades pulmonares (en radiografías de tórax) características de esta enfermedad.

Lectura del archivo .csv que detalla la información de las clases.

In [None]:
import csv

# Preview the first 500 characters of the detailed class-info CSV.
# The context manager closes the handle as soon as the preview is printed,
# and read(500) avoids loading the whole file only to slice off its head.
with open('../input/rsna-pneumonia-detection-challenge/stage_2_detailed_class_info.csv') as file:
    print(file.read(500))

De forma análoga, lectura con pandas del archivo .csv con las etiquetas de entrenamiento:

In [None]:
import pandas as pd

# Load the training labels table into a DataFrame.
dataframe = pd.read_csv('/kaggle/input/rsna-pneumonia-detection-challenge/stage_2_train_labels.csv')  

# Display 6 randomly sampled rows as the cell output.
dataframe.sample(6)

In [None]:
#Características de las imágenes.
import pydicom

# Inspect the pixel data of the first patient listed in the labels table.
patientId = dataframe['patientId'][0]
dcm_file = '../input/rsna-pneumonia-detection-challenge/stage_2_train_images/%s.dcm' % patientId
# dcmread is the supported pydicom reader; the read_file alias is deprecated.
dcm_data = pydicom.dcmread(dcm_file)

# Report the container type, element dtype and dimensions of the image.
im = dcm_data.pixel_array
print(type(im))
print(im.dtype)
print(im.shape)

 Clases y cantidad de imágenes por cada una:
* No Lung Opacity / Not Normal: pulmones con alguna anomalía que no es neumonía.
* Normal: no hay indicios de neumonía.
* Lung Opacity: pulmones que potencialmente tienen neumonía.

In [None]:
df_detailed = pd.read_csv('../input/rsna-pneumonia-detection-challenge/stage_2_detailed_class_info.csv')

# Tally how many rows carry each class label; keys are inserted in order of
# first appearance in the 'class' column.
summary = {}
for class_name in df_detailed['class']:
    summary[class_name] = summary.get(class_name, 0) + 1

print(summary)

In [None]:
# Histograma de las clases
import matplotlib.pyplot as plt

# Re-read the class table, using its first column as the index.
clases = pd.read_csv('../input/rsna-pneumonia-detection-challenge/stage_2_detailed_class_info.csv', index_col=0)
plt.xlabel("Clases")
plt.ylabel("Cantidad de imágenes")
# Histogram of the 'class' column with 5 bins; brown bars with black edges.
# Kept as the last expression so its return value is the cell output.
plt.hist(clases['class'], 5, color="brown", ec='black')

# Detección de posibles áreas con neumonía. 

In [None]:
def parse_data(dataframe, dataframe_detailed):
    """
    Group the label rows by patient.

    Parameters
    ----------
    dataframe : table with the columns 'patientId', 'Target', 'x', 'y',
        'width' and 'height', one row per bounding box.
    dataframe_detailed : accepted for interface compatibility; it is not
        read by this function.

    Returns
    -------
    (parsed, pids)
        parsed maps each patient id to a dict with the keys
          'dicom' : path of the patient's .dcm file,
          'label' : the 'Target' value of the patient's first row,
          'boxes' : one [y, x, height, width] list per row of the patient.
        pids lists the patient ids in order of first appearance.

    A box is appended for every row regardless of its label, so rows whose
    coordinates are missing contribute a box of NaN values.
    """
    parsed = {}
    pids = []

    # Walk the needed columns in lockstep instead of materialising one
    # Series per row with iterrows(); the values produced are the same.
    columns = ('patientId', 'Target', 'y', 'x', 'height', 'width')
    rows = zip(*(dataframe[name] for name in columns))

    for pid, target, y, x, height, width in rows:
        # --- Initialize patient entry into parsed
        if pid not in parsed:
            pids.append(pid)
            parsed[pid] = {
                'dicom': '../input/rsna-pneumonia-detection-challenge/stage_2_train_images/%s.dcm' % pid,
                'label': target,
                'boxes': []}

        # --- Coordinates are stored in [y, x, height, width] order
        parsed[pid]['boxes'].append([y, x, height, width])

    return parsed, pids

In [None]:
# Build the per-patient dictionary and the list of patient ids from the labels table.
parsed, pids = parse_data(dataframe, df_detailed)

In [None]:
import numpy as np
import pylab

def draw(data):
    """
    Show one patient's image with its bounding box(es) drawn on top.

    data is a per-patient entry holding the DICOM path under 'dicom' and a
    list of [y, x, height, width] boxes under 'boxes'. The image is rendered
    with pylab on the current figure, with the axes hidden.
    """
    # --- Open DICOM file (dcmread replaces the deprecated read_file alias)
    d = pydicom.dcmread(data['dicom'])
    im = d.pixel_array

    # --- Convert from single-channel grayscale to 3-channel RGB
    im = np.stack([im] * 3, axis=2)

    # --- Add boxes with random color if present
    for box in data['boxes']:
        rgb = np.floor(np.random.rand(3) * 256).astype('int')
        im = overlay_box(im=im, box=box, rgb=rgb, stroke=6)

    pylab.imshow(im, cmap=pylab.cm.gist_gray)
    pylab.axis('off')

def overlay_box(im, box, rgb, stroke=1):
    """
    Paint the outline of a single box onto an image, in place.

    im     : image array indexed as [row, column, channel]; it is modified
             and also returned.
    box    : [y, x, height, width]; each value is truncated to int.
    rgb    : color assigned to every outline pixel.
    stroke : outline thickness in pixels.

    The top and left edges start at (y, x); the bottom and right edges are
    painted just outside the box, starting at y + height and x + width.
    """
    # --- Convert coordinates to integers
    box = [int(b) for b in box]

    # --- Extract coordinates
    y1, x1, height, width = box
    y2 = y1 + height
    x2 = x1 + width

    im[y1:y1 + stroke, x1:x2] = rgb
    im[y2:y2 + stroke, x1:x2] = rgb
    im[y1:y2, x1:x1 + stroke] = rgb
    # The right edge runs down to y2 + stroke so that it meets the bottom
    # edge; stopping at y2 left the bottom-right corner unpainted.
    im[y1:y2 + stroke, x2:x2 + stroke] = rgb

    return im

def crop_image(data):
    """
    Load a patient's image and cut out the region of its last bounding box.

    data is a per-patient entry holding the DICOM path under 'dicom' and a
    list of [y, x, height, width] boxes under 'boxes'.

    Returns (im, new_image): the full image stacked into 3 identical
    channels, and the slice of it covered by the last box in data['boxes'].
    When a patient has several boxes only the last one is cropped.

    Raises ValueError when data['boxes'] is empty, since there is then no
    region to crop.
    """
    boxes = data['boxes']
    if not boxes:
        raise ValueError('crop_image needs at least one box for %s' % data['dicom'])

    # --- Open DICOM file (dcmread replaces the deprecated read_file alias)
    d = pydicom.dcmread(data['dicom'])

    # --- Convert from single-channel grayscale to 3-channel RGB
    im = np.stack([d.pixel_array] * 3, axis=2)

    # --- Convert the coordinates of the last box to integers
    y1, x1, height, width = (int(b) for b in boxes[-1])

    new_image = im[y1:y1 + height, x1:x1 + width]

    return im, new_image

In [None]:
# Parsed entry of a hard-coded patient id (an image with pneumonia).
print(parsed['00436515-870c-4b36-a041-de91049b9ab4'])

In [None]:
# Parsed entry of the first patient id collected by parse_data.
print(parsed[pids[0]])

In [None]:
# For rows with Target == 1: per column, is any box coordinate missing?
dataframe.loc[dataframe['Target']==1, ['x', 'y', 'width', 'height']].isnull().any()

In [None]:
# For rows with Target == 0: per column, is every box coordinate missing?
dataframe.loc[dataframe['Target']==0, ['x', 'y', 'width', 'height']].isnull().all()

**Información que contienen las imágenes .dcm:**

In [None]:
# Pick one random patient id and print the contents of its DICOM file.
pId = dataframe['patientId'].sample(1).values[0]
# dcmread is the supported pydicom reader; the read_file alias is deprecated.
dcmdata = pydicom.dcmread('../input/rsna-pneumonia-detection-challenge/stage_2_train_images/'+pId+'.dcm')
print(dcmdata)

In [None]:
# Display the pixel data of the DICOM file read above on a 20x10 figure,
# using the 'binary' colormap and with the axes hidden.
dcmimg = dcmdata.pixel_array
plt.figure(figsize=(20,10))
plt.imshow(dcmimg, cmap=pylab.cm.binary)
plt.axis('off')

Nuevo dataset:

In [None]:
# Display the rows labelled Target == 1.
dataframe.loc[dataframe['Target']==1]

In [None]:
# Give every Target == 0 row the same fixed box so that each patient has a
# region to crop. A single masked assignment per column replaces the
# row-by-row iterrows()/.at loop and writes the same values.
DEFAULT_BOX = {'x': 264, 'y': 152, 'width': 256, 'height': 379}
no_opacity = dataframe['Target'] == 0
for column, value in DEFAULT_BOX.items():
    dataframe.loc[no_opacity, column] = value

# Rebuild the per-patient dictionary so it picks up the filled-in boxes.
parsed, pids = parse_data(dataframe, df_detailed)



In [None]:
# Detection of the affected area in the image: crop the box of a hard-coded
# patient id, then show the crop followed by the full image.
image_original, image_cropped = crop_image(parsed['00436515-870c-4b36-a041-de91049b9ab4'])
plt.figure(figsize=(20,10))
plt.imshow(image_cropped, cmap=pylab.cm.binary)
plt.axis('off')
plt.figure(figsize=(20,10))
plt.imshow(image_original, cmap=pylab.cm.binary)
plt.axis('off')

# Resize the crop to 128 rows by 64 columns and show the result.
from skimage.transform import resize
image_resized = resize(image_cropped, (128,64))
plt.figure(figsize=(20,10))
plt.imshow(image_resized, cmap=pylab.cm.binary)
plt.axis('off')


**Redimensión de imágenes, del set de entrenamiento, en imágenes de 128x64:**

In [None]:
dataset_train_im = []
dataset_train_labels = []

# Only the first 10000 patient ids are used. Each one contributes its box
# crop resized to 128x64, together with its label.
for pid in pids[:10000]:
    entry = parsed[pid]
    full_image, box_crop = crop_image(entry)
    dataset_train_im.append(resize(box_crop, (128,64)))
    dataset_train_labels.append(entry['label'])
    

In [None]:
# Print the label of each of the first ten samples and show its image.
for label, sample in zip(dataset_train_labels[0:10], dataset_train_im[0:10]):
    print(label)
    plt.figure(figsize=(20,10))
    plt.imshow(sample, cmap=pylab.cm.binary)
    plt.axis('off')

Se divide el set de datos: el 90 % para entrenamiento y el resto para el set de prueba, con sus respectivas etiquetas:

In [None]:
# Positions below 90% of the dataset length go to the training split,
# the remaining positions go to the test split.
long = (0.9*len(dataset_train_im))

train_im = [image for position, image in enumerate(dataset_train_im) if position < long]
test_im = [image for position, image in enumerate(dataset_train_im) if not position < long]

print(len(train_im))
print(len(test_im))


In [None]:
# Split the labels at 90% of their own length. The cut-off is long1, computed
# here, so this cell no longer relies on a variable left behind by the
# image-split cell.
long1 = (0.9*len(dataset_train_labels))

train_labels = [label for position, label in enumerate(dataset_train_labels) if position < long1]
test_labels = [label for position, label in enumerate(dataset_train_labels) if position >= long1]

print(len(train_labels))
print(len(test_labels))

**Modelo:**

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
# Best effort: any exception raised by the magic below is deliberately
# ignored, so the cell also runs where the magic is unavailable.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Show which TensorFlow version was actually imported.
print(tf.__version__)

In [None]:
# Fully connected binary classifier: a 128x64 input is flattened, passed
# through a 128-unit ReLU layer and reduced to a single sigmoid output.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(128, 64)))
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and
# accuracy reported as the metric.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

**Set de datos de entrenamiento:**

In [None]:
# Convert the training images to grayscale.
train_im = np.array(train_im)
train_labels = np.array(train_labels)

print(train_im.shape)

# Each image is reduced with a weighted sum over its last (channel) axis.
channel_weights = [0.2989, 0.5870, 0.1140]
train_im_gray = np.array([np.dot(image, channel_weights) for image in train_im])

In [None]:
# Train the model for 10 epochs on the grayscale training images.
model.fit(train_im_gray, train_labels, epochs=10)

In [None]:
test_im = np.array(test_im)
test_labels = np.array(test_labels)

# Convert the test images to grayscale: each image is reduced with a
# weighted sum over its last (channel) axis.
channel_weights = [0.2989, 0.5870, 0.1140]
test_im_gray = np.array([np.dot(image, channel_weights) for image in test_im])

In [None]:
# Evaluate the model on the grayscale test images and report its accuracy.
test_loss, test_acc = model.evaluate(test_im_gray,  test_labels, verbose=2)

print('\nTest accuracy:', test_acc)

El modelo realiza predicciones sobre el set de prueba:

In [None]:
# Show the shape of the test batch and of a single test image.
print(test_im_gray.shape)
print(test_im_gray[0].shape)
predicts = model.predict(test_im_gray)
# Binarize the model outputs: values at or above 0.9 become 1, the rest 0.
threshold = 0.9
predicts = (predicts >= threshold).astype(int)

In [None]:
# Example: the thresholded prediction at index 2 of the test set.
predicts[2]

In [None]:
# Check that prediction against the label at the same index.
test_labels[2]

In [None]:
import os
# List the entries of the ../input/pruebas/ directory.
print(os.listdir('../input/pruebas/'))

In [None]:
# Same directory, listed through the shell's ls command.
!ls ../input/pruebas/

In [None]:
import cv2

# Load two external sample images as single-channel grayscale.
img1 = cv2.imread('../input/pruebas/neumo1.png', cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread('../input/pruebas/no_neumo.jpg', cv2.IMREAD_GRAYSCALE)
#img = cv2.bitwise_not(img)
# NOTE(review): cv2.resize takes its size as (width, height), so (64,128)
# should give arrays of 128 rows by 64 columns, like the model input - confirm.
img1 = cv2.resize(img1,(64,128))
img2 = cv2.resize(img2,(64,128))

# Show both resized images; the axes are hidden only on the first figure.
plt.figure()
plt.imshow(img1, cmap=pylab.cm.binary)
plt.axis('off')
plt.figure()
plt.imshow(img2, cmap=pylab.cm.binary)

In [None]:
# The model was trained on crops produced by skimage's resize, which by
# default returns floats scaled to [0, 1]. The images loaded with cv2 hold
# 8-bit intensities in [0, 255], so they are rescaled to the same range
# before the leading batch axis is added.
img1 = np.expand_dims(img1.astype('float32') / 255.0, axis=0)
img2 = np.expand_dims(img2.astype('float32') / 255.0, axis=0)

In [None]:
# Run the model on each single-image batch and print the raw outputs
# (no threshold is applied here).
predictions_single1 = model.predict(img1)
predictions_single2 = model.predict(img2)

print(predictions_single1)
print(predictions_single2)

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Draw a confusion matrix as an annotated heat map on the current figure.

    cm        : 2-D array of counts, indexed as [true label, predicted label].
    classes   : tick labels, used on both axes.
    normalize : when True, every row is divided by its own sum and the cell
                values are written with two decimals instead of as integers.
    title     : title of the plot.
    cmap      : colormap used for the heat map.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    if normalize:
        cell_format = '.2f'
    else:
        cell_format = 'd'

    # Cells above half of the maximum get white text, the others black text.
    midpoint = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            value = cm[row, col]
            if value > midpoint:
                text_color = "white"
            else:
                text_color = "black"
            plt.text(col, row, format(value, cell_format),
                     horizontalalignment="center",
                     color=text_color)

    plt.tight_layout()
    plt.ylabel('Etiqueta correcta')
    plt.xlabel('Etiqueta predicha')
from sklearn.metrics import confusion_matrix
import itertools

# Pin the label order so the matrix is always 2x2 and lines up with the
# ["0", "1"] tick labels, even when one class is missing from the test
# labels and the predictions. The matrix is computed once and reused.
cm = confusion_matrix(test_labels, predicts, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()
plot_confusion_matrix(cm,["0","1"])