# **Detección histopatológica del cáncer.**

El desafío de la competencia consiste en crear un algoritmo para identificar el cáncer metastásico en pequeños parches de imágenes tomadas de exploraciones patológicas digitales más grandes.

# **Exploración del set de datos.**

Características:
1. Las imágenes tienen un tamaño de 96x96 píxeles.
2. Una etiqueta positiva indica que la región central de 32x32 píxeles de un parche contiene al menos un píxel de tejido tumoral. El tejido tumoral en la región externa del parche no influye en la etiqueta.

In [None]:
import csv
import numpy as np
import pandas as pd
import numpy as np
import os

In [None]:
# Load the training label table and preview its first rows.
dataframe = pd.read_csv(
    '/kaggle/input/histopathologic-cancer-detection/train_labels.csv'
)
dataframe.head()

In [None]:
# Number of images per class: non-pathological (0) and pathological (1).
label_column = dataframe['label']
label_column.value_counts()

In [None]:
# Print how many files each image directory contains:
# first the training images, then the test images.
for image_dir in (
    '../input/histopathologic-cancer-detection/train',
    '../input/histopathologic-cancer-detection/test',
):
    print(len(os.listdir(image_dir)))

In [None]:
# Histogram of the number of images per class.
import matplotlib.pyplot as plt

clases = pd.read_csv(
    '../input/histopathologic-cancer-detection/train_labels.csv',
    index_col=0,
)

# Axis captions are set before drawing; the hist call stays last so the
# notebook still displays its return value.
plt.xlabel("No patológicas - Patológicas.")
plt.ylabel("Cantidad de imágenes")
plt.hist(clases['label'], bins=3, color="blue", edgecolor='black')

# **Modelo de clasificación con redes totalmente conectadas.**

In [None]:
# TensorFlow setup for the fully connected classifier cells below.
from __future__ import absolute_import, division, print_function, unicode_literals
# Request TensorFlow 2.x through the `%tensorflow_version` notebook magic.
# Any exception raised by that line is deliberately ignored.
# NOTE(review): this magic looks Colab-specific -- confirm it is needed here.
try:
  %tensorflow_version 2.x
except Exception:
  pass

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Show which TensorFlow version was actually imported.
print(tf.__version__)

In [None]:
# List the training directory in sorted order and show the first 20 names.
files = sorted(os.listdir('../input/histopathologic-cancer-detection/train/'))
for file_name in files[:20]:
    print(file_name)

In [None]:
# Read the label table, order it by image id and show the first 20 rows.
labels = pd.read_csv(
    '../input/histopathologic-cancer-detection/train_labels.csv'
)
labels = labels.sort_values(by='id')
print(labels.head(20))

In [None]:
import cv2

# Load a subset of the training patches as grayscale arrays scaled by 1/256.
train_dir = '../input/histopathologic-cancer-detection/train/'

files = os.listdir(train_dir)
files.sort()

dataset_train = []
# FIXME: only the first 10000 files (in sorted order) are loaded. The label
# cell slices the id-sorted label table to the same length, so the two
# limits have to be changed together.
# NOTE(review): images and labels are paired purely by position after
# sorting -- confirm every file has exactly one label row.
for position, file_name in enumerate(files[0:10000]):
    if position < 20:
        # Echo the first few names so the ordering can be checked by eye.
        print(file_name)
    image = cv2.imread(os.path.join(train_dir, file_name), cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread signals an unreadable file by returning None instead of
        # raising; fail here with the offending name rather than with an
        # opaque TypeError on the division below.
        raise IOError('Could not read image: ' + os.path.join(train_dir, file_name))
    dataset_train.append(image / 256.)

dataset_train = np.array(dataset_train)

print(dataset_train.shape)
print(dataset_train[0])

In [None]:
# Build the label array that accompanies the loaded training images.
dataset_train_labels = pd.read_csv('../input/histopathologic-cancer-detection/train_labels.csv')
# Sort by id so the row order follows the sorted file listing used for images.
dataset_train_labels.sort_values(by='id', inplace = True)

# FIXME: limited to the first 10000 rows; must stay equal to the number of
# images loaded by the image-loading cell.
dataset_train_labels = np.array(dataset_train_labels[0:10000])

print(dataset_train_labels.shape)
# Preview the rows of the array built here (not a variable from another cell).
print(dataset_train_labels[0:20])


In [None]:
# Split the loaded images: the first 90% for training, the rest for testing.
# `long` is the fractional 90% boundary; rounding it up gives the same cut
# as testing `i < long` for every integer index i.
long = 0.9 * len(dataset_train)
img_split = int(np.ceil(long))

# Slicing replaces the element-by-element append loop; astype() copies the
# data, so the two splits do not alias `dataset_train`.
img_train = np.asarray(dataset_train[:img_split]).astype('float32')
img_test = np.asarray(dataset_train[img_split:]).astype('float32')

print(img_train.shape)
print(img_test.shape)

In [None]:
# Split the labels with the same 90% / 10% cut used for the images.
# `long1` is the fractional 90% boundary; rounding it up gives the same cut
# as testing `i < long1` for every integer index i.
long1 = 0.9 * len(dataset_train_labels)
label_split = int(np.ceil(long1))

# Column 1 of each row is the label value (presumably column 0 is the image
# id -- verify against the CSV header).
label_column = np.asarray(dataset_train_labels)[:, 1]

train_labels = label_column[:label_split].astype('float32')
test_labels = label_column[label_split:].astype('float32')

print(train_labels.shape)
print(test_labels.shape)

In [None]:
# Fully connected binary classifier: a 96x96 input flattened into a vector,
# one hidden ReLU layer of 128 units, and a single sigmoid output unit.
# Both dense layers use L1+L2 kernel regularisation and L2 bias regularisation.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(96, 96)))
model.add(
    keras.layers.Dense(
        128,
        activation='relu',
        kernel_regularizer=keras.regularizers.l1_l2(l1=1e-5, l2=1e-4),
        bias_regularizer=keras.regularizers.l2(1e-4),
    )
)
model.add(
    keras.layers.Dense(
        1,
        activation='sigmoid',
        kernel_regularizer=keras.regularizers.l1_l2(l1=1e-5, l2=1e-4),
        bias_regularizer=keras.regularizers.l2(1e-4),
    )
)

In [None]:
# Configure training: binary cross-entropy loss, the Adam optimizer, and
# accuracy reported as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

In [None]:
# Train the model on the training split for 50 epochs.
model.fit(x=img_train, y=train_labels, epochs=50)

In [None]:
# Predict on the held-out images and turn each score into a hard 0/1 label.
predicts = model.predict(img_test)

# Threshold the first (only) output column at 0.5 in one vectorised step:
# strictly greater than 0.5 becomes 1.0, everything else 0.0 (float64, as
# the previous per-row float() conversion produced).
predicts_slice = (np.asarray(predicts)[:, 0] > 0.5).astype('float64')
print(predicts_slice)

In [None]:
# Element-wise difference between predicted and true labels; an entry of 0
# means the prediction matched the label.
total = predicts_slice - test_labels
print(total)

# Accuracy is the share of zero entries, i.e. everything that is not a miss.
misses = np.count_nonzero(total)
accuracy = (len(total) - misses) / len(total)
print(accuracy)

In [None]:
# Placeholder cell (no code).