##### **IMAGE GENERATOR**

Genera varias imagenes que son distintas variaciones de una misma imágen.

In [53]:
from utils import IMAGE_HEIGHT, IMAGE_WIDTH, BATCH_SIZE, EPOCHS, DATA_PATH, balanceData

import pandas as pd 
import matplotlib.pyplot as plt
import os   # Manipulacion de directorios y archivos del ordenador.

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator

from typing import List

In [41]:
files = os.listdir(DATA_PATH)  # Lista de los nombres de los archivos en el directorio especificado.
category = [0 if 'normal' in file else 1 for file in files]  # Columna con la categoría de la imagen: 0 celulas normales, 1 celulas atipicas.
df_cito = pd.DataFrame({
            'filename (X)' : files,
            'category (Y)' : category }).sample(frac=1, random_state=14, ignore_index=True) # Aleatorizamos el orden de las filas para minimizar sesgos en la seleccion de datos.

df_cito

Unnamed: 0,filename (X),category (Y)
0,atipica- (254).bmp,1
1,atipica- (1494).bmp,1
2,atipica- (717).bmp,1
3,normal- (2039).bmp,0
4,normal- (1425).bmp,0
...,...,...
4044,normal- (1662).bmp,0
4045,normal- (1733).bmp,0
4046,atipica- (688).bmp,1
4047,atipica- (756).bmp,1


In [75]:
df_cito["category (Y)"] = df_cito["category (Y)"].replace({0: 'normal', 1: 'atipica'})

print("Categories:", df_cito["category (Y)"].unique())

Categories: ['atipica' 'normal']


In [76]:
train_datagen = ImageDataGenerator(
    rotation_range=15,
    rescale=1./255,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1
)

train_generator = train_datagen.flow_from_dataframe(
    df_cito.loc[:round(0.8*len(df_cito)),:], 
    DATA_PATH, 
    x_col='filename (X)',
    y_col='category (Y)',
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='binary',
    batch_size=BATCH_SIZE
)

Found 3240 validated image filenames belonging to 2 classes.


In [77]:
train_df, validate_df = train_test_split(df_cito.sample(4049),
                                         test_size=0.15,
                                         random_state=14)

train_df

Unnamed: 0,filename (X),category (Y)
335,normal- (1789).bmp,normal
56,normal- (1218).bmp,normal
3389,atipica- (638).bmp,atipica
1420,normal- (1530).bmp,normal
1628,normal- (17).bmp,normal
...,...,...
1444,atipica- (90).bmp,atipica
1242,normal- (1275).bmp,normal
1120,atipica- (1606).bmp,atipica
2715,atipica- (1315).bmp,atipica


In [78]:
print("Shape train", train_df.shape[0])
print("Shape validation", validate_df.shape[0])
train_df['category (Y)'].value_counts()

Shape train 3441
Shape validation 608


normal     2053
atipica    1388
Name: category (Y), dtype: int64

In [81]:
## BALANCEAMOS X_train

filename_balanced = balanceData(train_df['category (Y)'])[0] 
category_column = [0 if 'normal' in file else 1 for file in filename_balanced] 
df_cito = pd.DataFrame({
            'filename (X)' : filename_balanced,
            'category (Y)' : category_column }).sample(frac=1, random_state=14, ignore_index=True) 

df_cito

Unnamed: 0,filename (X),category (Y)
0,normal,0
1,normal,0
2,atipica,1
3,atipica,1
4,normal,0
...,...,...
2771,atipica,1
2772,atipica,1
2773,normal,0
2774,normal,0


##### **VALIDATION** 

In [None]:
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(
    train_df, 
    DATA_PATH, 
    x_col='filename',
    y_col='category',
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='binary',
    batch_size=BATCH_SIZE
)

##### **FIT MODEL** 

In [None]:
## ARQUITECTURA 


model = Sequential()
## CAPAS DE CONVOLUCION-POOLING

# PRIMERA CAPA 
model.add(Conv2D(filters = 32, kernel_size= (3,3), activation='relu',
                input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS)))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

# SEGUNDA CAPA
model.add(Conv2D(filters = 64, kernel_size= (3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

# TERCERA CAPA
model.add(Conv2D(filters = 128, kernel_size= (3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))


## CAPAS DE CLASIFICACION

# INPUT LAYER
model.add(Flatten())

# HIDEN LAYER
model.add(Dense(units = 512, activation= 'relu'))
model.add(Dropout(0.5))

# OUTPUT LAYER 
model.add(Dense(units= 1, activation= 'sigmoid'))


model.compile(optimizer='adam', loss='binary_crossentropy',
             metrics=['accuracy']) 

In [None]:
history = model.fit(
    train_generator, 
    epochs=EPOCHS,
    validation_data=validation_generator
)

In [None]:
results = model.evaluate(X_test, y_test)
print("test loss, test acc:", results)

In [None]:
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 1) # set the vertical range to [0-1]
plt.show()

In [None]:
model.save("model.h5")