##### **IMAGE GENERATOR**

Genera varias imagenes en base a variaciones de una misma imágen de partida. Lo usaremos pare reentrenar el modelo.

In [1]:
from utils.utils import IMAGE_HEIGHT, IMAGE_WIDTH,IMAGE_CHANNELS, BATCH_SIZE, EPOCHS, DATA_PATH, PATIENCE, balanceData

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os 

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense

##### **PREPARACION DE DATOS** 

In [2]:
## CREAMOS DF DEL DATASET

files = os.listdir(DATA_PATH)  # Lista de los nombres de los archivos en el directorio especificado.
category = ['normal' if 'normal' in file else 'atipic' for file in files]  # Columna con la categoría de cada imagen.
df_cito = pd.DataFrame({
            'filename (X)' : files,
            'category (Y)' : category }).sample(frac=1, random_state=14, ignore_index=True) # Aleatorizamos el orden de las filas.


In [3]:
## SPLIT TRAIN-VAL CONSERVANDO LA PROPORCION DE CATEGORIAS
X_train, X_val, y_train, y_val = train_test_split(df_cito['filename (X)'], df_cito['category (Y)'], train_size=0.85, random_state=14, stratify =df_cito['category (Y)'])

In [4]:
print("Size train", len(X_train))
print("Size validation", len(X_val))
print('Category normal:', len(['normal' for file in X_train if 'normal' in file])/len(X_train))
print('Category atipic:', len(['atipic' for file in X_train if 'normal' in file])/len(X_train))

Size train 3441
Size validation 608
Category normal: 0.5954664341761116
Category atipic: 0.5954664341761116


In [5]:
## BALANCEAMOS EN X_TRAIN
filename_balanced = balanceData(X_train)[0] 

## REHACEMOS DATAFRAMES DE TRAIN Y VALIDATION
category_column = ['normal' if 'normal' in file else 'atipic' for file in filename_balanced] 
X_train = pd.DataFrame({
            'filename (X)' : filename_balanced,
            'category (Y)' : category_column }).sample(frac=1, random_state=14, ignore_index=True) 
X_val = pd.DataFrame({
            'filename (X)' : X_val,
            'category (Y)' : y_val }).sample(frac=1, random_state=14, ignore_index=True) 

print(X_train['category (Y)'].value_counts())

atipic    1392
normal    1392
Name: category (Y), dtype: int64


In [6]:
print(len(X_train[X_train['category (Y)']=='atipic'])/len(X_train))
print(len(X_train[X_train['category (Y)']=='normal'])/len(X_train))
print(len(X_val[X_val['category (Y)']=='atipic'])/len(X_val))
print(len(X_val[X_val['category (Y)']=='normal'])/len(X_val))

0.5
0.5
0.40460526315789475
0.5953947368421053


##### **CONFIGURACION DEL GENERADOR DE IMAGENES** 

In [7]:
## CONFIGURAMOS EL GENERADOR DE IMAGENES EN TRAIN

train_datagen = ImageDataGenerator(
    rotation_range=15,
    rescale=1./255,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1)

train_generator = train_datagen.flow_from_dataframe(
    X_train, 
    DATA_PATH, 
    x_col='filename (X)',
    y_col='category (Y)',
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='binary',
    batch_size=BATCH_SIZE)

Found 2784 validated image filenames belonging to 2 classes.


In [8]:
## CONFIGURAMOS EL GENERADOR DE IMAGENES EN VALIDATION

validation_datagen = ImageDataGenerator(rescale=1./255)

validation_generator = validation_datagen.flow_from_dataframe(
    X_val, 
    DATA_PATH, 
    x_col='filename (X)',
    y_col='category (Y)',
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='binary',
    batch_size=BATCH_SIZE
)

Found 608 validated image filenames belonging to 2 classes.


##### **FIT MODEL** 

In [9]:
## ARQUITECTURA DEL MODELO

model = Sequential()

## CAPAS DE CONVOLUCION-POOLING

# PRIMERA CAPA 
model.add(Conv2D(filters = 32, kernel_size= (3,3), activation='relu',
                input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS)))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

# SEGUNDA CAPA
model.add(Conv2D(filters = 64, kernel_size= (3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

# TERCERA CAPA
model.add(Conv2D(filters = 128, kernel_size= (3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))


## CAPAS DE CLASIFICACION

# INPUT LAYER (ENTRADA.Aplana las images 3D a 1D)
model.add(Flatten())

# HIDEN LAYER
model.add(Dense(units = 512, activation= 'relu'))
model.add(Dropout(0.5))

# OUTPUT LAYER 
model.add(Dense(units= 1, activation= 'sigmoid'))

# CONFIGURACION DE EJECUCION (COMPILE)
model.compile(optimizer='adam', loss='binary_crossentropy',
             metrics=['accuracy']) 

In [10]:
## REENTRENAMIENTO DEL MODELO

earlystop =EarlyStopping(patience= PATIENCE, restore_best_weights= True)

history = model.fit(
    train_generator, 
    epochs=EPOCHS,
    batch_size = BATCH_SIZE,
    callbacks = earlystop,
    validation_data=validation_generator,
    verbose = 2
)

Epoch 1/50


KeyboardInterrupt: 

In [None]:
## BEST SCORE
results = model.evaluate(train_generator)
print("best score train:", results)

In [None]:
## GRAFICA HISTORIAL
pd.DataFrame(history.history).plot(figsize=(5, 3))
plt.grid(True)
plt.gca().set_ylim(0, 1) # set the vertical range to [0-1]
plt.show()

In [None]:
## GUARDAMOS MODELO
model.save("./models/modelo-gen.h5") 