# Montar drive

In [None]:
# Mount Google Drive at /content/drive (Colab runtime only); the dataset and
# model paths used by the following cells live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

## Revisar partición de datos

In [None]:
import os

# Root folder of the dataset on the mounted Drive.
dir_dataset = '/content/drive/MyDrive/projects/brainTumor/dataset'

# Names of the two split folders inside the dataset root.
train_path, val_path = 'train', 'val'

# Full paths of the training and validation splits.
train_dir, val_dir = (
    os.path.join(dir_dataset, split_name) for split_name in (train_path, val_path)
)

def count_files_in_subdirectories(base_directory):
    """
    Print how many regular files each immediate subdirectory of
    ``base_directory`` contains, followed by the grand total.

    Only direct children are inspected: files sitting directly in
    ``base_directory`` and anything nested deeper than one level are not
    counted. If ``base_directory`` is not an existing directory, an error
    message is printed instead.

    Args:
        base_directory: Path of the directory whose subdirectories are scanned.

    Returns:
        None. All results are reported through ``print``.
    """
    if os.path.isdir(base_directory):
        print(f"\nCounting files in subdirectories of: {base_directory}")
        running_total = 0
        for entry in os.listdir(base_directory):
            candidate = os.path.join(base_directory, entry)
            if not os.path.isdir(candidate):
                # Loose files at the top level are ignored.
                continue
            file_count = sum(
                1
                for item in os.listdir(candidate)
                if os.path.isfile(os.path.join(candidate, item))
            )
            print(f"  {entry}/: {file_count} files")
            running_total += file_count
        print(f"Total files in {base_directory}: {running_total}")
    else:
        print(f"Error: Directory not found at {base_directory}")

# Report per-class file counts for the training split, then the validation split.
for split_dir in (train_dir, val_dir):
    count_files_in_subdirectories(split_dir)

# The train/val directories follow the 80/20 rule.


# Importar librerías

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Precision, Recall, AUC
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import numpy as np
from sklearn.utils import class_weight


# Preparar datos

## Constantes de datos de entrada

In [None]:
# Hyperparameters and paths for the EfficientNetB0 experiment.

# Input image size in pixels (height and width are equal).
IMG_HEIGHT = IMG_WIDTH = 224

# Number of images per batch.
# EfficientNet is efficient, so a larger batch size can often be used.
BATCH_SIZE = 32

# Learning rate handed to the Adam optimizer.
learning_rate = 1e-3

# Number of epochs requested from model.fit.
epochs = 15

# Rate of the Dropout layer placed before the output neuron.
DROPOUT = 0.5

# Drive folder under which the log directory and model checkpoint paths are built.
model_save_path = '/content/drive/MyDrive/projects/brainTumor/'


# Aumentar datos de entrenamiento

In [None]:
# Data Augmentation for Training
train_image_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Only preprocessing for Validation
validation_image_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input
)

train_data_gen = train_image_generator.flow_from_directory(
    batch_size=BATCH_SIZE,
    directory=train_dir,
    shuffle=True,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary' # Changed to binary
)

val_data_gen = validation_image_generator.flow_from_directory(
    batch_size=BATCH_SIZE,
    directory=val_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary' # Changed to binary
)


Found 3679 images belonging to 2 classes.
Found 921 images belonging to 2 classes.


## Verificar clases

In [None]:
print("\n--- Verificación de Clases e Índices ---")

# Mapping {class folder name -> integer label} assigned by the training generator.
class_indices = train_data_gen.class_indices
print(f"Mapeo de clases a índices: {class_indices}")

# Build the list of class names ordered by their integer label, so that
# class_names[i] is the name of the class encoded as i. This is what lets a
# predicted label be translated back into a human-readable class.
num_classes = len(class_indices)
class_names = sorted(class_indices, key=class_indices.get)

print(f"Nombres de las clases en el orden de los índices del modelo: {class_names}")

## Calcular pesos

In [None]:
# --- Compute class weights ---
# The weights are derived from the label distribution of the training set.
# The generator's .classes attribute holds the integer class label of every
# image it found, so the weights reflect exactly the data the generator serves.

print("\n--- Calculando pesos de las clases para manejar el desbalance ---")
# Integer class label of every training image.
labels = train_data_gen.classes

# Labels that actually occur in the training data, in ascending order.
present_classes = np.unique(labels)

# 'balanced' assigns each class a weight inversely proportional to its
# frequency, so classes with fewer samples receive a larger weight.
weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=labels
)

# Keras expects a {class_label: weight} dictionary. Pair every weight with the
# label it was computed for instead of with its position in the array:
# positions and labels only coincide when no class index is missing from the
# data. Plain int/float values keep the dictionary (and its printout) free of
# NumPy scalar types.
class_weights = {
    int(label): float(weight)
    for label, weight in zip(present_classes, weights)
}

print(f"Pesos de las clases calculados: {class_weights}")
# Example output for this binary problem: {0: 1.5, 1: 0.75}
# The keys are the class labels from train_data_gen.class_indices and the
# values are the weights. A weight > 1 marks a minority class, whose errors
# will contribute more to the loss.

# Cargar modelo preentrenado EfficientNetB0

In [None]:
# ImageNet-pretrained EfficientNetB0 used as a feature extractor: the original
# classification top is dropped so a custom head can be attached.
base_model = EfficientNetB0(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
)

# Keep the pretrained weights fixed; only the new head below is trainable.
base_model.trainable = False

# Custom head: pool the feature maps, then apply dropout.
pooled_features = GlobalAveragePooling2D()(base_model.output)
x = Dropout(DROPOUT)(pooled_features)

# Binary classifier: a single sigmoid neuron.
outputs = Dense(1, activation='sigmoid')(x)


## compilar modelo

In [None]:
# Assemble the full network: EfficientNetB0 input through to the sigmoid output.
model = Model(inputs=base_model.input, outputs=outputs)

# Adam with the configured learning rate.
adam_optimizer = Adam(learning_rate=learning_rate)

# Metrics tracked during training. The explicit names fix the history keys,
# e.g. 'precision' is reported as 'val_precision' on the validation data.
tracked_metrics = [
    'accuracy',
    Precision(name='precision'),
    Recall(name='recall'),
    AUC(name='auc'),
]

# Binary cross-entropy matches the single sigmoid output.
model.compile(
    optimizer=adam_optimizer,
    loss='binary_crossentropy',
    metrics=tracked_metrics,
)

model.summary()


## Definir pasos y callbacks

In [None]:
# Output locations under the model folder: a log directory and the checkpoint
# file that stores the model with the best validation precision.
log_dir = os.path.join(model_save_path, "logs")
pathModeloMejorPrecision = os.path.join(model_save_path, 'modeloMejorPrecision.h5')

# Batches per epoch, rounded up so a partial last batch is also counted.
steps_per_epoch_train = (train_data_gen.samples + BATCH_SIZE - 1) // BATCH_SIZE
steps_per_epoch_val = (val_data_gen.samples + BATCH_SIZE - 1) // BATCH_SIZE

# All callbacks watch 'val_precision' and use mode='max' because a higher
# precision is better.

# Stop when validation precision has not improved for 5 epochs and roll the
# model back to the weights of its best epoch.
early_stopping = EarlyStopping(
    monitor='val_precision',
    mode='max',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Write the model to disk only when validation precision reaches a new best.
best_checkpoint = ModelCheckpoint(
    filepath=pathModeloMejorPrecision,
    monitor='val_precision',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# After 3 epochs without improvement, multiply the learning rate by 0.2
# (an 80% reduction), never going below 1e-5.
lr_on_plateau = ReduceLROnPlateau(
    monitor='val_precision',
    mode='max',
    factor=0.2,
    patience=3,
    min_lr=0.00001,
    verbose=1,
)

callbacks = [early_stopping, best_checkpoint, lr_on_plateau]


# Entrenar modelo

## Entrenar modelo base EfficientNetB0

In [None]:
print("Iniciando entrenamiento...")
# The result is stored as history_base (rather than history) so it is not
# confused with history_fine_tuning.
history_base = model.fit(
    x=train_data_gen,
    validation_data=val_data_gen,
    epochs=epochs,
    # Batches drawn per epoch from the training and validation generators.
    steps_per_epoch=steps_per_epoch_train,
    validation_steps=steps_per_epoch_val,
    # Per-class loss weights that compensate for class imbalance.
    class_weight=class_weights,
    # Early stopping, best-model checkpointing and learning-rate scheduling.
    callbacks=callbacks,
)
print("Entrenamiento finalizado.")
