## 1. Libraries

In [None]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from google.colab import drive
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical

## 2. Mount Google Drive 

In [None]:
# Mount Google Drive under /content/drive so the paths below resolve.
drive.mount('/content/drive')

# Drive folder that receives the per-fold checkpoints and per-model metrics CSVs.
OUTPUT_DIR = '/content/drive/MyDrive/trained_models_000'
# exist_ok=True creates the folder only when it is missing, without a separate
# "exists?" check that could race with the creation.
os.makedirs(OUTPUT_DIR, exist_ok=True)

## 3. Unzip resized_dataset

In [None]:
# IPython shell escape: extract the dataset archive stored on Drive into /content/resized_dataset.
!unzip "/content/drive/MyDrive/main/resized_dataset.zip" -d "/content/resized_dataset"

## 4. Configuration

In [None]:
# Size every image is resized to when loaded, in pixels.
IMG_HEIGHT = 384
IMG_WIDTH = 512

# Training hyper-parameters.
BATCH_SIZE = 32  # samples per generator batch
NUM_FOLDS = 10   # number of K-Fold splits
EPOCHS = 30      # epochs trained per fold

# Dataset root; each sub-directory name is used as a class label.
DATASET_DIR = '/content/resized_dataset/resized_dataset'

# Seed for the shuffled K-Fold split.
RANDOM_STATE = 42

## 5. Choose the starting model

In [None]:
# Index of the first model to train: 1 = MobileNetV2, 2 = ResNet50, 3 = Xception.
# Models with a lower index are skipped when the training list is built.
start_model = 1

## 6. Load Data

In [None]:
def load_data(dataset_dir, img_height, img_width):
    """Load every image below ``dataset_dir`` into memory with its class label.

    Each immediate sub-directory of ``dataset_dir`` is treated as one class;
    its name is the label of every file inside it. Entries of ``dataset_dir``
    that are not directories are ignored.

    Args:
        dataset_dir: Root folder containing one sub-directory per class.
        img_height: Height in pixels each image is resized to.
        img_width: Width in pixels each image is resized to.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` holds the image arrays
        divided by 255.0 and ``y`` the matching label strings. Files that
        fail to load are reported on stdout and left out of both arrays.
    """
    images, labels = [], []
    # os.listdir returns entries in arbitrary, filesystem-dependent order.
    # Sorting makes the sample order deterministic, so a seeded K-Fold split
    # over the returned arrays selects the same images on every run.
    for label in sorted(os.listdir(dataset_dir)):
        label_path = os.path.join(dataset_dir, label)
        if not os.path.isdir(label_path):
            continue
        for file_name in sorted(os.listdir(label_path)):
            file_path = os.path.join(label_path, file_name)
            try:
                img = load_img(file_path, target_size=(img_height, img_width))
                pixels = img_to_array(img) / 255.0
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole load.
                print(f"Error loading {file_path}: {e}")
                continue
            images.append(pixels)
            labels.append(label)
    return np.array(images), np.array(labels)

# Read the whole dataset into memory as (images, label strings).
X, y = load_data(DATASET_DIR, IMG_HEIGHT, IMG_WIDTH)

# Map the label strings to integer ids, then one-hot encode the ids to match
# the categorical cross-entropy loss the models are compiled with.
encoder = LabelEncoder()
encoder.fit(y)
y_encoded = encoder.transform(y)
num_classes = np.unique(y_encoded).size
y_categorical = to_categorical(y_encoded, num_classes=num_classes)

## 7. Define Models

In [None]:
def create_mobilenet(img_height, img_width, num_classes):
    """Build and compile a MobileNetV2 transfer-learning classifier.

    The ImageNet-pretrained MobileNetV2 base is frozen; only the pooling and
    dense head on top of it is trained.

    Args:
        img_height: Input image height in pixels.
        img_width: Input image width in pixels.
        num_classes: Number of output classes (size of the softmax layer).

    Returns:
        A compiled ``tf.keras.Sequential`` model that expects RGB inputs
        scaled to [0, 1] and one-hot encoded targets.
    """
    input_shape = (img_height, img_width, 3)
    base_model = tf.keras.applications.MobileNetV2(weights='imagenet', include_top=False,
                                                   input_shape=input_shape)
    base_model.trainable = False
    model = tf.keras.Sequential([
        tf.keras.Input(shape=input_shape),
        # The ImageNet weights were trained on pixels in [-1, 1]
        # (mobilenet_v2.preprocess_input). The data pipeline feeds [0, 1], so
        # remap here; keeping it in the model means saved checkpoints carry
        # their own preprocessing.
        tf.keras.layers.Rescaling(scale=2.0, offset=-1.0),
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_resnet50(img_height, img_width, num_classes):
    """Build and compile a ResNet50 transfer-learning classifier.

    The ImageNet-pretrained ResNet50 base is frozen; only the pooling and
    dense head on top of it is trained.

    Args:
        img_height: Input image height in pixels.
        img_width: Input image width in pixels.
        num_classes: Number of output classes (size of the softmax layer).

    Returns:
        A compiled ``tf.keras.Sequential`` model for one-hot encoded targets.
    """
    # NOTE(review): Keras documents that ResNet50 ImageNet weights expect
    # resnet50.preprocess_input (RGB->BGR, per-channel mean-centring, 0-255
    # range, no scaling), while load_data feeds pixels scaled to [0, 1].
    # Confirm whether model-specific preprocessing should be added.
    backbone = tf.keras.applications.ResNet50(
        include_top=False,
        weights='imagenet',
        input_shape=(img_height, img_width, 3),
    )
    backbone.trainable = False

    model = tf.keras.Sequential()
    model.add(backbone)
    model.add(tf.keras.layers.GlobalAveragePooling2D())
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

def create_xception(img_height, img_width, num_classes):
    """Build and compile an Xception transfer-learning classifier.

    The ImageNet-pretrained Xception base is frozen; a dropout-regularised
    dense head is trained on top of it with Adam at a learning rate of 1e-4.

    Args:
        img_height: Input image height in pixels.
        img_width: Input image width in pixels.
        num_classes: Number of output classes (size of the softmax layer).

    Returns:
        A compiled ``tf.keras.Sequential`` model that expects RGB inputs
        scaled to [0, 1] and one-hot encoded targets.
    """
    input_shape = (img_height, img_width, 3)
    base_model = tf.keras.applications.Xception(weights='imagenet', include_top=False,
                                                input_shape=input_shape)
    base_model.trainable = False
    model = tf.keras.Sequential([
        tf.keras.Input(shape=input_shape),
        # The ImageNet weights were trained on pixels in [-1, 1]
        # (xception.preprocess_input). The data pipeline feeds [0, 1], so
        # remap here; keeping it in the model means saved checkpoints carry
        # their own preprocessing.
        tf.keras.layers.Rescaling(scale=2.0, offset=-1.0),
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(1024, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Registry of available architectures: selection index -> (display name, builder).
models_info = {
    1: ('MobileNetV2', create_mobilenet),
    2: ('ResNet50', create_resnet50),
    3: ('Xception', create_xception),
}

# Train every model whose index is at or above start_model:
# 1 trains all three, 2 skips MobileNetV2, 3 trains Xception only.
train_models = [index for index in models_info if index >= start_model]

## 8. K-Fold setup

In [None]:
# Shuffled K-Fold splitter; the fixed random_state seeds the shuffle.
kf = KFold(
    n_splits=NUM_FOLDS,
    shuffle=True,
    random_state=RANDOM_STATE,
)

# Training batches are randomly augmented (rotation, shifts, horizontal flip).
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=20,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# Validation batches are passed through with no augmentation configured.
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator()

## 9. Training with Cross-Validation 

In [None]:
# Train each selected architecture with K-Fold cross-validation. For every
# fold the best checkpoint (by validation accuracy) is written to OUTPUT_DIR,
# and after the last fold the per-epoch metrics of all folds are written to a
# single CSV per model.
METRIC_COLUMNS = ['fold_no', 'epoch', 'accuracy', 'loss', 'val_accuracy', 'val_loss']

for key in train_models:
    model_name, model_func = models_info[key]

    # Per-fold history frames. They are concatenated once after the last fold
    # instead of growing a DataFrame (starting from an empty one) every fold.
    fold_histories = []

    for fold_no, (train_idx, val_idx) in enumerate(kf.split(X), start=1):
        # A fresh model is built for every fold. Release the state Keras keeps
        # for the previous fold's model so memory use does not grow with the
        # number of folds and models.
        tf.keras.backend.clear_session()

        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y_categorical[train_idx], y_categorical[val_idx]

        train_generator = train_datagen.flow(X_train, y_train, batch_size=BATCH_SIZE, shuffle=True)
        # Validation data is neither shuffled nor augmented.
        val_generator = val_datagen.flow(X_val, y_val, batch_size=BATCH_SIZE, shuffle=False)

        model = model_func(IMG_HEIGHT, IMG_WIDTH, num_classes)

        # Keep only the epoch with the highest validation accuracy for this fold.
        checkpoint_path = os.path.join(OUTPUT_DIR, f'{model_name}_fold_{fold_no}.keras')
        checkpoint = ModelCheckpoint(checkpoint_path, save_best_only=True,
                                     monitor='val_accuracy', mode='max')

        history = model.fit(
            train_generator,
            epochs=EPOCHS,
            validation_data=val_generator,
            callbacks=[checkpoint],
            verbose=1
        )

        # One row per epoch, tagged with the fold it belongs to.
        history_df = pd.DataFrame(history.history)
        history_df['fold_no'] = fold_no
        history_df['epoch'] = history_df.index + 1  # epochs are reported 1-based
        fold_histories.append(history_df[METRIC_COLUMNS])

    metrics_df = pd.concat(fold_histories, ignore_index=True)

    # Save metrics to a CSV file per model
    csv_path = os.path.join(OUTPUT_DIR, f'{model_name}_metrics.csv')
    metrics_df.to_csv(csv_path, index=False)
    print(f'Métricas guardadas en {csv_path}')