In [10]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models,Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Flatten
from tensorflow.keras.utils import to_categorical

In [None]:
#Preprocesamiento de la data
(x_train_0, y_train_fine), (x_test, y_test_fine) = tf.keras.datasets.cifar100.load_data(
    label_mode='fine'
)
import matplotlib.pyplot as plt

# Split the training data into training and validation sets
x_train_1, x_val, y_train, y_val = train_test_split(x_train_0, y_train_fine, test_size=0.2, random_state=42)
print("Size of x_train_1:", x_train_1.shape)
print("Size of x_val:", x_val.shape)


# Convert labels to one-hot encoding
num_classes = 100
y_train_ = to_categorical(y_train, num_classes)
y_val = to_categorical(y_val, num_classes)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
Size of x_train_1: (40000, 32, 32, 3)
Size of x_val: (10000, 32, 32, 3)


Utilice la data augmentation pero parece que lo único que hizo fue ralentizar el proceso de entrenamiento. Para intentar mejorar un poco la data sí hice un horizontal flip, duplicando la cantidad de datos de train.

In [None]:
#Data Augmentation

x_train_norm=x_train_1

datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    zoom_range=0.1,
    shear_range=0.1
)


# Primer approach a data augmentation
x_train_flipped = np.flip(x_train_norm, axis=2)
x_train = np.concatenate((x_train_norm, x_train_flipped), axis=0)
y_train = np.concatenate((y_train_, y_train_), axis=0)

datagen.fit(x_train)
print("Size of x_train:", x_train.shape)
print("Size of y_train:", y_train.shape)

Size of x_train: (80000, 32, 32, 3)
Size of y_train: (80000, 100)


Encontré una función de activación swish, que es muy parecida a la ReLu pero da mejores resultados
swish(x)=x/(1+e^(-bx))



In [None]:
input_shape = x_train.shape[1:]  # (32, 32, 3)

model = Sequential([
    Flatten(input_shape=(32,32,3)),

    Dense(1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    BatchNormalization(),
    Dropout(0.2),
    Dense(512, activation='swish', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    BatchNormalization(),
    Dense(256, activation='swish', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    Dropout(0.25),
    Dense(100, activation='softmax')
])
# Compile the model with learning rate scheduling
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-3,
    decay_steps=10000,
    decay_rate=1)

optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Callbacks

from tensorflow.keras.callbacks import EarlyStopping,LearningRateScheduler,ModelCheckpoint,ReduceLROnPlateau, TensorBoard
from tensorflow.keras.optimizers import Adam

# Model Checkpoint
mc = ModelCheckpoint(
    "best_weights.h5",
    monitor = "val_accuracy",
    verbose = 1,
    save_best_only = True,
    save_weights_only = True,
)

# Reduce Learning Rate on Plateau
rlrop = ReduceLROnPlateau(
    monitor = "val_accuracy",
    factor = 0.5,
    patience = 10,
    verbose = 1,
    min_lr = 1e-5
)

# Early Stopping
es = EarlyStopping(
    monitor = "val_accuracy",
    patience = 20,                # Probamos con 3, 7, 10, 15, 20, 100 (sin early stopping basicamente)
    verbose = 1,
    restore_best_weights = True,
)

# Tensorboard
tb = TensorBoard(
    log_dir="logs",
)

In [None]:
history = model.fit(x_train, y_train, #batch_size=64),
                    batch_size=64,
                    epochs=200,
                    validation_data=(x_val, y_val),
                    callbacks=[rlrop, es, mc, tb])

Epoch 1/200
Epoch 1: val_accuracy improved from -inf to 0.07370, saving model to best_weights.h5
Epoch 2/200
Epoch 2: val_accuracy improved from 0.07370 to 0.10970, saving model to best_weights.h5
Epoch 3/200
Epoch 3: val_accuracy improved from 0.10970 to 0.11760, saving model to best_weights.h5
Epoch 4/200
Epoch 4: val_accuracy did not improve from 0.11760
Epoch 5/200
Epoch 5: val_accuracy improved from 0.11760 to 0.12570, saving model to best_weights.h5
Epoch 6/200
Epoch 6: val_accuracy did not improve from 0.12570
Epoch 7/200
Epoch 7: val_accuracy did not improve from 0.12570
Epoch 8/200
Epoch 8: val_accuracy improved from 0.12570 to 0.14430, saving model to best_weights.h5
Epoch 9/200
Epoch 9: val_accuracy improved from 0.14430 to 0.15470, saving model to best_weights.h5
Epoch 10/200
Epoch 10: val_accuracy improved from 0.15470 to 0.18490, saving model to best_weights.h5
Epoch 11/200
Epoch 11: val_accuracy did not improve from 0.18490
Epoch 12/200
Epoch 12: val_accuracy did not imp

In [12]:
# Evaluate the model on the test set
y_test_fine = to_categorical(y_test_fine, num_classes)
test_loss, test_acc = model.evaluate(x_test,y_test_fine, verbose=2)
print('Test accuracy:', test_acc)


313/313 - 1s - loss: 3.5429 - accuracy: 0.2274 - 715ms/epoch - 2ms/step
Test accuracy: 0.227400004863739
