### Import the CNN Util and libraries needed
The `cnn_utils` helper module makes it easy to create and try new variations of the CNN model while keeping the way we analyze and evaluate each one consistent.

In [None]:
# Import necessary libraries
import cnn_utils
from keras import layers, models
from keras.src.legacy.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.optimizers import Adam


### Load the data

In [None]:
# Load the raw CIFAR-10 data through the project helper.
# NOTE(review): the structure of the returned object is defined in cnn_utils — confirm there.
data_dict = cnn_utils.load_cifar10_from_tar()

### Preprocess the data

In [None]:
# Turn the raw data into the `data` mapping used by the rest of the notebook
# (later cells read data['X_train'] and pass `data` to the training/evaluation helpers).
data = cnn_utils.preprocess_data(data_dict)

### Let's do a quick visualization of sample images (to also ensure we still have the correct shape)

In [None]:
# Sanity check: display sample images from the preprocessed data via the project helper.
cnn_utils.visualize_data_samples(data)


### Data Augmentation

Moderately strong augmentation that applies realistic random transformations to training images:
- Rotation (±20°), shifting (up to 15% in each direction), and zooming (±15%) simulate natural camera angle and distance variations
- Random horizontal flipping shows the model mirror images (works well for CIFAR-10 since objects like cars/planes still look realistic when flipped)
- Brightness scaling (0.8–1.2×) and a small channel shift (0.1) add lighting and color variation


In [None]:
def create_augmentation():
    """Build the ImageDataGenerator used to augment training images.

    Returns:
        An ``ImageDataGenerator`` that applies random rotation, shifts, zoom,
        horizontal flips, brightness changes and channel shifts.
    """
    geometric_settings = {
        'rotation_range': 20,
        'width_shift_range': 0.15,
        'height_shift_range': 0.15,
        'zoom_range': 0.15,
        'horizontal_flip': True,
        # Pixels exposed by a shift/rotation are filled from the nearest edge.
        'fill_mode': 'nearest',
    }
    # NOTE(review): the legacy brightness transform round-trips each image
    # through an 8-bit PIL image. If preprocess_data scales pixels to [0, 1],
    # this may destroy the image content — confirm the input value range.
    color_settings = {
        'brightness_range': [0.8, 1.2],
        'channel_shift_range': 0.1,
    }
    return ImageDataGenerator(**geometric_settings, **color_settings)

# Instantiate the augmentation generator and fit it on the training images.
# NOTE(review): per the Keras docs, fit() is only required when featurewise_center,
# featurewise_std_normalization or zca_whitening is enabled; none of them is set in
# create_augmentation(), so this call looks unnecessary — confirm before removing.
augmentation = create_augmentation()
augmentation.fit(data['X_train'])

### Let's define our CNN model (architecture)
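A deeper architecture aimed at higher accuracy.

Structure:
- 4 convolutional blocks (64→128→256→512 filters); block 3 adds a 1×1 convolution for channel mixing
- BatchNormalization after each conv layer
- Progressive dropout (0.25→0.25→0.4→0.5), using spatial dropout in blocks 2 and 3
- Global average pooling followed by a 256-neuron dense layer (dropout 0.5) and a softmax output
- L2 weight regularization (1e-4) on every conv layer and the hidden dense layer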

In [None]:
def create_cnn_model(input_shape=(32, 32, 3), num_classes=10, l2_strength=0.0001):
    """Build the (uncompiled) sequential CNN classifier.

    Layer layout:
      * Block 1: 2 x Conv(64, 3x3)  -> MaxPool -> Dropout(0.25)
      * Block 2: 2 x Conv(128, 3x3) -> MaxPool -> SpatialDropout2D(0.25)
      * Block 3: 2 x Conv(256, 3x3) + Conv(256, 1x1) -> MaxPool
                 -> SpatialDropout2D(0.4)
      * Block 4: 2 x Conv(512, 3x3) -> Dropout(0.5)   (no pooling)
      * Head:    GlobalAveragePooling -> Dense(256) -> BatchNorm
                 -> Dropout(0.5) -> Dense(num_classes, softmax)

    Every convolution uses 'same' padding, ReLU, L2 kernel regularization and
    is followed by BatchNormalization.

    Args:
        input_shape: Shape of one input image, without the batch dimension.
        num_classes: Number of units in the softmax output layer.
        l2_strength: L2 regularization factor applied to every conv kernel
            and to the hidden dense layer.

    Returns:
        A ``keras.models.Sequential`` model; the caller must compile it.
    """
    model = models.Sequential()

    def add_conv_bn(filters, kernel_size=(3, 3)):
        # One regularized ReLU convolution followed by batch normalization.
        model.add(layers.Conv2D(filters, kernel_size, padding='same',
                                activation='relu',
                                kernel_regularizer=l2(l2_strength)))
        model.add(layers.BatchNormalization())

    # Declare the input explicitly: passing `input_shape=` to the first layer
    # of a Sequential model is deprecated in Keras 3 and emits a warning.
    model.add(layers.Input(shape=input_shape))

    # Block 1
    add_conv_bn(64)
    add_conv_bn(64)
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(layers.Dropout(0.25))

    # Block 2 - spatial dropout drops whole feature maps instead of single units
    add_conv_bn(128)
    add_conv_bn(128)
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(layers.SpatialDropout2D(0.25))

    # Block 3 - two 3x3 convolutions plus a 1x1 convolution for channel mixing
    add_conv_bn(256)
    add_conv_bn(256)
    add_conv_bn(256, kernel_size=(1, 1))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(layers.SpatialDropout2D(0.4))

    # Block 4 - deepest features; no pooling here because global average
    # pooling follows immediately
    add_conv_bn(512)
    add_conv_bn(512)
    model.add(layers.Dropout(0.5))

    # Global average pooling instead of Flatten keeps the classifier small
    model.add(layers.GlobalAveragePooling2D())

    # Classifier head (plain feed-forward; there is no skip connection)
    model.add(layers.Dense(256, activation='relu',
                           kernel_regularizer=l2(l2_strength)))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(num_classes, activation='softmax'))

    return model

In [None]:
# Training callbacks: early stopping plus learning-rate reduction on plateau
def get_enhanced_callbacks():
    """Return the list of Keras callbacks used during training.

    Both callbacks watch the validation loss:
      * EarlyStopping ends training after 15 epochs without improvement and
        restores the best weights seen.
      * ReduceLROnPlateau halves the learning rate after 7 epochs without
        improvement, down to a floor of 1e-7.
    """
    watched_metric = 'val_loss'

    stop_early = EarlyStopping(
        monitor=watched_metric,
        patience=15,
        restore_best_weights=True,
        verbose=1,
    )
    shrink_lr = ReduceLROnPlateau(
        monitor=watched_metric,
        factor=0.5,
        patience=7,
        min_lr=1e-7,
        verbose=1,
    )
    return [stop_early, shrink_lr]

In [None]:
# Build the network and compile it for training.
model = create_cnn_model()

# Keras 3 optimizers no longer accept `decay` (it is ignored with a warning),
# so it is not passed here; learning-rate reduction during training is done by
# the ReduceLROnPlateau callback from get_enhanced_callbacks().
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

cnn_utils.print_model_summary(model)


### Now let's train the model

In [None]:
# Train through the project helper, passing the augmentation generator and callbacks.
# The returned `history` is later handed to cnn_utils.evaluate_model.
history = cnn_utils.train_model(
    model, 
    data, 
    augmentation=augmentation,
    epochs=80,  # upper bound; the EarlyStopping callback (patience=15) can end training sooner
    batch_size=32,  # mini-batch size passed to the training helper
    callbacks=get_enhanced_callbacks()
)



### Let's show the evaluation result

In [None]:
# Report the evaluation via the project helper, given the trained model, the data and the training history.
cnn_utils.evaluate_model(model, data, history)