In [None]:
# CNN training accuracy decrease with regularizers

* [What may cause the CNN layer weight regularizer to reduce the model accuracy](https://datascience.stackexchange.com/q/124728/68313)

In [1]:
import tensorflow as tf
from keras.layers import (
    Conv2D,
    MaxPooling2D,
    BatchNormalization,
    Dense,
    Flatten,
    Dropout,
)
from keras.models import (
    Sequential
)
from keras.optimizers import (
    Adam
)
from keras.preprocessing.image import (
    ImageDataGenerator
)
from sklearn.model_selection import (
    train_test_split
)

2023-11-24 14:34:04.488657: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-24 14:34:04.515398: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-24 14:34:04.515418: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-24 14:34:04.515443: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-24 14:34:04.520583: I tensorflow/core/platform/cpu_feature_g

In [2]:
# Experiment-wide settings shared by every cell below.
NUM_CLASSES: int = 10  # width of the one-hot targets and of the softmax output
BATCH_SIZE: int = 32   # samples per batch drawn from the augmenting generator
EPOCHS: int = 3        # number of training epochs for each model

In [3]:
# Load the CIFAR-10 train/test splits.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Scale pixels to [0, 1] in float32. Dividing the integer image arrays by
# 255.0 directly promotes them to float64, which doubles the memory footprint
# for no benefit because Keras computes in float32 by default.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Hold out 20% of the training images for validation; the fixed random_state
# keeps the split identical across runs.
x_train, x_validation, y_train, y_validation = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

# The models are compiled with categorical_crossentropy, which expects
# one-hot encoded targets.
y_train = tf.keras.utils.to_categorical(y_train, NUM_CLASSES)
y_validation = tf.keras.utils.to_categorical(y_validation, NUM_CLASSES)
# NOTE(review): y_test is left as integer labels here; encode it the same way
# before evaluating a model compiled with categorical_crossentropy.

In [4]:
# Image augmentation: random transforms applied to the training batches
# produced by datagen.flow(...).
augmentation_options = {
    "rotation_range": 15,
    "horizontal_flip": True,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
}
datagen = ImageDataGenerator(**augmentation_options)

In [5]:
def build(reg):
    """Create and compile the small CNN classifier used in this experiment.

    Args:
        reg: Kernel regularizer applied to the convolution layer and to the
            hidden dense layer, or None for no weight regularization. The
            output layer is not regularized.

    Returns:
        A compiled Sequential model: Conv2D(32) -> BatchNormalization ->
        MaxPooling2D -> Flatten -> Dense(512) -> BatchNormalization ->
        Dropout(0.5) -> softmax over NUM_CLASSES. It is compiled with
        categorical cross-entropy loss, the accuracy metric and Adam.
    """
    layer_stack = [
        Conv2D(
            32,
            (3, 3),
            activation='relu',
            kernel_regularizer=reg,
            input_shape=(32, 32, 3),
            padding='same',
        ),
        BatchNormalization(axis=-1),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(512, activation='relu', kernel_regularizer=reg),
        BatchNormalization(),
        Dropout(0.5),
        Dense(NUM_CLASSES, activation='softmax'),
    ]
    model = Sequential(layer_stack)

    adam = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        optimizer=adam,
    )
    return model

In [6]:
# Baseline: no kernel regularizer. Swap in tf.keras.regularizers.L2(l2=0.01)
# here to build the regularized variant instead.
baseline_reg = None
model = build(reg=baseline_reg)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 32, 32, 32)        896       
                                                                 
 batch_normalization (Batch  (None, 32, 32, 32)        128       
 Normalization)                                                  
                                                                 
 max_pooling2d (MaxPooling2  (None, 16, 16, 32)        0         
 D)                                                              
                                                                 
 flatten (Flatten)           (None, 8192)              0         
                                                                 
 dense (Dense)               (None, 512)               4194816   
                                                                 
 batch_normalization_1 (Bat  (None, 512)               2

2023-11-24 14:34:06.702128: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-24 14:34:06.709375: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-24 14:34:06.709595: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

# Without Regularizer

In [7]:
# Create the augmenting batch iterator once so its length can be reused.
train_batches = datagen.flow(x_train, y_train, batch_size=BATCH_SIZE)
history = model.fit(
    x=train_batches,
    # steps_per_epoch must be a whole number of batches. len() of the iterator
    # gives the batches per pass (including a final partial batch), whereas
    # len(x_train) / BATCH_SIZE evaluates to a float.
    steps_per_epoch=len(train_batches),
    epochs=EPOCHS,
    validation_data=(x_validation, y_validation),
)

Epoch 1/3


2023-11-24 14:34:07.571558: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8700
2023-11-24 14:34:07.651847: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-11-24 14:34:07.733437: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-11-24 14:34:08.446583: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f65808a3d10 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-11-24 14:34:08.446601: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 4050 Laptop GPU, Compute Capability 8.9
2023-11-24 14:34:08.449948: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-11-24 14:34:08.514905: I ./tensorflow/compiler/jit/

Epoch 2/3
Epoch 3/3


# With Regularizer

In [8]:
# Same architecture as the baseline, now with an L2 kernel regularizer
# (l2=0.01) passed to build().
l2_reg = tf.keras.regularizers.L2(l2=0.01)
model = build(reg=l2_reg)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_1 (Conv2D)           (None, 32, 32, 32)        896       
                                                                 
 batch_normalization_2 (Bat  (None, 32, 32, 32)        128       
 chNormalization)                                                
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 16, 16, 32)        0         
 g2D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 8192)              0         
                                                                 
 dense_2 (Dense)             (None, 512)               4194816   
                                                                 
 batch_normalization_3 (Bat  (None, 512)              

In [9]:
# Create the augmenting batch iterator once so its length can be reused.
train_batches = datagen.flow(x_train, y_train, batch_size=BATCH_SIZE)
history2 = model.fit(
    x=train_batches,
    # steps_per_epoch must be a whole number of batches. len() of the iterator
    # gives the batches per pass (including a final partial batch), whereas
    # len(x_train) / BATCH_SIZE evaluates to a float.
    steps_per_epoch=len(train_batches),
    epochs=EPOCHS,
    validation_data=(x_validation, y_validation),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3
