In [1]:
import numpy as np
import matplotlib.pyplot as plt
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score

In [2]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, ReLU as KerasReLU, BatchNormalization, Dropout, InputLayer, GlobalAveragePooling2D, Lambda, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy as KerasSCCE # Use alias
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator

2025-05-29 09:40:36.498848: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-29 09:40:36.755535: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748486436.847350  323749 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748486436.874530  323749 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748486437.096593  323749 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [3]:
import sys
sys.path.append('../../')

from src.cnn.cnn import CNN
from src.cnn.layers import Conv2DLayer, ReLULayer, PoolingLayer, FlattenLayer, DenseLayer, SoftmaxLayer, BatchNormalizationLayer, DropoutLayer
from src.cnn.losses import SparseCategoricalCrossentropy as ScratchSCCE

In [4]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs detected and configured.")
    except RuntimeError as e:
        print(e)
else:
    print("No GPU detected. TensorFlow will use CPU.")

1 Physical GPUs, 1 Logical GPUs detected and configured.


I0000 00:00:1748486439.430381  323749 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9706 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060, pci bus id: 0000:01:00.0, compute capability: 8.6


In [5]:
(x_train_full, y_train_full), (x_test, y_test) = cifar10.load_data()

In [6]:
x_train_full = x_train_full.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

In [7]:
x_train, x_val, y_train, y_val = train_test_split(
    x_train_full, y_train_full, test_size=0.2, random_state=42
)

In [8]:
num_classes = 10
input_shape = x_train.shape[1:]

In [9]:
y_train_sparse = y_train.flatten()
y_val_sparse = y_val.flatten()
y_test_sparse = y_test.flatten()

In [10]:
print(f"x_train: {x_train.shape}, y_train_sparse: {y_train_sparse.shape}")
print(f"x_val: {x_val.shape}, y_val_sparse: {y_val_sparse.shape}")
print(f"x_test: {x_test.shape}, y_test_sparse: {y_test_sparse.shape}")

x_train: (40000, 32, 32, 3), y_train_sparse: (40000,)
x_val: (10000, 32, 32, 3), y_val_sparse: (10000,)
x_test: (10000, 32, 32, 3), y_test_sparse: (10000,)


In [11]:
def keras_cnn(
    input_shape, num_classes,
    conv_blocks=[
        {'filters': 32, 'kernel_size': (3,3), 'pool': 'max', 'batch_norm': True, 'dropout': 0.25},
        {'filters': 64, 'kernel_size': (3,3), 'pool': 'max', 'batch_norm': True, 'dropout': 0.25}
    ],
    dense_layers_units=[512],
    dense_dropout=0.5,
    global_pooling=None
    ):
    model = Sequential()
    model.add(InputLayer(input_shape=input_shape))

    for block_params in conv_blocks:
        model.add(Conv2D(filters=block_params['filters'], kernel_size=block_params['kernel_size'], padding='same'))
        if block_params.get('batch_norm'):
            model.add(BatchNormalization())
        model.add(KerasReLU())
        
        pool = block_params.get('pool')
        if pool == 'max':
            model.add(MaxPooling2D(pool_size=(2, 2)))
        elif pool == 'average':
            model.add(AveragePooling2D(pool_size=(2, 2)))
        
        if block_params.get('dropout') and block_params['dropout'] > 0:
            model.add(Dropout(block_params['dropout']))
            
    if global_pooling == 'avg':
        model.add(GlobalAveragePooling2D())
    elif global_pooling == 'max':
        model.add(Lambda(lambda x: tf.reduce_max(x, axis=[1,2])))
    else:
        if not isinstance(model.layers[-1], (GlobalAveragePooling2D, Lambda, Flatten)):
             model.add(Flatten())
    
    for units in dense_layers_units:
        model.add(Dense(units=units))
        model.add(KerasReLU())
        if dense_dropout > 0:
            model.add(Dropout(dense_dropout))
    
    model.add(Dense(units=num_classes))
    return model

In [12]:
base_conv_params = [
    {'filters': 32, 'kernel_size': (3,3), 'batch_norm': True, 'dropout': 0.0},
    {'filters': 32, 'kernel_size': (3,3), 'pool': 'max', 'batch_norm': True, 'dropout': 0.2},
    {'filters': 64, 'kernel_size': (3,3), 'batch_norm': True, 'dropout': 0.0},
    {'filters': 64, 'kernel_size': (3,3), 'pool': 'max', 'batch_norm': True, 'dropout': 0.3},
    {'filters': 128, 'kernel_size': (3,3), 'batch_norm': True, 'dropout': 0.0},
    {'filters': 128, 'kernel_size': (3,3), 'pool': 'max', 'batch_norm': True, 'dropout': 0.4},
]

base_dense_units = [521]

In [13]:
keras_cnn_model = keras_cnn(
    input_shape, num_classes,
    conv_blocks=base_conv_params,
    dense_layers_units=base_dense_units,
    dense_dropout=0.5
)
keras_cnn_model.summary()



In [14]:
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001, verbose=1)

In [15]:
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    zoom_range=0.1
)
datagen.fit(x_train)

In [16]:
keras_cnn_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=KerasSCCE(from_logits=True),
    metrics=['accuracy']
)

epochs_keras = 100
batch_size_keras = 64
history_keras = keras_cnn_model.fit(
    datagen.flow(x_train, y_train, batch_size=batch_size_keras),
    epochs=epochs_keras,
    validation_data=(x_val, y_val),
    callbacks=[early_stopping, reduce_lr],
    verbose=1
)

Epoch 1/100


  self._warn_if_super_not_called()
I0000 00:00:1748486444.912430  324807 service.cc:152] XLA service 0x7f46580140d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1748486444.912592  324807 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3060, Compute Capability 8.6
2025-05-29 09:40:44.998371: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1748486445.492203  324807 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  2/625[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m31s[0m 50ms/step - accuracy: 0.1055 - loss: 5.1798   

I0000 00:00:1748486452.297431  324807 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m624/625[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 18ms/step - accuracy: 0.2659 - loss: 2.1481











[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 25ms/step - accuracy: 0.2661 - loss: 2.1471 - val_accuracy: 0.3960 - val_loss: 1.6640 - learning_rate: 0.0010
Epoch 2/100
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 17ms/step - accuracy: 0.4372 - loss: 1.5408 - val_accuracy: 0.4771 - val_loss: 1.4768 - learning_rate: 0.0010
Epoch 3/100
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 17ms/step - accuracy: 0.5106 - loss: 1.3519 - val_accuracy: 0.5489 - val_loss: 1.2754 - learning_rate: 0.0010
Epoch 4/100
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 17ms/step - accuracy: 0.5609 - loss: 1.2319 - val_accuracy: 0.5636 - val_loss: 1.2878 - learning_rate: 0.0010
Epoch 5/100
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 20ms/step - accuracy: 0.5968 - loss: 1.1444 - val_accuracy: 0.6243 - val_loss: 1.0457 - learning_rate: 0.0010
Epoch 6/100
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [17]:
loss_keras, accuracy_keras = keras_cnn_model.evaluate(x_test, y_test, verbose=0)
y_pred_keras_proba = tf.nn.softmax(keras_cnn_model.predict(x_test)).numpy()
y_pred_keras = np.argmax(y_pred_keras_proba, axis=1)
f1_keras = f1_score(y_test_sparse, y_pred_keras, average='macro')

print(f"\nEnhanced Keras Model - Test Accuracy: {accuracy_keras:.4f}, Macro F1-Score: {f1_keras:.4f}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step

Enhanced Keras Model - Test Accuracy: 0.8503, Macro F1-Score: 0.8485


In [18]:
for i, layer in enumerate(keras_cnn_model.layers):
    if layer.get_weights():
        weights = layer.get_weights()
        print(f"Layer {i}: {layer.name}")
        for j, w in enumerate(weights):
            print(f"  Param {j} wieight: {w}")
    else:
        print(f"Layer {i}: {layer.name} (No weights)")

Layer 0: conv2d
  Param 0 wieight: [[[[-8.87642056e-02  1.15612082e-01 -1.60254180e-01  9.44101661e-02
     4.90332842e-02 -1.08593315e-01  2.18233503e-02  1.03773102e-01
     2.35715434e-01 -2.21581832e-01 -7.61660188e-02  2.31210932e-01
    -3.13957594e-02  6.24771714e-02 -2.03501973e-02 -6.41558170e-02
    -1.52371628e-02  1.21673904e-01  1.49819195e-01  1.27031907e-01
    -1.79299116e-01 -1.45356460e-02  1.96847077e-02  6.96292222e-02
     1.77288249e-01  3.02250236e-01 -8.34366586e-03 -3.32460031e-02
    -5.18235192e-02  8.06169733e-02 -1.87521189e-01 -1.64868996e-01]
   [ 4.92994040e-02 -1.68149322e-01  1.61553491e-02 -1.84713937e-02
    -1.57137774e-02 -6.81192651e-02  1.06796451e-01 -5.22906408e-02
    -1.95008010e-01  2.48125158e-02  2.15829849e-01  2.67441213e-01
    -9.81164724e-02  1.09132737e-01  2.07137913e-01  1.18858188e-01
    -8.24823827e-02 -1.15428334e-02  6.50931969e-02 -1.39431760e-01
     1.74378514e-01  1.44804046e-01 -1.62510306e-03  1.90612614e-01
    -2.00699

In [19]:
manual_cnn_model = CNN()

for params in base_conv_params:
    manual_cnn_model.add_layer(Conv2DLayer(num_filters=params['filters'], filter_size=params['kernel_size'], padding='same'))
    if params.get('batch_norm'):
        manual_cnn_model.add_layer(BatchNormalizationLayer()) # Add scratch BN
    manual_cnn_model.add_layer(ReLULayer())
    
    pool_type = params.get('pool_type')
    if pool_type == 'max':
        manual_cnn_model.add_layer(PoolingLayer(pool_size=(2,2), stride=2, mode='max'))
    elif pool_type == 'average':
        manual_cnn_model.add_layer(PoolingLayer(pool_size=(2,2), stride=2, mode='average'))
        
    if params.get('dropout') and params['dropout'] > 0:
        manual_cnn_model.add_layer(DropoutLayer(rate=params['dropout']))

manual_cnn_model.add_layer(FlattenLayer())

dense_dropout_rate_keras = 0.5
for units in base_dense_units:
    manual_cnn_model.add_layer(DenseLayer(output_dim=units))
    manual_cnn_model.add_layer(ReLULayer())
    if dense_dropout_rate_keras > 0:
        manual_cnn_model.add_layer(DropoutLayer(rate=dense_dropout_rate_keras))

manual_cnn_model.add_layer(DenseLayer(output_dim=num_classes))

manual_cnn_model.load_weights_from_keras(keras_cnn_model)

manual_cnn_model.loss_function = ScratchSCCE(from_logits=True)

Attempting to load weights from Keras model...
Weight loading attempt finished.


In [21]:
x_test_subset_verify = x_test[:100]
y_test_subset_verify_sparse = y_test_sparse[:100]

keras_subset_logits_verify = keras_cnn_model.predict(x_test_subset_verify)
keras_subset_proba_verify = tf.nn.softmax(keras_subset_logits_verify).numpy()
keras_subset_preds_verify = np.argmax(keras_subset_proba_verify, axis=1)
f1_keras_verify = f1_score(y_test_subset_verify_sparse, keras_subset_preds_verify, average='macro')
acc_keras_verify = accuracy_score(y_test_subset_verify_sparse, keras_subset_preds_verify)
print(f"Keras (on subset {len(x_test_subset_verify)}) - Accuracy: {acc_keras_verify:.4f}, Macro F1: {f1_keras_verify:.4f}")

manual_cnn_model.loss_function = ScratchSCCE(from_logits=True)

manual_subset_proba_verify = manual_cnn_model.predict_proba(x_test_subset_verify)
manual_subset_preds_verify = np.argmax(manual_subset_proba_verify, axis=1)

f1_manual_verify = f1_score(y_test_subset_verify_sparse, manual_subset_preds_verify, average='macro')
acc_manual_verify = accuracy_score(y_test_subset_verify_sparse, manual_subset_preds_verify)
print(f"Manual (on subset {len(x_test_subset_verify)}, loaded weights) - Accuracy: {acc_manual_verify:.4f}, Macro F1: {f1_manual_verify:.4f}")

print("\nRaw probability Comparison")
for i in range(5):
    print(f"Sample {i}:")
    print(f"  Keras Probs (first 5):   {keras_subset_proba_verify[i, :5]}")
    print(f"  Manual Probs (first 5): {manual_subset_proba_verify[i, :5]}")
    diff = np.sum(np.abs(keras_subset_proba_verify[i] - manual_subset_proba_verify[i]))
    print(f"  Sum of Absolute Differences in Probs: {diff:.6e}")
    if diff > 1e-2:
            print(f"  INFO: Probabilities differ for sample {i}, potentially due to BN/Dropout behavior in Keras inference vs scratch.")


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
Keras (on subset 100) - Accuracy: 0.8600, Macro F1: 0.8469
Manual (on subset 100, loaded weights) - Accuracy: 0.0900, Macro F1: 0.0268

Raw probability Comparison
Sample 0:
  Keras Probs (first 5):   [1.6903014e-04 1.0883027e-04 3.2663517e-04 7.9014665e-01 6.2829065e-05]
  Manual Probs (first 5): [0.18967547 0.00952727 0.38619001 0.05030903 0.10521434]
  Sum of Absolute Differences in Probs: 1.785557e+00
  INFO: Probabilities differ for sample 0, potentially due to BN/Dropout behavior in Keras inference vs scratch.
Sample 1:
  Keras Probs (first 5):   [1.80132938e-05 2.18749736e-02 1.08058014e-10 1.47521628e-09
 7.96869352e-13]
  Manual Probs (first 5): [0.16882727 0.01477062 0.15245181 0.08622431 0.02858337]
  Sum of Absolute Differences in Probs: 1.580582e+00
  INFO: Probabilities differ for sample 1, potentially due to BN/Dropout behavior in Keras inference vs scratch.
Sample 2:
  Keras Probs (first 5):   [9.947