# CNN From Scratch Testing

This notebook tests the from-scratch CNN implementation by comparing it with a trained Keras model. The goal is to verify that, when loaded with the same weights, our from-scratch implementation produces results that match the Keras model.

## Setup and Imports

Import all necessary libraries and modules for testing.

In [1]:
import numpy as np
import tensorflow as tf
import keras
import json
import sys
import os
from sklearn.metrics import f1_score

from from_scratch.layers import Conv2D, ReLU, MaxPooling2D, AveragePooling2D, Flatten, Dense, Softmax
from from_scratch.model import CNNModelFromScratch
from cnn_keras_training import load_and_preprocess_data, build_cnn_model

## Helper Functions

Define functions to load the Keras model and build the from-scratch model with Keras weights.

In [2]:
def load_keras_model_and_weights(model_path, best_config, input_shape, num_classes):
    """
    Rebuild the Keras CNN described by ``best_config`` and restore its weights.

    Args:
        model_path: Path of the weights file passed to ``load_weights``.
        best_config: Mapping providing the 'num_conv_layers',
            'filters_per_layer', 'filter_size' and 'pooling_type' entries.
        input_shape: Forwarded unchanged to ``build_cnn_model``.
        num_classes: Forwarded unchanged to ``build_cnn_model``.

    Returns:
        The model produced by ``build_cnn_model`` after its weights were loaded.

    Raises:
        KeyError: If ``best_config`` lacks one of the architecture entries.
    """
    architecture_keys = ('num_conv_layers', 'filters_per_layer', 'filter_size', 'pooling_type')
    # Pick out only the architecture entries; any other key in best_config is ignored.
    architecture = {key: best_config[key] for key in architecture_keys}

    restored_model = build_cnn_model(
        input_shape=input_shape,
        num_classes=num_classes,
        **architecture,
    )
    restored_model.load_weights(model_path)

    print(f"Keras model loaded with weights from {model_path}")
    restored_model.summary()
    return restored_model

In [None]:
def _kernel_and_bias(k_layer):
    """
    Returns ``(kernel, bias)`` taken from a Keras layer's ``get_weights()`` list.

    When ``get_weights()`` yields only a kernel (presumably a layer created
    with ``use_bias=False``), a zero bias with one entry per output
    channel/unit is substituted so the from-scratch layer still gets both arrays.
    """
    params = k_layer.get_weights()
    kernel = params[0]
    if len(params) > 1:
        return kernel, params[1]
    # The last kernel axis is the number of filters (Conv2D) or units (Dense).
    return kernel, np.zeros(kernel.shape[-1], dtype=kernel.dtype)


def _resolve_activation(k_layer, supported):
    """
    Maps a Keras layer's activation to the name used by the from-scratch layers.

    Args:
        k_layer: Keras layer exposing ``activation`` and ``name``.
        supported: Dict mapping from-scratch activation names to the Keras
            activation functions they correspond to.

    Returns:
        The matching name, or None when the activation is linear/unsupported.
        An unsupported non-linear activation triggers a printed warning, since
        dropping it silently would make the two models disagree.
    """
    keras_activation = k_layer.activation
    for name, keras_fn in supported.items():
        if keras_activation == keras_fn:
            return name

    if keras_activation is not None and keras_activation != keras.activations.linear:
        label = getattr(keras_activation, '__name__', repr(keras_activation))
        print(f"Warning: activation '{label}' of Keras layer {k_layer.name} is not supported by the from-scratch model; it will be treated as linear.")
    return None


def build_from_scratch_model_with_keras_weights(keras_model_loaded):
    """
    Builds the from-scratch model and loads weights from the Keras model.

    Walks ``keras_model_loaded.layers`` in order and appends the matching
    from-scratch layer for every Conv2D, MaxPooling2D, AveragePooling2D,
    Flatten and Dense layer. InputLayer instances are skipped, and any other
    layer type is reported with a warning and left out.

    Args:
        keras_model_loaded: Keras model whose layers (and weights) are mirrored.

    Returns:
        A ``CNNModelFromScratch`` wrapping the translated layers.
    """
    from_scratch_layers = []

    print("\n--- Extracting Weights and Building From-Scratch Model ---")

    for k_layer in keras_model_loaded.layers:
        print(f"Processing Keras layer: {k_layer.name} of type {type(k_layer)}")

        if isinstance(k_layer, keras.layers.Conv2D):
            weights, biases = _kernel_and_bias(k_layer)

            # Only the first stride component is forwarded to the from-scratch layer.
            stride = k_layer.strides[0]

            padding_val = 0
            if k_layer.padding == 'same':
                if stride != 1:
                    print(f"Warning: 'same' padding with stride > 1 for Conv2D {k_layer.name} might need manual calculation.")
                # Symmetric padding derived from the kernel height.
                padding_val = (weights.shape[0] - 1) // 2

            # The Conv2D branch only maps ReLU, as in the original translation.
            activation = _resolve_activation(k_layer, {'relu': keras.activations.relu})

            fs_conv = Conv2D(weights, biases, stride=stride, padding=padding_val, activation=activation)
            from_scratch_layers.append(fs_conv)
            print(f"  Added Conv2D: filters={weights.shape[3]}, kernel_size={weights.shape[0:2]}, stride={stride}, padding={padding_val}, activation={activation}")

        elif isinstance(k_layer, keras.layers.MaxPooling2D):
            pool_size = k_layer.pool_size
            stride = k_layer.strides[0]
            from_scratch_layers.append(MaxPooling2D(pool_size=pool_size, stride=stride))
            print(f"  Added MaxPooling2D: pool_size={pool_size}, stride={stride}")

        elif isinstance(k_layer, keras.layers.AveragePooling2D):
            pool_size = k_layer.pool_size
            stride = k_layer.strides[0]
            from_scratch_layers.append(AveragePooling2D(pool_size=pool_size, stride=stride))
            print(f"  Added AveragePooling2D: pool_size={pool_size}, stride={stride}")

        elif isinstance(k_layer, keras.layers.Flatten):
            from_scratch_layers.append(Flatten())
            print("  Added Flatten")

        elif isinstance(k_layer, keras.layers.Dense):
            weights, biases = _kernel_and_bias(k_layer)

            activation = _resolve_activation(
                k_layer,
                {'relu': keras.activations.relu, 'softmax': keras.activations.softmax},
            )

            from_scratch_layers.append(Dense(weights, biases, activation=activation))
            print(f"  Added Dense: units={weights.shape[1]}, activation={activation}")

        elif isinstance(k_layer, keras.layers.InputLayer):
            print(f"  Skipping InputLayer: {k_layer.name}")

        else:
            print(f"Warning: Keras layer type {type(k_layer)} (name: {k_layer.name}) not handled for from-scratch model.")

    return CNNModelFromScratch(from_scratch_layers)

## Load Test Data and Model Configuration

Load the test data and the best model configuration from training.

In [4]:
# Only the test split is kept; the train and validation splits returned by the
# loader are discarded through the throwaway `_` targets.
(_, _), (_, _), (x_test, y_test_true) = load_and_preprocess_data()

# Labels are integer class ids, so the largest id plus one gives the class count.
num_classes = y_test_true.max() + 1
# Drop the leading sample axis to get the per-example input shape.
input_shape = x_test.shape[1:]

print(f"Test data shape: {x_test.shape}")
print(f"Input shape: {input_shape}")
print(f"Number of classes: {num_classes}")

x_train shape: (40000, 32, 32, 3)
y_train shape: (40000, 1)
x_val shape: (10000, 32, 32, 3)
y_val shape: (10000, 1)
x_test shape: (10000, 32, 32, 3)
y_test shape: (10000, 1)
Test data shape: (10000, 32, 32, 3)
Input shape: (32, 32, 3)
Number of classes: 10


In [5]:
# Read the configuration saved by the training run, falling back to a built-in
# default architecture when the JSON file is not present.
try:
    with open('best_model_config.json', 'r') as f:
        best_model_config_for_loading = json.load(f)
except FileNotFoundError:
    print("best_model_config.json not found, using default config")
    # Fallback to default config
    best_model_config_for_loading = dict(
        num_conv_layers=2,
        filters_per_layer=[32, 32],
        filter_size=(3,3),
        pooling_type='max',
    )
else:
    print(f'Loaded model config from best_model_config.json: {best_model_config_for_loading}')

# 'final_f1_score' is a recorded metric, not an architecture argument, so it is
# removed from the copy that is later unpacked into build_cnn_model.
model_config = dict(best_model_config_for_loading.items())
model_config.pop('final_f1_score', None)

print('Best model config for loading:', model_config)

Loaded model config from best_model_config.json: {'num_conv_layers': 2, 'filters_per_layer': [32, 32], 'filter_size': [3, 3], 'pooling_type': 'max', 'final_f1_score': 0.5369205148986673}
Best model config for loading: {'num_conv_layers': 2, 'filters_per_layer': [32, 32], 'filter_size': [3, 3], 'pooling_type': 'max'}


## Build and Load Keras Model

Build the Keras model architecture and load the trained weights.

In [6]:
# File holding the trained Keras weights that are restored further down.
keras_model_path = 'cnn_keras_final.weights.h5'

# Build the architecture from the loaded configuration and show its summary
# before any weights are restored.
model = build_cnn_model(
    input_shape=input_shape,
    num_classes=num_classes,
    **model_config,
)
model.summary()

2025-05-28 00:31:56.455936: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2025-05-28 00:31:56.455971: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-05-28 00:31:56.455977: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
I0000 00:00:1748367116.456365 25610119 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1748367116.456556 25610119 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [7]:
# Feed an all-zeros batch of one example through the layers in order and stop
# at the Flatten layer to record the shape it produces.
flatten_output_shape = None
dummy_input = np.zeros((1, *input_shape), dtype=np.float32)
for i, layer in enumerate(model.layers):
    dummy_input = layer(dummy_input)
    if not isinstance(layer, keras.layers.Flatten):
        continue
    flatten_output_shape = dummy_input.shape
    break
print('Flatten output shape:', flatten_output_shape)

Flatten output shape: (1, 2048)


In [8]:
# Restore the trained Keras model. On any failure, print troubleshooting hints
# and re-raise the original exception so the notebook stops here.
try:
    loaded_keras_model = load_keras_model_and_weights(
        keras_model_path,
        model_config,
        input_shape,
        num_classes,
    )
except Exception as e:
    troubleshooting_lines = (
        f"Error loading Keras model: {e}",
        "Please ensure 'cnn_keras_final.weights.h5' exists and 'best_model_config_for_loading' matches its architecture.",
        "If you see a shape mismatch, check the output shape of the Flatten layer in both training and testing.",
    )
    for line in troubleshooting_lines:
        print(line)
    raise

Keras model loaded with weights from cnn_keras_final.weights.h5


  saveable.load_own_variables(weights_store.get(inner_path))


## Build From-Scratch Model with Keras Weights

Build the from-scratch CNN model and transfer the weights from the Keras model.

In [9]:
# Mirror the restored Keras model layer by layer in the from-scratch model.
fs_model = build_from_scratch_model_with_keras_weights(loaded_keras_model)

# A falsy layer collection means nothing was translated, so later comparisons
# would be meaningless; report it and abort.
fs_model_is_empty = not fs_model.layers
if fs_model_is_empty:
    print("From-scratch model has no layers. Weight loading likely failed or was incomplete.")
    raise ValueError("From-scratch model construction failed")


--- Extracting Weights and Building From-Scratch Model ---
Processing Keras layer: conv2d_2 of type <class 'keras.src.layers.convolutional.conv2d.Conv2D'>
  Added Conv2D: filters=32, kernel_size=(3, 3), stride=1, padding=1, activation=relu
Processing Keras layer: max_pooling2d_2 of type <class 'keras.src.layers.pooling.max_pooling2d.MaxPooling2D'>
  Added MaxPooling2D: pool_size=(2, 2), stride=2
Processing Keras layer: conv2d_3 of type <class 'keras.src.layers.convolutional.conv2d.Conv2D'>
  Added Conv2D: filters=32, kernel_size=(3, 3), stride=1, padding=1, activation=relu
Processing Keras layer: max_pooling2d_3 of type <class 'keras.src.layers.pooling.max_pooling2d.MaxPooling2D'>
  Added MaxPooling2D: pool_size=(2, 2), stride=2
Processing Keras layer: flatten_1 of type <class 'keras.src.layers.reshaping.flatten.Flatten'>
  Added Flatten
Processing Keras layer: dense_2 of type <class 'keras.src.layers.core.dense.Dense'>
  Added Dense: units=128, activation=relu
Processing Keras layer:

In [10]:
# List every from-scratch layer and, for layers that carry parameters, the
# shapes of their weight and bias arrays.
print(f"\n--- From-Scratch Model Structure ({len(fs_model.layers)} layers) ---")
for i, layer in enumerate(fs_model.layers):
    print(f"Layer {i}: {type(layer)}")
    # Layers without parameters either lack these attributes or hold None.
    if getattr(layer, 'weights', None) is not None:
        print(f"  Weights shape: {layer.weights.shape}")
    if getattr(layer, 'biases', None) is not None:
        print(f"  Biases shape: {layer.biases.shape}")


--- From-Scratch Model Structure (7 layers) ---
Layer 0: <class 'from_scratch.layers.Conv2D'>
  Weights shape: (3, 3, 3, 32)
  Biases shape: (32,)
Layer 1: <class 'from_scratch.layers.MaxPooling2D'>
Layer 2: <class 'from_scratch.layers.Conv2D'>
  Weights shape: (3, 3, 32, 32)
  Biases shape: (32,)
Layer 3: <class 'from_scratch.layers.MaxPooling2D'>
Layer 4: <class 'from_scratch.layers.Flatten'>
Layer 5: <class 'from_scratch.layers.Dense'>
  Weights shape: (2048, 128)
  Biases shape: (128,)
Layer 6: <class 'from_scratch.layers.Dense'>
  Weights shape: (128, 10)
  Biases shape: (10,)


## Test on Sample Data

First, test both models on a small subset of data to verify they work correctly.

In [11]:
# Take the first 100 test examples for a quick check of both models; the labels
# are flattened to a 1-D vector for the metric calls.
sample_window = slice(None, 100)
sample_x_test = x_test[sample_window]
sample_y_test_true = y_test_true[sample_window].flatten()

print(f"\n--- Making predictions on {len(sample_x_test)} samples ---")


--- Making predictions on 100 samples ---


In [12]:
# Call the Keras model directly in inference mode on the sample, convert the
# result to NumPy, then take the highest-scoring class per row.
keras_sample_output = loaded_keras_model(sample_x_test, training=False)
keras_pred_proba = keras_sample_output.numpy()
keras_pred_labels = keras_pred_proba.argmax(axis=1)

print(f"Keras predictions shape: {keras_pred_proba.shape}")
print(f"Keras predicted labels shape: {keras_pred_labels.shape}")

Keras predictions shape: (100, 10)
Keras predicted labels shape: (100,)


In [13]:
# Run the same sample through the from-scratch model. The value returned by
# predict() is used directly as class labels in the comparison cells (no argmax
# is applied here).
fs_pred_labels = fs_model.predict(sample_x_test)

print(f"From-scratch predicted labels shape: {fs_pred_labels.shape}")

From-scratch predicted labels shape: (100,)


## Compare Sample Results

Compare the F1 scores and predictions between Keras and from-scratch models on the sample data.

In [14]:
# Score both models against the true sample labels with macro-averaged F1.
# zero_division=0 scores a class with no predictions as 0 instead of warning.
macro_f1_options = dict(average='macro', zero_division=0)
keras_f1 = f1_score(sample_y_test_true, keras_pred_labels, **macro_f1_options)
fs_f1 = f1_score(sample_y_test_true, fs_pred_labels, **macro_f1_options)

print(f"\n--- Comparison on {len(sample_x_test)} samples ---")
print(f"Keras Model Macro F1-Score: {keras_f1:.4f}")
print(f"From-Scratch Model Macro F1-Score: {fs_f1:.4f}")


--- Comparison on 100 samples ---
Keras Model Macro F1-Score: 0.3244
From-Scratch Model Macro F1-Score: 0.3244


In [15]:
# Show the first 10 predictions of each model next to the true labels.
print("\nFirst 10 Keras predictions:", keras_pred_labels[:10])
print("First 10 From-Scratch predictions:", fs_pred_labels[:10])
print("First 10 True labels:", sample_y_test_true[:10])

if np.allclose(keras_f1, fs_f1, atol=0.01): # Allow a small tolerance
    print("\nSUCCESS: The F1 scores are very close!")
else:
    print("\nNOTICE: F1 scores differ. Further debugging of from-scratch layers or weight loading might be needed.")

# Always compare the predicted labels directly. Two models can reach nearly the
# same F1 score with different predictions, so the F1 check alone does not show
# that the from-scratch model reproduces the Keras model.
if np.array_equal(keras_pred_labels, fs_pred_labels):
    print("The predicted labels are identical for this sample.")
else:
    differences = np.sum(keras_pred_labels != fs_pred_labels)
    print(f"Predicted labels differ for {differences}/{len(sample_x_test)} samples.")


First 10 Keras predictions: [3 1 1 0 4 6 1 6 3 1]
First 10 From-Scratch predictions: [3 1 1 0 4 6 1 6 3 1]
First 10 True labels: [3 8 8 0 6 6 1 6 3 1]

SUCCESS: The F1 scores are very close!


## Test on Full Dataset

Run the comparison on the complete test set to get final performance metrics.

In [16]:
# Run the Keras model over the complete test set.
print("\n--- Running on FULL Test Set ---")
# Model.predict processes the data in batches and returns a NumPy array, so the
# whole test set is not pushed through the network as one batch the way a
# direct model(x_test) call would do.
keras_full_pred_proba = loaded_keras_model.predict(x_test, batch_size=256, verbose=0)
keras_full_pred_labels = np.argmax(keras_full_pred_proba, axis=1)

print(f"Keras full predictions computed: {keras_full_pred_labels.shape}")


--- Running on FULL Test Set ---
Keras full predictions computed: (10000,)


In [17]:
# Run the from-scratch model over the complete test set. As in the sample run,
# the value returned by predict() is used directly as class labels.
fs_full_pred_labels = fs_model.predict(x_test)

print(f"From-scratch full predictions computed: {fs_full_pred_labels.shape}")

From-scratch full predictions computed: (10000,)


In [20]:
# Flatten the labels once and reuse the 1-D vector for both metric calls.
y_test_true_flat = y_test_true.flatten()

keras_full_f1 = f1_score(y_test_true_flat, keras_full_pred_labels, average='macro', zero_division=0)
fs_full_f1 = f1_score(y_test_true_flat, fs_full_pred_labels, average='macro', zero_division=0)

print(f"Keras Model Macro F1-Score (Full Test Set): {keras_full_f1:.4f}")
print(f"From-Scratch Model Macro F1-Score (Full Test Set): {fs_full_f1:.4f}")

if np.allclose(keras_full_f1, fs_full_f1, atol=0.01):
    print("\nSUCCESS on Full Test Set: The F1 scores are very close!")
else:
    print("\nNOTICE on Full Test Set: F1 scores differ.")

# Close F1 scores do not prove the two models agree sample by sample, so the
# predicted labels are also compared directly.
if np.array_equal(keras_full_pred_labels, fs_full_pred_labels):
    print("The predicted labels are identical on the full test set.")
else:
    full_differences = np.sum(keras_full_pred_labels != fs_full_pred_labels)
    print(f"Predicted labels differ for {full_differences}/{len(y_test_true_flat)} test samples.")

Keras Model Macro F1-Score (Full Test Set): 0.3430
From-Scratch Model Macro F1-Score (Full Test Set): 0.3430

SUCCESS on Full Test Set: The F1 scores are very close!
