**SimpleConv2D CNN - MNIST Training**

This notebook trains and evaluates a complete convolutional neural network, implemented from scratch using NumPy, on the MNIST dataset.

**Setup**

In [None]:
import os
import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split

from src.cnn_layers import Conv2d, MaxPool2D, Flatten, FullyConnected, SoftmaxCrossEntropyLoss, SGD, relu, relu_derivative
from src.data_loader import MNISTDataLoader
from src.simpleconv2d_classifier import Scratch2dCNNClassifier

# Seed NumPy's global RNG once, up front, so that anything drawing from it
# produces the same numbers on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

**Load MNIST Data**

In [None]:
print("Loading MNIST data...")

# Stage 1: fetch the dataset through the project loader, requesting a
# train/test partition with test_size=0.2.
data_loader = MNISTDataLoader(data_dir='data')
X_train, X_test, y_train, y_test = data_loader.load_data(
    test_size=0.2,
    random_state=42,
)

# Stage 2: split the training portion again so that 20% of it becomes the
# validation set; the training names are rebound to the remaining 80%.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

# Report how many samples ended up in each partition.
print(
    f"Training set: {X_train.shape[0]} samples",
    f"Validation set: {X_val.shape[0]} samples",
    f"Test set: {X_test.shape[0]} samples",
    sep="\n",
)

**Build CNN Architecture**

In [None]:
class ReLULayer:
    """Wrap the ``relu`` / ``relu_derivative`` functions as a layer object.

    The layer holds no trainable parameters. ``forward`` caches its input so
    that ``backward`` can evaluate ``relu_derivative`` on it.
    """

    def __init__(self):
        # Input of the most recent forward pass; None until forward() runs.
        self.input = None

    def forward(self, x, training=True):
        """Apply ReLU to ``x`` and remember ``x`` for the backward pass.

        Args:
            x: Value handed to ``relu``.
            training: Accepted but not used by this layer.

        Returns:
            The result of ``relu(x)``.
        """
        self.input = x
        return relu(x)

    def backward(self, dout):
        """Propagate the upstream gradient ``dout`` through the activation.

        Args:
            dout: Gradient arriving from the following layer.

        Returns:
            ``relu_derivative(cached_input) * dout``.

        Raises:
            RuntimeError: If called before any ``forward`` pass, since there
                is no cached input to differentiate at.
        """
        if self.input is None:
            raise RuntimeError("ReLULayer.backward() called before forward()")
        return relu_derivative(self.input) * dout

    def get_params_count(self):
        """Return the number of trainable parameters, which is always 0."""
        return 0

# Convolutional front end: two conv -> ReLU -> 2x2 max-pool stages.
feature_extractor = [
    Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1, padding=1),
    ReLULayer(),
    MaxPool2D(kernel_size=2, stride=2),
    Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1),
    ReLULayer(),
    MaxPool2D(kernel_size=2, stride=2),
]

# Width of the flattened feature vector fed to the dense layers: 16 channels
# of 7x7 maps (presumably 28x28 inputs halved by each of the two poolings).
flat_features = 16 * 7 * 7
hidden_units = 128
num_classes = 10

# Dense back end: flatten, one hidden layer, then one output per class.
classifier_head = [
    Flatten(),
    FullyConnected(in_features=flat_features, out_features=hidden_units),
    ReLULayer(),
    FullyConnected(in_features=hidden_units, out_features=num_classes),
]

# Layers are instantiated in the same front-to-back order they run in.
layers = feature_extractor + classifier_head

loss_fn = SoftmaxCrossEntropyLoss()
optimizer = SGD(learning_rate=0.01)
model = Scratch2dCNNClassifier(layers, loss_fn, optimizer)

print("\nModel Summary:")
model.summary()

**Train Model**

In [None]:
# Hyperparameters for this run, named so they are easy to find and tune.
EPOCHS = 10
BATCH_SIZE = 32

print("\nTraining model...")
# perf_counter() is a monotonic clock, so the measured duration cannot be
# skewed by system clock adjustments the way time.time() differences can.
start_time = time.perf_counter()

# fit() receives the training and validation sets and returns the history
# object that the plotting cell indexes by metric name.
history = model.fit(
    X_train, y_train,
    X_val, y_val,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=True
)

# Elapsed training time in seconds.
training_time = time.perf_counter() - start_time
print(f"\nTraining completed in {training_time:.2f} seconds")

**Training History**

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

# One panel per metric: (axes, training key, validation key, display name).
# Both panels share the same layout, so they are drawn by a single loop.
panels = [
    (ax1, 'train_loss', 'val_loss', 'Loss'),
    (ax2, 'train_acc', 'val_acc', 'Accuracy'),
]
for panel_ax, train_key, val_key, metric_name in panels:
    panel_ax.plot(history[train_key], label=f'Training {metric_name}')
    panel_ax.plot(history[val_key], label=f'Validation {metric_name}')
    panel_ax.set_title(f'Model {metric_name}')
    panel_ax.set_xlabel('Epoch')
    panel_ax.set_ylabel(metric_name)
    panel_ax.legend()
    panel_ax.grid(True)

plt.tight_layout()

# savefig() does not create missing directories, so make sure the output
# folder exists; otherwise a fresh checkout fails with FileNotFoundError.
history_plot_path = 'plots/training_history.png'
os.makedirs(os.path.dirname(history_plot_path), exist_ok=True)
plt.savefig(history_plot_path, dpi=150, bbox_inches='tight')
plt.show()

**Evaluate on Test Set**

In [None]:
print("Evaluating on test set...")

# evaluate() hands back a pair that is unpacked as (loss, accuracy); both
# values are reused by the results summary at the end of the notebook.
test_loss, test_acc = model.evaluate(X_test, y_test)

print(
    f"Test Loss: {test_loss:.4f}",
    f"Test Accuracy: {test_acc:.4f}",
    sep="\n",
)

**Sample Predictions**

In [None]:
# Number of leading test images to visualise.
n_samples = 5
sample_images = X_test[:n_samples]

# Predicted labels and the per-class scores for the same images.
predictions = model.predict(sample_images)
probabilities = model.predict_proba(sample_images)

fig, axes = plt.subplots(1, n_samples, figsize=(15, 3))
for i, ax in enumerate(axes):
    # X_test[i, 0] takes index 0 on the second axis of sample i --
    # presumably the single grayscale channel; confirm against the loader.
    ax.imshow(X_test[i, 0], cmap='gray')
    ax.axis('off')

    pred_label = predictions[i]
    true_label = y_test[i]
    # Confidence is the largest score the model assigned to any class.
    confidence = np.max(probabilities[i])

    # Green title for a correct prediction, red for a miss.
    if pred_label == true_label:
        color = 'green'
    else:
        color = 'red'

    ax.set_title(f'Pred: {pred_label}\nTrue: {true_label}\nConf: {confidence:.2f}', color=color, fontsize=10)

plt.tight_layout()
plt.show()

**Results Summary**

In [None]:
# Final recap of the run, assembled as a list of lines and printed in one go.
# The epoch count and batch size are written out literally and mirror the
# values passed to model.fit() in the training cell.
rule = "=" * 50
summary_lines = [
    rule,
    "Training Summary:",
    rule,
    "Model: Simple CNN",
    f"Test Accuracy: {test_acc:.4f}",
    f"Test Loss: {test_loss:.4f}",
    f"Training Time: {training_time:.2f} seconds",
    "Epochs: 10",
    "Batch Size: 32",
    rule,
]
print("\n".join(summary_lines))