# Neural Network from Scratch

Module 11 Project: Image recognition of letters **A**, **B**, and **C** using a 2-layer neural network implemented with NumPy only.

This notebook defines binary 5x6 pixel patterns for each letter, builds and trains a small feedforward neural network with one hidden layer using sigmoid activation, and evaluates it.

## 1. Create binary 5x6 patterns for A, B, C

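We define each letter as a binary pattern 5 pixels wide and 6 pixels tall (6 rows x 5 columns = 30 pixels) and flatten it into a 30-element vector. We then build a small dataset by generating noisy copies of each pattern, flipping every pixel independently with a small probability.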

In [None]:
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(42)


def _bitmap(*rows):
    """Turn rows of '0'/'1' characters into a 2-D integer pixel grid."""
    return np.array([[int(pixel) for pixel in row] for row in rows])


# Letter templates: 6 rows by 5 columns, where 1 marks a filled pixel.
A = _bitmap(
    "01110",
    "10001",
    "11111",
    "10001",
    "10001",
    "10001",
)

B = _bitmap(
    "11110",
    "10001",
    "11110",
    "10001",
    "10001",
    "11110",
)

C = _bitmap(
    "01111",
    "10000",
    "10000",
    "10000",
    "10000",
    "01111",
)

def flatten(letter):
    """Return the pixels of ``letter`` as a 1-D array in row-major order."""
    return letter.ravel()

def noisy_versions(base, n=50, noise_level=0.05):
    """Generate ``n`` randomly corrupted, flattened copies of a binary pattern.

    Every pixel of every copy is flipped (0 <-> 1) independently with
    probability ``noise_level``. Randomness comes from NumPy's global
    ``np.random`` state, so ``np.random.seed`` makes the output reproducible.

    Args:
        base: Array-like binary pattern of any shape.
        n: Number of noisy copies to produce; values below zero are
            treated as zero.
        noise_level: Per-pixel flip probability.

    Returns:
        Float array of shape ``(n, base.size)`` with one flattened copy per
        row. The result stays two-dimensional even when ``n == 0`` so it can
        always be stacked with the samples of other classes.
    """
    template = np.asarray(base, dtype=float)
    count = max(n, 0)
    # A single draw covers all copies; the template broadcasts against the
    # leading "copy" axis of the flip mask.
    flip = np.random.rand(count, *template.shape) < noise_level
    noisy = np.where(flip, 1 - template, template)
    # Explicit column count: reshape cannot infer -1 when there are no rows.
    return noisy.reshape(count, template.size)

# Training set: n_per noisy samples per letter, generated and stacked in A, B, C order.
n_per = 100
data_A, data_B, data_C = (
    noisy_versions(template, n=n_per, noise_level=0.05)
    for template in (A, B, C)
)

X = np.concatenate([data_A, data_B, data_C], axis=0)

# Integer class ids (0 = A, 1 = B, 2 = C), n_per of each, in the same order as X.
y_labels = np.repeat(np.arange(3), n_per)

# One-hot targets: picking rows of the identity matrix by class id.
Y = np.eye(3)[y_labels]

print('Dataset shape X:', X.shape, 'Y:', Y.shape)

## 2. Build a 2-layer Neural Network (NumPy only)

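Architecture: Input (30) -> Hidden (16) -> Output (3). Activation: sigmoid on both the hidden and output layers. Loss: Mean Squared Error (for simplicity).
Training uses full-batch gradient descent: every weight update is computed from the entire training set.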

In [None]:
def sigmoid(z):
    """Apply the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The value is computed in a numerically stable form: only ``exp(-|z|)`` is
    ever evaluated, so large-magnitude inputs saturate to 0 or 1 instead of
    overflowing inside ``exp``.

    Args:
        z: Scalar or array-like of pre-activation values.

    Returns:
        Sigmoid of ``z`` with the same shape as the input (a NumPy scalar
        when ``z`` is a scalar).
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # never exceeds 1, so exp cannot overflow
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z). Both are the same
    # function, written so that the exponent is never positive.
    out = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    return out[()]  # turns a 0-d result back into a scalar; arrays pass through

def sigmoid_derivative(a):
    """Return the sigmoid's slope given its output ``a`` (not the pre-activation)."""
    complement = 1 - a
    return complement * a

def mse_loss(y_true, y_pred):
    """Return the mean of the squared element-wise differences between targets and predictions."""
    residual = y_true - y_pred
    return np.mean(np.square(residual))

# Layer widths are read from the data so the network always matches X and Y.
input_size = X.shape[1]
hidden_size = 16
output_size = Y.shape[1]

# Weights start as small Gaussian values (std 0.5) drawn from a dedicated
# seeded generator; biases start at zero.
rng = np.random.default_rng(1)
W1 = rng.normal(0, 0.5, (input_size, hidden_size))
b1 = np.zeros((1, hidden_size))
W2 = rng.normal(0, 0.5, (hidden_size, output_size))
b2 = np.zeros((1, output_size))

lr = 0.8
epochs = 1000

# Shuffle samples and their one-hot targets with the same permutation.
perm = np.random.permutation(X.shape[0])
X_shuffled = X[perm]
Y_shuffled = Y[perm]

loss_history = []
acc_history = []

# Loop invariants, computed once instead of on every epoch.
true = np.argmax(Y_shuffled, axis=1)  # integer class id of each training sample
grad_scale = 2 / Y_shuffled.size      # factor from differentiating the mean of squares

for epoch in range(epochs):
    # Forward pass over the whole training set.
    Z1 = X_shuffled.dot(W1) + b1
    A1 = sigmoid(Z1)
    Z2 = A1.dot(W2) + b2
    A2 = sigmoid(Z2)

    # Metrics are recorded before this epoch's weight update.
    loss = mse_loss(Y_shuffled, A2)
    loss_history.append(loss)

    preds = np.argmax(A2, axis=1)
    acc = np.mean(preds == true)
    acc_history.append(acc)

    # Backward pass: chain rule from the MSE loss back through both sigmoid layers.
    dA2 = -(Y_shuffled - A2) * grad_scale
    dZ2 = dA2 * sigmoid_derivative(A2)
    dW2 = A1.T.dot(dZ2)
    db2 = np.sum(dZ2, axis=0, keepdims=True)

    dA1 = dZ2.dot(W2.T)
    dZ1 = dA1 * sigmoid_derivative(A1)
    dW1 = X_shuffled.T.dot(dZ1)
    db1 = np.sum(dZ1, axis=0, keepdims=True)

    # Gradient-descent step, applied in place.
    W2 -= lr * dW2
    b2 -= lr * db2
    W1 -= lr * dW1
    b1 -= lr * db1

    # Report on the first epoch and then every 200 epochs.
    if (epoch+1) % 200 == 0 or epoch==0:
        print(f"Epoch {epoch+1}/{epochs} - Loss: {loss:.5f} - Acc: {acc:.3f}")

print('Training complete')

## 3. Training curves (Loss & Accuracy)

In [None]:
import matplotlib.pyplot as plt
# Two panels side by side: training loss on the left, training accuracy on the right.
panels = [
    (loss_history, 'Loss over epochs', 'MSE Loss'),
    (acc_history, 'Accuracy over epochs', 'Accuracy'),
]

plt.figure(figsize=(12, 4))
for position, (series, panel_title, y_axis_label) in enumerate(panels, start=1):
    plt.subplot(1, len(panels), position)
    plt.plot(series)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_axis_label)

plt.tight_layout()
plt.show()

## 4. Evaluate on held-out noisy examples and visualize predictions

In [None]:
# Test set: the three clean templates followed by five noisy samples
# (flip probability 0.08) of each letter, in A, B, C order.
templates = (A, B, C)
test_clean = np.vstack([flatten(template) for template in templates])
test_noisy = np.vstack([
    noisy_versions(template, n=5, noise_level=0.08) for template in templates
])
X_test = np.vstack([test_clean, test_noisy])
y_test = np.concatenate([np.arange(3), np.repeat(np.arange(3), 5)])
Y_test = np.eye(3)[y_test]  # one-hot version of y_test

# Forward pass with the trained weights; the predicted class is the largest output.
A1_test = sigmoid(X_test.dot(W1) + b1)
A2_test = sigmoid(A1_test.dot(W2) + b2)
preds = A2_test.argmax(axis=1)
acc_test = (preds == y_test).mean()
print('Test accuracy:', acc_test)

# Draw every test image in a grid, titled with its true and predicted letter.
labels = ['A','B','C']
import matplotlib.pyplot as plt
n_show = X_test.shape[0]
cols = 5
rows = -(-n_show // cols)  # ceiling division
plt.figure(figsize=(12, 3*rows))
for slot, (pixels, truth, guess) in enumerate(zip(X_test, y_test, preds), start=1):
    plt.subplot(rows, cols, slot)
    plt.imshow(pixels.reshape(6,5), cmap='gray_r', interpolation='nearest')
    plt.title(f"True: {labels[truth]} - Pred: {labels[guess]}")
    plt.axis('off')
plt.tight_layout()
plt.show()