In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Neural Network: Accuracy Fixes and Best Practices

- Ensured correct one-hot encoding shape (no unnecessary transpose)
- Added shuffling of training data at each epoch
- Set random seed for reproducibility
- Improved comments for clarity
- Retained correct linear algebra in Layer class


In [None]:
# Load the MNIST training CSV into a NumPy array and shuffle its rows once.
# The path must be a raw string: in a normal literal "\U" (and "\N") start
# unicode escapes, so the un-prefixed Windows path is a SyntaxError in Python 3.
data=pd.read_csv(r"C:\Users\Acer\Desktop\Neural-Network\Data\mnist_train.csv")
data=np.array(data)
# m = number of rows (samples), n = number of columns in the CSV.
m,n=data.shape
# Seed before shuffling so the train/validation split derived from this row
# order is the same on every Restart & Run All.
np.random.seed(42)
# In-place shuffle along the first axis (rows).
np.random.shuffle(data)
print(m,n)

In [None]:
# 80/20 split of the (already shuffled) rows: the first 80% become the
# training set, the remainder the validation set.
train_data, val_data = data[:int(0.8*m)], data[int(0.8*m):]
print(train_data.shape, val_data.shape, sep="\n")

In [None]:
# Column 0 of each row is taken as the label; the remaining columns are the
# features. Features are transposed to (features, samples) and divided by
# 255.0; labels are stored as int8.
X_train, Y_train = train_data[:, 1:].T / 255.0, train_data[:, 0].astype('int8')
X_val, Y_val = val_data[:, 1:].T / 255.0, val_data[:, 0].astype('int8')

In [None]:
def one_hot_encode(y, num_classes=10):
    """Build a one-hot matrix of shape (num_classes, N) from N class labels.

    Column j of the result is all zeros except for a 1 in row ``y[j]``.

    Args:
        y: Integer class labels. Any array-like is accepted and flattened, so
            a column vector of shape (N, 1) is treated the same as shape (N,).
        num_classes: Number of rows (classes) in the output.

    Returns:
        float64 ndarray of shape (num_classes, N).

    Raises:
        IndexError: if an integer label is outside ``[0, num_classes)``.
            Negative labels are rejected explicitly because NumPy indexing
            would otherwise wrap them around to the last classes silently.
    """
    # Flatten so a (N, 1) label array cannot broadcast against arange(N) and
    # mark N*N cells instead of N.
    labels = np.asarray(y).ravel()
    one_hot = np.zeros((num_classes, labels.size))
    if labels.size == 0:
        return one_hot

    # Only integer labels are range-checked here; any other dtype falls
    # through to NumPy's own indexing rules, as before.
    if np.issubdtype(labels.dtype, np.integer):
        lowest, highest = int(labels.min()), int(labels.max())
        if lowest < 0 or highest >= num_classes:
            raise IndexError(
                f"labels must lie in [0, {num_classes}); got range [{lowest}, {highest}]"
            )

    one_hot[labels, np.arange(labels.size)] = 1
    return one_hot

# One-hot encode the training labels first, then the validation labels.
Y_train_encoded, Y_val_encoded = map(one_hot_encode, (Y_train, Y_val))

In [None]:
class Layer:
    """Fully connected layer operating on column-major batches.

    Weights have shape (n_neuron, n_input) and the bias has shape
    (n_neuron, 1), so ``forward`` computes ``w . inputs + b``.
    """

    def __init__(self, n_input, n_neuron):
        """Create the layer with He-initialised weights and a zero bias."""
        he_scale = np.sqrt(2.0/n_input)
        self.w = np.random.randn(n_neuron, n_input) * he_scale
        self.b = np.zeros((n_neuron, 1))

    def forward(self, inputs):
        """Store ``inputs`` and compute ``self.output = w . inputs + b``."""
        self.inputs = inputs
        self.output = self.w.dot(inputs) + self.b

    def backward(self, dvalues):
        """Compute gradients from ``dvalues``, the gradient w.r.t. this layer's output.

        Sets ``dweights`` (same shape as ``w``), ``dbias`` (row-wise sum of
        ``dvalues``, kept as a column) and ``dinputs`` (gradient passed to the
        previous stage). Requires ``forward`` to have been called first.
        """
        self.dweights = np.dot(dvalues, self.inputs.T)
        self.dbias = np.sum(dvalues, axis=1, keepdims=True)
        self.dinputs = self.w.T.dot(dvalues)


In [None]:
class Activation_ReLU:
    """Element-wise rectified linear unit: max(0, x)."""

    def forward(self, inputs):
        """Store ``inputs`` and set ``self.output`` to their non-negative part."""
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Pass ``dvalues`` through where the stored input was positive, zero elsewhere.

        ``dvalues`` itself is not modified; the result is written to
        ``self.dinputs``.
        """
        blocked = self.inputs <= 0
        grad = dvalues.copy()
        grad[blocked] = 0
        self.dinputs = grad


In [None]:
class Activation_Softmax:
    """Column-wise softmax: every column of the output sums to 1."""

    def forward(self, inputs):
        """Compute softmax along axis 0 and store it in ``self.output``."""
        # Subtracting the per-column maximum keeps exp() from overflowing
        # without changing the resulting probabilities.
        shifted = inputs - np.max(inputs, axis=0, keepdims=True)
        exp_values = np.exp(shifted)
        self.output = exp_values / exp_values.sum(axis=0, keepdims=True)

    def backward(self, dvalues):
        """Store a copy of ``dvalues`` in ``self.dinputs`` unchanged.

        This is a pass-through placeholder: the softmax derivative is not
        applied here because it is combined with the loss gradient.
        """
        self.dinputs = dvalues.copy()


In [None]:
class Loss_CategoricalCrossEntropy:
    """Categorical cross-entropy for column-major predictions of shape (classes, N)."""

    def forward(self, y_pred, y_true):
        """Return the mean negative log-likelihood of the true classes.

        Args:
            y_pred: Predicted probabilities, one column per sample.
            y_true: Either a 2-D one-hot matrix with the same layout as
                ``y_pred`` or a 1-D array of integer class indices.

        Returns:
            The mean over samples of ``-log(p_true)``.
        """
        # Clip so log() never sees exactly 0 (or 1).
        clipped = np.clip(y_pred, 1e-12, 1 - 1e-12)

        if len(y_true.shape) == 2:
            # One-hot labels: the mask keeps only the true-class probability
            # in every column.
            true_class_prob = (clipped * y_true).sum(axis=0)
        else:
            # Integer labels: pick row y_true[j] from column j.
            true_class_prob = clipped[y_true, np.arange(y_pred.shape[1])]

        return np.mean(-np.log(true_class_prob))

    def backward(self, dvalues, y_true):
        """Compute the combined softmax + cross-entropy gradient.

        ``dvalues`` is treated as the softmax output. The result,
        ``(dvalues - one_hot(y_true)) / N``, is stored in ``self.dinputs``;
        ``dvalues`` is left untouched.
        """
        n_samples = dvalues.shape[1]

        # One-hot labels are reduced to class indices first.
        labels = np.argmax(y_true, axis=0) if len(y_true.shape) == 2 else y_true

        grad = dvalues.copy()
        grad[labels, np.arange(n_samples)] -= 1
        self.dinputs = grad / n_samples


In [None]:
class NeuralNetwork:
    """Three-layer classifier: Layer -> ReLU -> Layer -> ReLU -> Layer -> Softmax.

    Trained with plain gradient descent on categorical cross-entropy.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, lr=0.1):
        """Build the three dense layers, their activations and the loss.

        Args:
            input_size: Number of input features per sample.
            hidden_size1: Width of the first hidden layer.
            hidden_size2: Width of the second hidden layer.
            output_size: Number of output classes.
            lr: Learning rate used by ``update``.
        """
        # Layers are created in order 1, 2, 3 so the random weight draws
        # happen in the same sequence.
        self.layer1 = Layer(input_size, hidden_size1)
        self.layer2 = Layer(hidden_size1, hidden_size2)
        self.layer3 = Layer(hidden_size2, output_size)

        self.activation1 = Activation_ReLU()
        self.activation2 = Activation_ReLU()
        self.activation3 = Activation_Softmax()

        self.loss_function = Loss_CategoricalCrossEntropy()
        self.learning_rate = lr

    def _forward_pass(self, X):
        """Run X through every stage in order and return the final activation output."""
        stages = (
            self.layer1, self.activation1,
            self.layer2, self.activation2,
            self.layer3, self.activation3,
        )
        signal = X
        for stage in stages:
            stage.forward(signal)
            signal = stage.output
        return signal

    def forward(self, X, y):
        """Run a forward pass on X and return the loss against labels y."""
        probabilities = self._forward_pass(X)
        return self.loss_function.forward(probabilities, y)

    def backward(self, y):
        """Back-propagate from the loss to the first layer.

        Must follow a ``forward`` call; every stage ends up with its
        ``dinputs`` (and each layer with ``dweights`` / ``dbias``) populated.
        """
        self.loss_function.backward(self.activation3.output, y)

        # Each stage receives the dinputs of the stage that follows it in the
        # forward direction.
        upstream = self.loss_function
        for stage in (
            self.activation3, self.layer3,
            self.activation2, self.layer2,
            self.activation1, self.layer1,
        ):
            stage.backward(upstream.dinputs)
            upstream = stage

    def update(self):
        """Apply one gradient-descent step, in place, to every layer's w and b."""
        step = self.learning_rate
        for layer in (self.layer1, self.layer2, self.layer3):
            layer.w -= step * layer.dweights
            layer.b -= step * layer.dbias

    def predict(self, X):
        """Return the index of the highest-scoring class for each column of X."""
        return np.argmax(self._forward_pass(X), axis=0)

In [None]:
# Seed NumPy's global RNG so the random weight initialisation inside Layer and
# the per-epoch permutation in the training loop are repeatable.
np.random.seed(42)

# Initialize model with larger hidden layers and smaller learning rate:
# 784 inputs -> 128 -> 64 hidden units -> 10 outputs, lr=0.01 (the class default is 0.1).
model = NeuralNetwork(784, 128, 64, 10, lr=0.01)

def get_batch(X, y, batch_size):
    """Yield consecutive ``(X_batch, y_batch)`` column slices of width ``batch_size``.

    Both arrays are sliced along axis 1 with the same window, so columns stay
    aligned. The final batch is narrower when the number of columns of ``X``
    is not a multiple of ``batch_size``.
    """
    n_samples = X.shape[1]
    windows = (slice(lo, lo + batch_size) for lo in range(0, n_samples, batch_size))
    for cols in windows:
        yield X[:, cols], y[:, cols]

# Training parameters
epochs = 50          # maximum number of passes over the training set
batch_size = 32      # columns (samples) per gradient step
best_val_acc = 0     # highest validation accuracy seen so far
patience = 5         # stop after this many consecutive epochs without improvement
no_improve = 0       # consecutive epochs without a new best validation accuracy


for epoch in range(epochs):
   
    # Reshuffle the training set each epoch. The same permutation is applied to
    # the features, the one-hot labels and the integer labels so they stay aligned.
    # Note: this rebinds the module-level X_train / Y_train_encoded / Y_train.
    perm = np.random.permutation(X_train.shape[1])
    X_train = X_train[:, perm]
    Y_train_encoded = Y_train_encoded[:, perm]
    Y_train = Y_train[perm]

    # Running sum of per-batch losses and the number of batches processed.
    epoch_loss = 0
    batch_count = 0
    
    # Training: one forward pass, backward pass and parameter update per mini-batch.
    for X_batch, y_batch in get_batch(X_train, Y_train_encoded, batch_size):
        loss = model.forward(X_batch, y_batch)
        model.backward(y_batch)
        model.update()
        epoch_loss += loss
        batch_count += 1
    
    # Mean of the per-batch losses for this epoch.
    avg_loss = epoch_loss / batch_count
    
    # Calculate training accuracy (fraction of predictions equal to the integer labels)
    train_preds = model.predict(X_train)
    train_acc = np.mean(train_preds == Y_train)
    
    # Calculate validation accuracy
    val_preds = model.predict(X_val)
    val_acc = np.mean(val_preds == Y_val)
    
    print(f"Epoch {epoch+1}: Loss = {avg_loss:.4f}, Train Acc = {train_acc:.4f}, Val Acc = {val_acc:.4f}")
    
    # Early stopping: only a strict improvement in validation accuracy resets
    # the counter. The model's weights are not rolled back to the best epoch.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        no_improve = 0
    else:
        no_improve += 1
        
    if no_improve >= patience:
        print(f"Early stopping triggered. Best validation accuracy: {best_val_acc:.4f}")
        break
