<a href="https://colab.research.google.com/github/madhav037/Learn-AI/blob/main/Feedforward_network_simple.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Implementation of a simple neural network
with feedforward and backpropogation

In [None]:
import numpy as np
import pandas as pd

some necessary functions

In [None]:
def ReLu(x):
  return np.maximum(0, x)

def softmax(x):
  exps = np.exp(x - np.max(x, axis=1, keepdims=True))
  return exps / np.sum(exps, axis=1, keepdims=True)

def relu_derivative(x):
    return (x > 0).astype(float)

def cross_entropy_loss(y_true, y_pred):
    n = y_true.shape[0]
    y_pred = np.clip(y_pred, 1e-8, 1 - 1e-8)
    return -np.sum(y_true * np.log(y_pred)) / n

def one_hot(y, num_classes):
    one_hot = np.zeros((y.size, num_classes))
    one_hot[np.arange(y.size), y] = 1
    return one_hot

In [None]:
class NeuralNetwork:
  def __init__(self, input_size, hidden_layer1, hidden_layer2, output_size) -> None:
    np.random.seed(42)
    self.W1 = np.random.randn(input_size, hidden_layer1) * np.sqrt(2. / input_size)
    self.b1 = np.zeros((1, hidden_layer1))
    self.W2 = np.random.randn(hidden_layer1, hidden_layer2) * np.sqrt(2. / hidden_layer1)
    self.b2 = np.zeros((1, hidden_layer2))
    self.W3 = np.random.randn(hidden_layer2, output_size) * np.sqrt(2. / hidden_layer2)
    self.b3 = np.zeros((1, output_size))

  def feedforward(self, X):
    self.z1 = np.dot(X, self.W1) + self.b1
    self.a1 = ReLu(self.z1)

    self.z2 = np.dot(self.a1, self.W2) + self.b2
    self.a2 = ReLu(self.z2)

    self.z3 = np.dot(self.a2, self.W3) + self.b3
    self.a3 = softmax(self.z3)

    return self.a3

  def backpropogate(self, X, y_true, y_pred, learning_rate):
    m = X.shape[0]

    d_z3 = y_pred - y_true
    d_W3 = np.dot(self.a2.T, d_z3) / m
    d_b3 = np.sum(d_W3, axis=0, keepdims=True) / m

    d_a2 = np.dot(d_z3, self.W3.T)
    d_z2 = d_a2 * relu_derivative(self.z2)
    d_W2 = self.a1.T @ d_z2 / m
    d_b2 = np.sum(d_z2, axis=0, keepdims=True) / m

    d_a1 = d_z2 @ self.W2.T
    d_z1 = d_a1 * relu_derivative(self.z1)
    d_W1 = X.T @ d_z1 / m
    d_b1 = np.sum(d_z1, axis=0, keepdims=True) / m

    self.W3 -= learning_rate * d_W3
    self.b3 -= learning_rate * d_b3
    self.W2 -= learning_rate * d_W2
    self.b2 -= learning_rate * d_b2
    self.W1 -= learning_rate * d_W1
    self.b1 -= learning_rate * d_b1

  def train(self, X, y, epochs=100, learning_rate=0.01):
    for epoch in range(epochs):
      y_pred = self.feedforward(X)
      loss = cross_entropy_loss(y, y_pred)
      self.backpropogate(X, y, y_pred, learning_rate)

      if epoch % 100 == 0:
        acc = self.evaluate(X, y)
        print(f"Epoch {epoch} | Loss: {loss:.4f} | Accuracy: {acc:.2f}%")

    acc = self.evaluate(X, y)
    print(f"Epoch {epoch} | Loss: {loss:.4f} | Accuracy: {acc:.2f}%")

  def evaluate(self, X, y_true):
    y_pred = self.feedforward(X)
    predictions = np.argmax(y_pred, axis=1)
    labels = np.argmax(y_true, axis=1)
    return np.mean(predictions == labels) * 100

  def predict(self, X):
    return self.feedforward(X)

In [None]:
df = pd.read_csv('/content/sample_data/mnist_train_small.csv')
df.head()

Unnamed: 0,6,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.581,0.582,0.583,0.584,0.585,0.586,0.587,0.588,0.589,0.590
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
X = df.iloc[:, 1:] / 255.0
y_raw = df.iloc[:, 0]
# X.head()
y_raw.head()

Unnamed: 0,6
0,5
1,7
2,9
3,5
4,2


In [None]:
y = one_hot(y_raw.to_numpy(), 10)

In [None]:
nn = NeuralNetwork(input_size=784, hidden_layer1=128, hidden_layer2=64, output_size=10)
nn.train(X, y, epochs=1000, learning_rate=0.01)

Epoch 0 | Loss: 2.4427 | Accuracy: 8.41%
Epoch 100 | Loss: 1.5810 | Accuracy: 63.93%
Epoch 200 | Loss: 1.0072 | Accuracy: 77.42%
Epoch 300 | Loss: 0.7416 | Accuracy: 82.01%
Epoch 400 | Loss: 0.6136 | Accuracy: 84.42%
Epoch 500 | Loss: 0.5396 | Accuracy: 85.75%
Epoch 600 | Loss: 0.4908 | Accuracy: 86.73%
Epoch 700 | Loss: 0.4557 | Accuracy: 87.45%
Epoch 800 | Loss: 0.4289 | Accuracy: 88.09%
Epoch 900 | Loss: 0.4076 | Accuracy: 88.59%
Epoch 999 | Loss: 0.3902 | Accuracy: 88.98%


In [None]:
df_test = pd.read_csv('/content/sample_data/mnist_test.csv')
df_test.head()

Unnamed: 0,7,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.658,0.659,0.660,0.661,0.662,0.663,0.664,0.665,0.666,0.667
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
X = df.iloc[:, 1:] / 255.0
y_raw = df.iloc[:, 0]
# X.head()
y_raw.head()

y = one_hot(y_raw.to_numpy(), 10)

In [None]:
nn.evaluate(X, y)

np.float64(88.97944897244862)

In [None]:
row = np.random.randint(low=0, high=len(X))

X_demo = X.iloc[row]
y_demo = y[row]

y_pred = nn.predict(X_demo)

# Predicted probabilities
probs = y_pred[0]  # shape: (10,), because output is (1, 10)

# Get classes
predicted_class = np.argmax(probs)
true_class = np.argmax(y_demo)

# Print nicely
print(f"\n🟢 True class      : {true_class}")
print(f"🔵 Predicted class : {predicted_class}\n")

print("🔢 Class probabilities:")
for i, p in enumerate(probs):
    marker = "⬅️" if i == predicted_class else "  "
    print(f"  Class {i}: {p:.4f} {marker}")


🟢 True class      : 2
🔵 Predicted class : 2

🔢 Class probabilities:
  Class 0: 0.0014   
  Class 1: 0.0000   
  Class 2: 0.9943 ⬅️
  Class 3: 0.0015   
  Class 4: 0.0004   
  Class 5: 0.0000   
  Class 6: 0.0016   
  Class 7: 0.0001   
  Class 8: 0.0003   
  Class 9: 0.0005   
