In [2]:
import numpy as np
from numpy import linalg as LA
import struct
from array import array
import random
%matplotlib inline
import random
import matplotlib.pyplot as plt

In [None]:
# Synthetic two-class problem: two 2-D Gaussian blobs with identity covariance,
# centred at (-1, -1) for class 0 and (1, 1) for class 1.

# Seed the legacy global NumPy RNG so that the sampled data, the shuffle and any
# later np.random-based initialisation are identical on every Restart & Run All
# (and so that re-running this cell alone regenerates the same data).
SEED = 0
np.random.seed(SEED)

n_samples_per_class = 10
mean0 = np.array([-1, -1])
mean1 = np.array([1, 1])
cov = np.eye(2)

# Transposed so that X0 / X1 have shape (2, n_samples_per_class): each column is one sample.
X0 = np.random.multivariate_normal(mean0, cov, n_samples_per_class).T
X1 = np.random.multivariate_normal(mean1, cov, n_samples_per_class).T
# Labels are row vectors aligned with the sample columns: 0.0 for class 0, 1.0 for class 1.
Y0 = np.zeros((1, n_samples_per_class))
Y1 = np.ones((1, n_samples_per_class))

# Combine both classes, then shuffle samples and labels with the same column permutation.
X = np.concatenate([X0, X1], axis=1)
Y = np.concatenate([Y0, Y1], axis=1)
perm = np.random.permutation(2 * n_samples_per_class)
X, Y = X[:, perm], Y[:, perm]

# First n_samples_per_class shuffled columns are used for training, the remainder for testing
# (a 50/50 split of the 2 * n_samples_per_class samples). The split is not stratified.
split_idx = n_samples_per_class
X_train, X_test = X[:, :split_idx], X[:, split_idx:]
Y_train, Y_test = Y[:, :split_idx], Y[:, split_idx:]

# Sanity checks on the layout the rest of the notebook relies on.
assert X_train.shape == (2, split_idx)
assert X_test.shape == (2, 2 * n_samples_per_class - split_idx)
assert Y_train.shape == (1, X_train.shape[1])
assert Y_test.shape == (1, X_test.shape[1])

(2, 10)


In [4]:
class SimpleNN:
    """Tiny fully connected network: one sigmoid hidden layer and a linear output.

    Samples are stored column-wise: inputs have shape (input_dim, batch_size) and
    targets/predictions have shape (output_dim, batch_size). Training is plain
    full-batch gradient descent on the mean squared error.
    """

    def __init__(self, input_dim=2, hidden_dim=1, output_dim=1, lr=0.1):
        """Draw all weights and biases from a standard normal and store the learning rate.

        Initialisation uses the global ``np.random`` state, so call
        ``np.random.seed(...)`` beforehand if reproducible weights are needed.
        """
        self.w1 = np.random.randn(hidden_dim, input_dim)
        self.b1 = np.random.randn(hidden_dim, 1)
        self.w2 = np.random.randn(output_dim, hidden_dim)
        self.b2 = np.random.randn(output_dim, 1)
        self.lr = lr

    def sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)), evaluated element-wise.

        Only exp(-|z|) is ever computed, so large-magnitude inputs cannot overflow
        (the naive form overflows in exp(-z) for very negative z).
        """
        z = np.asarray(z, dtype=float)
        e = np.exp(-np.abs(z))  # always in (0, 1]
        # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- the same function.
        out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
        # [()] turns a 0-d result back into a NumPy scalar; n-d arrays are unaffected.
        return out[()]

    def sigmoid_derivative(self, z):
        """Return d sigmoid(z) / dz = sigmoid(z) * (1 - sigmoid(z)), element-wise."""
        s = self.sigmoid(z)
        return s * (1 - s)

    def l2_derivative(self, F, Y):
        """Return 2 * (Y - F).

        Note the sign: this is the NEGATIVE of the derivative of (F - Y) ** 2 with
        respect to the prediction F. ``backward`` negates it again before use.
        """
        return 2*(Y-F)

    def forward(self, X):
        """Compute predictions for X of shape (input_dim, batch_size).

        Caches the pre-activations (``y1``, ``y2``) and activations (``z1``, ``z2``)
        on the instance; ``backward`` reads them. Returns ``z2`` with shape
        (output_dim, batch_size).
        """
        self.y1 = self.w1.dot(X) + self.b1       # (hidden_dim, batch_size)
        self.z1 = self.sigmoid(self.y1)
        self.y2 = self.w2.dot(self.z1) + self.b2 # (output_dim, batch_size)
        self.z2 = self.y2  # No activation on output
        return self.z2

    def compute_loss(self, F, Y):
        """Return the mean squared error between predictions F and targets Y."""
        return np.mean((F - Y) ** 2)

    def backward(self, X, Y):
        """Take one gradient-descent step on the MSE and update the parameters in place.

        Uses the activations cached by the most recent ``forward`` call, so
        ``forward(X)`` must have been called with this same X (and the current
        parameters) immediately beforehand; otherwise the gradients are computed
        from stale activations.
        """
        m = X.shape[1]
        # Gradient of the loss w.r.t. the output: 2 * (z2 - Y) / m.
        # l2_derivative returns 2 * (Y - z2), hence the leading minus sign.
        dz2 = -self.l2_derivative(self.z2, Y)/m
        dy2 = dz2                                # Since output is linear
        dw2 = dy2.dot(self.z1.T)                # (output_dim, hidden_dim)
        db2 = np.sum(dy2, axis=1, keepdims=True)

        # Back-propagate through the output weights and the hidden sigmoid.
        dz1 = self.w2.T.dot(dy2)                # (hidden_dim, m)
        dy1 = dz1 * self.sigmoid_derivative(self.y1)
        dw1 = dy1.dot(X.T)                      # (hidden_dim, input_dim)
        db1 = np.sum(dy1, axis=1, keepdims=True)

        # Update parameters
        self.w2 -= self.lr * dw2
        self.b2 -= self.lr * db2
        self.w1 -= self.lr * dw1
        self.b1 -= self.lr * db1

    def train(self, X_train, Y_train, epochs=1000, log_every=1):
        """Run ``epochs`` full-batch gradient-descent steps.

        Parameters
        ----------
        X_train : array of shape (input_dim, batch_size)
        Y_train : array of shape (output_dim, batch_size)
        epochs : int
            Number of gradient steps.
        log_every : int
            Print the loss every ``log_every`` epochs. The default of 1 prints
            every epoch; pass 0 (or None) to train silently.

        Returns
        -------
        list of float
            The loss measured at the start of each epoch (before that epoch's update).
        """
        losses = []
        for epoch in range(1, epochs + 1):
            F = self.forward(X_train)
            loss = self.compute_loss(F, Y_train)
            self.backward(X_train, Y_train)
            losses.append(float(loss))
            if log_every and epoch % log_every == 0:
                print(f"Epoch {epoch}/{epochs}, Loss: {loss}")
        return losses

In [6]:
# Hyperparameters for this run, named so they are easy to find and tune.
LEARNING_RATE = 0.1
N_EPOCHS = 10

# Fit the network on the training half of the data (loss is printed per epoch).
nn = SimpleNN(lr=LEARNING_RATE)
nn.train(X_train, Y_train, epochs=N_EPOCHS)

# Evaluate on the held-out half: mean squared error between predictions and 0/1 labels.
Y_pred_test = nn.forward(X_test)
test_mse = nn.compute_loss(Y_pred_test, Y_test)
print(f"Test MSE: {test_mse}")


Epoch 1/10, Loss: 0.5828120831282753
Epoch 2/10, Loss: 0.3689730598365617
Epoch 3/10, Loss: 0.25356333213746896
Epoch 4/10, Loss: 0.1903801858190322
Epoch 5/10, Loss: 0.15494561500174678
Epoch 6/10, Loss: 0.1342825210515619
Epoch 7/10, Loss: 0.12151048033145062
Epoch 8/10, Loss: 0.11298177961612324
Epoch 9/10, Loss: 0.1067624544743682
Epoch 10/10, Loss: 0.10182670144784414
Test MSE: 0.1698068526856277
