<a href="https://colab.research.google.com/github/sameph/Icog_labs/blob/main/Simple_neural.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import numpy as np

# === Activation Functions ===
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    s = sigmoid(x)
    return s * (1 - s)

def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1 - np.tanh(x) ** 2

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

# === Loss Function ===
def cross_entropy_loss(y_true, y_pred):
    m = y_true.shape[0]
    log_likelihood = -np.log(y_pred[range(m), y_true.argmax(axis=1)] + 1e-9)
    return np.sum(log_likelihood) / m

# === Dense Layer with Optimizer Support ===
class DenseLayer:
    def __init__(self, input_size, output_size, activation='tanh'):
        self.weights = np.random.randn(input_size, output_size) * 0.1
        self.bias = np.zeros((1, output_size))

        self.activation_name = activation
        self.activation, self.activation_derivative = self.get_activation_fn(activation)

        # For momentum
        self.v_w = np.zeros_like(self.weights)
        self.v_b = np.zeros_like(self.bias)

        # For Adam
        self.m_w = np.zeros_like(self.weights)
        self.v_w_adam = np.zeros_like(self.weights)
        self.m_b = np.zeros_like(self.bias)
        self.v_b_adam = np.zeros_like(self.bias)

        self.t = 0  # timestep for Adam

    def get_activation_fn(self, name):
        if name == 'relu':
            return relu, relu_derivative
        elif name == 'sigmoid':
            return sigmoid, sigmoid_derivative
        elif name == 'tanh':
            return tanh, tanh_derivative
        else:
            raise ValueError("Unsupported activation")

    def forward(self, X):
        self.input = X
        self.z = np.dot(X, self.weights) + self.bias
        self.a = self.activation(self.z)
        return self.a

    def backward(self, grad_output, learning_rate, optimizer='gd', beta=0.9, beta1=0.9, beta2=0.999, epsilon=1e-8):
        grad_z = grad_output * self.activation_derivative(self.z)
        grad_w = np.dot(self.input.T, grad_z) / self.input.shape[0]
        grad_b = np.sum(grad_z, axis=0, keepdims=True) / self.input.shape[0]

        if optimizer == 'momentum':
            self.v_w = beta * self.v_w + (1 - beta) * grad_w
            self.v_b = beta * self.v_b + (1 - beta) * grad_b
            self.weights -= learning_rate * self.v_w
            self.bias -= learning_rate * self.v_b

        elif optimizer == 'adam':
            self.t += 1
            self.m_w = beta1 * self.m_w + (1 - beta1) * grad_w
            self.v_w_adam = beta2 * self.v_w_adam + (1 - beta2) * (grad_w ** 2)

            m_w_hat = self.m_w / (1 - beta1 ** self.t)
            v_w_hat = self.v_w_adam / (1 - beta2 ** self.t)

            self.weights -= learning_rate * m_w_hat / (np.sqrt(v_w_hat) + epsilon)

            self.m_b = beta1 * self.m_b + (1 - beta1) * grad_b
            self.v_b_adam = beta2 * self.v_b_adam + (1 - beta2) * (grad_b ** 2)

            m_b_hat = self.m_b / (1 - beta1 ** self.t)
            v_b_hat = self.v_b_adam / (1 - beta2 ** self.t)

            self.bias -= learning_rate * m_b_hat / (np.sqrt(v_b_hat) + epsilon)

        else:  # standard gradient descent
            self.weights -= learning_rate * grad_w
            self.bias -= learning_rate * grad_b

        return np.dot(grad_z, self.weights.T)

# === Neural Network ===
class SimpleThreeLayerNN:
    def __init__(self):
        self.layer1 = DenseLayer(input_size=3, output_size=1, activation='relu')
        self.layer2 = DenseLayer(input_size=1, output_size=1, activation='tanh')  # Example: tanh
        self.layer3 = DenseLayer(input_size=1, output_size=3, activation='relu')  # Output layer before softmax

    def forward(self, X):
        self.a1 = self.layer1.forward(X)
        self.a2 = self.layer2.forward(self.a1)
        self.z3 = self.layer3.forward(self.a2)
        self.a3 = softmax(self.z3)
        return self.a3

    def backward(self, X, y_true, y_pred, learning_rate, optimizer):
        m = y_true.shape[0]
        grad_z3 = (y_pred - y_true) / m

        grad_a2 = self.layer3.backward(grad_z3, learning_rate, optimizer)
        grad_a1 = self.layer2.backward(grad_a2, learning_rate, optimizer)
        _ = self.layer1.backward(grad_a1, learning_rate, optimizer)

    def train(self, X, y, epochs=300, batch_size=16, learning_rate=0.01, optimizer='gd'):
        for epoch in range(epochs):
            indices = np.random.permutation(X.shape[0])
            X_shuffled = X[indices]
            y_shuffled = y[indices]

            for i in range(0, X.shape[0], batch_size):
                X_batch = X_shuffled[i:i+batch_size]
                y_batch = y_shuffled[i:i+batch_size]

                y_pred = self.forward(X_batch)
                loss = cross_entropy_loss(y_batch, y_pred)
                self.backward(X_batch, y_batch, y_pred, learning_rate, optimizer)

            if epoch % 50 == 0:
                print(f"Epoch {epoch} | Loss: {loss:.4f} | Optimizer: {optimizer}")

    def predict(self, X):
        probs = self.forward(X)
        return np.argmax(probs, axis=1)

# === Example Usage ===
np.random.seed(42)

X_train = np.random.randn(100, 3)
y_train = np.zeros((100, 3))
for i in range(100):
    y_train[i, np.random.randint(0, 3)] = 1

model = SimpleThreeLayerNN()
model.train(X_train, y_train, epochs=300, batch_size=16, learning_rate=0.01, optimizer='relu')  # Try 'gd', 'momentum', 'adam'

# Predict
preds = model.predict(X_train)
print("Sample predictions:", preds[:10])


Epoch 0 | Loss: 1.0985 | Optimizer: relu
Epoch 50 | Loss: 1.1100 | Optimizer: relu
Epoch 100 | Loss: 1.0900 | Optimizer: relu
Epoch 150 | Loss: 1.0857 | Optimizer: relu
Epoch 200 | Loss: 1.0830 | Optimizer: relu
Epoch 250 | Loss: 1.1396 | Optimizer: relu
Sample predictions: [0 0 0 0 0 0 0 0 0 0]
