In [1]:
import numpy as np

import numpy as np

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    s = sigmoid(x)
    return s * (1 - s)

In [2]:
class SequentialModel:
    def __init__(self, input_dim):
        self.input_dim = input_dim
        self.W = []
        self.b = []
        self.activation_fn = []
        self.loss_fn = None
        self.prev_dim = input_dim

    def add(self, neurons=5, activation_fn='relu'):
        W_i = np.random.randn(neurons, self.prev_dim) * 0.1
        b_i = np.zeros(neurons)
        self.W.append(W_i)
        self.b.append(b_i)
        self.activation_fn.append(activation_fn)
        self.prev_dim = neurons

    def compile(self, loss_fn='MSE'):
        self.loss_fn = loss_fn

    def forward_pass(self, x, W, b, activation_fn):
        z = []
        a = [x]
        for i in range(len(W)):
            z_i = W[i] @ a[-1] + b[i]
            z.append(z_i)
            
            if activation_fn[i] == 'relu':
                a.append(relu(z_i))
            elif activation_fn[i] == 'sigmoid':
                a.append(sigmoid(z_i))
            else:
                raise ValueError("Unsupported activation")
                
        return z, a

    def backward_pass(self, a, z, y, W, activation_fn, loss_fn):
        L = len(W)
        gradients_W = [None] * L
        gradients_b = [None] * L
        delta = [None] * L
    
        # output layer
        if loss_fn == 'MSE':
            loss_grad = a[-1] - y
        elif loss_fn == 'cross_entropy':
            loss_grad = a[-1] - y  # works if sigmoid + CE used together
        else:
            raise ValueError("Unsupported loss")
    
        if activation_fn[-1] == 'sigmoid':
            delta[-1] = loss_grad * sigmoid_derivative(z[-1]) if loss_fn == 'MSE' else loss_grad
        elif activation_fn[-1] == 'relu':
            delta[-1] = loss_grad * relu_derivative(z[-1])
    
        # Hidden layers
        for l in reversed(range(L - 1)):
            if activation_fn[l] == 'sigmoid':
                act_deriv = sigmoid_derivative(z[l])
            else:
                act_deriv = relu_derivative(z[l])
            delta[l] = (W[l + 1].T @ delta[l + 1]) * act_deriv
    
        # Gradients
        for l in range(L):
            gradients_W[l] = np.outer(delta[l], a[l])
            gradients_b[l] = delta[l]
    
        return gradients_W, gradients_b

    def gradient_descent(self, W, b, gradients_W, gradients_b, lr=0.01):
        for l in range(len(W)):
            W[l] -= lr * gradients_W[l]
            b[l] -= lr * gradients_b[l]

    def train(self, X, Y, lr=0.1, epochs=1000):
        for _ in range(epochs):
            for x, y in zip(X, Y):
                z, a = self.forward_pass(x, self.W, self.b, self.activation_fn)
                gradients_W, gradients_b = self.backward_pass(a, z, y, self.W, self.activation_fn, self.loss_fn)
                self.gradient_descent(self.W, self.b, gradients_W, gradients_b, lr)

    def predict(self, X):
        preds = []
        for x in X:
            _, a = self.forward_pass(x, self.W, self.b, self.activation_fn)
            preds.append(a[-1])
        return np.array(preds).squeeze()

In [13]:
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([0, 1, 1, 0])  # XOR

model = SequentialModel(input_dim=2)
model.add(5, 'relu')
model.add(1, 'sigmoid')
model.compile('MSE')
model.train(X, Y, lr=0.5, epochs=1000)

print("Predictions:", model.predict(X))

Predictions: [0.03905024 0.98410103 0.98467808 0.01591104]


### Keras

In [25]:
## tensorflow

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

In [29]:
model = Sequential()
model.add(Dense(5, input_dim=2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [49]:
model.summary()

In [55]:
model.compile(loss='binary_crossentropy', optimizer=SGD(learning_rate=0.1))

In [58]:
model.fit(X, Y, epochs=100)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step - loss: 0.4777
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step - loss: 0.4777
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - loss: 0.4777
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step - loss: 0.4777
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - loss: 0.4777
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - loss: 0.4777
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - loss: 0.4777
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - loss: 0.4777
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 0.4777
Epoch 10/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - loss: 0.4777
Epoch 11

<keras.src.callbacks.history.History at 0x16ab7ab9430>

In [60]:
model.predict(X)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step


array([[0.3336841],
       [0.9989913],
       [0.3336841],
       [0.3336841]], dtype=float32)