In [5]:
import numpy as np


x = np.array([1, 2, 3])


weights = np.array([0.5, 0.5, 0.5])
biases = np.array([1, 1, 1])


def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def __sigmoid__derivitive__(x):
    return sigmoid(x) * (1 - sigmoid(x))


out = sigmoid(x @ weights + biases)
print(out)

[0.98201379 0.98201379 0.98201379]


In [6]:
class SimpleLayer():
    def __init__(self, input_dim, output_dim):
        self.weight = np.random.uniform(low=-np.sqrt(1/input_dim), high=np.sqrt(1/input_dim), size=(input_dim, output_dim))
        self.biases = np.random.uniform(low=-np.sqrt(1/input_dim), high=np.sqrt(1/input_dim), size=(output_dim, 1))

    def params_nbr(self):
        return self.input_dim * self.out_dim + self.out_dim

    def forward(self, x):
        return sigmoid(x @ self.weight + self.biases.T)


layer1 = SimpleLayer(3, 2)
layer1.forward(x)



array([[0.18449656, 0.29004648]])

In [7]:
# Backprog goes here

def mean_square_error(output, expected_output):
    loss = np.mean((output - expected_output) ** 2)
    grad = 2 * (output - expected_output) / output.size
    return loss, grad




In [8]:
class Linear():
    def __init__(self, input_dim, output_dim):
        self.input_dim = input_dim
        self.out_dim = output_dim
        self.weight = np.random.uniform(low=-np.sqrt(1/input_dim), high=np.sqrt(1/input_dim), size=(input_dim, output_dim))
        self.biases = np.random.uniform(low=-np.sqrt(1/input_dim), high=np.sqrt(1/input_dim), size=(output_dim, 1))
        self.last_input = None
        self.last_output = None
        self.last_z = None
        self.gradient_cache = None

    def backward(self, grad_output, lr):
        # Compute gradient of the loss with respect to the linear combination (z)
        grad_z = grad_output * sigmoid(self.last_z)
        
        # Compute gradients with respect to weights and biases
        grad_weight = self.last_input.T @ grad_z
        grad_bias = np.sum(grad_z, axis=0)
        
        # Reshape grad_bias to match the shape of self.biases
        grad_bias = grad_bias.reshape(self.biases.shape)
        
        # Update weights and biases
        self.weight -= lr * grad_weight
        self.biases -= lr * grad_bias
        
        # Compute gradient to propagate to the previous layer
        self.gradient_cache = grad_z @ self.weight.T
        return self.gradient_cache

    def params_nbr(self):
        return self.input_dim * self.out_dim + self.out_dim

    def forward(self, x):
        self.last_input = x  # x: (batch_size, input_dim)
        self.last_z = x @ self.weight + self.biases.T  # Align dimensions
        self.last_output = __sigmoid__derivitive__(self.last_z)
        return self.last_output

In [None]:
def create_dataset(size, numbers):
    array = []
    for num in range(numbers):
        r = np.random.randint(0, size)
        
        array.append()

In [10]:
# Input and output data
x = np.array([1, 2, 3, 4]).reshape(1, -1)  # Shape (1, 4)
y = np.array([1, 2, 3, 4]).reshape(1, -1)  # Shape (1, 4)

# Create a Linear Layer with no activation
layer = Linear(4, 4)

# Training loop
for epoch in range(200):
    out = layer.forward(x)
    loss, grad = mean_square_error(out, y)
    layer.backward(grad, 1e-3)
    if epoch % 10 == 0:
        print(f"Epoch {epoch + 1}, Loss: {loss:.4f}")

# Final output
print("Final output:", layer.forward(np.array([2, 4, 6, 8]).reshape(1, -1)))

Epoch 1, Loss: 6.7332
Epoch 11, Loss: 6.7068
Epoch 21, Loss: 6.6847
Epoch 31, Loss: 6.6692
Epoch 41, Loss: 6.6624
Epoch 51, Loss: 6.6653
Epoch 61, Loss: 6.6771
Epoch 71, Loss: 6.6946
Epoch 81, Loss: 6.7124
Epoch 91, Loss: 6.7255
Epoch 101, Loss: 6.7309
Epoch 111, Loss: 6.7288
Epoch 121, Loss: 6.7226
Epoch 131, Loss: 6.7187
Epoch 141, Loss: 6.7257
Epoch 151, Loss: 6.7535
Epoch 161, Loss: 6.8097
Epoch 171, Loss: 6.8943
Epoch 181, Loss: 6.9957
Epoch 191, Loss: 7.0958
Final output: [[1.38057658e-02 9.28802791e-02 8.01689790e-07 4.34943364e-03]]
