In [15]:
import cupy as cp 

x = cp.array([[1, 2, 3],
            [4, 5, 6],
            [7, 8, 9],
            [10, 11, 12]])

y = cp.array([1,0,0, 1])

print(x.shape)
print(y.shape)


(4, 3)
(4,)


In [16]:
class ActivationFunctions:
    @staticmethod
    def relu(x):
        return cp.maximum(0, x)

    @staticmethod
    def sigmoid(x):
        return 1 / (1 + cp.exp(-x))

    @staticmethod
    def relu_derivative(x):
        return (x > 0).astype(cp.float32)

    @staticmethod
    def sigmoid_derivative(x):
        sig = ActivationFunctions.sigmoid(x)
        return sig * (1 - sig)
    
    

In [17]:
# ✅ Create a test input (CuPy array)
x = cp.array([-2.0, -1.0, 0.0, 1.0, 2.0])

# ✅ Test ReLU
relu_output = ActivationFunctions.relu(x)
relu_derivative_output = ActivationFunctions.relu_derivative(x)

# ✅ Test Sigmoid
sigmoid_output = ActivationFunctions.sigmoid(x)
sigmoid_derivative_output = ActivationFunctions.sigmoid_derivative(x)

# ✅ Print results
print("Input:", x)
print("ReLU Output:", relu_output)
print("ReLU Derivative:", relu_derivative_output)
print("Sigmoid Output:", sigmoid_output)
print("Sigmoid Derivative:", sigmoid_derivative_output)

Input: [-2. -1.  0.  1.  2.]
ReLU Output: [0. 0. 0. 1. 2.]
ReLU Derivative: [0. 0. 0. 1. 1.]
Sigmoid Output: [0.11920292 0.26894142 0.5        0.73105858 0.88079708]
Sigmoid Derivative: [0.10499359 0.19661193 0.25       0.19661193 0.10499359]


In [21]:
import cupy as cp

class Linear:
    def __init__(self, input_size, output_size):
        # Initialize weights and biases
        self.W = cp.random.randn(input_size, output_size) * 0.01  # Weight
        self.b = cp.zeros((1, output_size))  # Bias

    def forward(self, X):
        self.X = X
        self.Z = cp.dot(X, self.W) + self.b
        return self.Z

    def backward(self, dZ, learning_rate):
        m = self.X.shape[0]
        dW = cp.dot(self.X.T, dZ) / m
        db = cp.sum(dZ, axis=0, keepdims=True) / m
        dX = cp.dot(dZ, self.W.T)
        
        # Update weights and biases using gradient descent
        self.W -= learning_rate * dW
        self.b -= learning_rate * db
        
        return dX


class ReLU:
    def forward(self, Z):
        self.A = cp.maximum(0, Z)
        return self.A

    def backward(self, dA):
        dZ = dA * (self.A > 0)  # Derivative of ReLU
        return dZ


class NeuralNetwork:
    def __init__(self, input_size, hidden_sizes, output_size):
        # Initialize layers
        self.layers = []
        prev_size = input_size
        for hidden_size in hidden_sizes:
            self.layers.append(Linear(prev_size, hidden_size))
            self.layers.append(ReLU())
            prev_size = hidden_size
        self.layers.append(Linear(prev_size, output_size))

    def forward(self, X):
        # Forward pass through the network
        self.activations = []
        Z = X
        for layer in self.layers:
            Z = layer.forward(Z)
            self.activations.append(Z)
        return Z

    def compute_loss(self, y_pred, y_true):
        # Mean Squared Error loss
        loss = cp.mean((y_pred - y_true) ** 2)
        return loss

    def backward(self, X, y_true, learning_rate=0.01):
        m = X.shape[0]
        y_pred = self.forward(X)
        loss = self.compute_loss(y_pred, y_true)

        # Backpropagation
        dZ = y_pred - y_true  # Derivative of MSE loss with respect to output
        for i in range(len(self.layers) - 1, -1, -1):
            layer = self.layers[i]
            if isinstance(layer, Linear):
                dZ = layer.backward(dZ, learning_rate)
            elif isinstance(layer, ReLU):
                dZ = layer.backward(dZ)
        
        return loss

    def train(self, X, y, epochs=1000, learning_rate=0.01):
        for epoch in range(epochs):
            # Forward and backward pass
            loss = self.backward(X, y, learning_rate)

            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Loss: {loss}')

# Example usage
if __name__ == "__main__":
    # Toy dataset (X: input features, y: target labels)
    X = cp.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # AND gate input
    y = cp.array([[0], [0], [0], [1]])  # AND gate output

    # Initialize the neural network with 2 input nodes, two hidden layers of 4 nodes each, and 1 output node
    nn = NeuralNetwork(input_size=2, hidden_sizes=[4, 4], output_size=1)

    # Train the network
    nn.train(X, y, epochs=1000, learning_rate=0.01)

    # Test the network after training
    y_pred = nn.forward(X)
    print("\nPredictions after training:\n", y_pred)


Epoch 0, Loss: 0.24999936504123763
Epoch 100, Loss: 0.19586890365681792
Epoch 200, Loss: 0.18862010694466977
Epoch 300, Loss: 0.18764942278779112
Epoch 400, Loss: 0.18751942581429387
Epoch 500, Loss: 0.18750200870455197
Epoch 600, Loss: 0.18749966581513067
Epoch 700, Loss: 0.18749934146376127
Epoch 800, Loss: 0.18749928732143345
Epoch 900, Loss: 0.1874992692721445

Predictions after training:
 [[0.24998795]
 [0.24998825]
 [0.24999018]
 [0.24999078]]
