In [1]:
import cupy as cp
import time

# Problem dimensions, named once so data and parameter shapes cannot drift apart.
N_SAMPLES = 10000
N_FEATURES = 5
N_HIDDEN = 5
N_OUTPUTS = 1

# Initialize synthetic dataset (10000 samples, 5 features)
X = cp.random.randn(N_SAMPLES, N_FEATURES).astype(cp.float32)
Y_true = cp.random.randn(N_SAMPLES, N_OUTPUTS).astype(cp.float32)

# Initialize weights and biases on GPU (float32, matching the dataset dtype)
W1 = cp.random.randn(N_FEATURES, N_HIDDEN).astype(cp.float32)
b1 = cp.zeros(N_HIDDEN, dtype=cp.float32)
W2 = cp.random.randn(N_HIDDEN, N_OUTPUTS).astype(cp.float32)
b2 = cp.zeros(N_OUTPUTS, dtype=cp.float32)

# Forward pass
def forward(X):
    """Run the two-layer network on ``X``.

    Reads the module-level parameters ``W1``, ``b1``, ``W2`` and ``b2``.

    Args:
        X: Input array that is multiplied by ``W1``.

    Returns:
        A ``(H, Yp)`` tuple: the tanh hidden activations, and the linear
        output computed from those activations.
    """
    hidden_pre = cp.dot(X, W1) + b1
    hidden = cp.tanh(hidden_pre)
    prediction = cp.dot(hidden, W2) + b2
    return hidden, prediction

# Backward pass and update
def backward(X, H, Yp, Y_true, lr=0.01):
    """Apply one gradient-descent update to the module-level parameters.

    Computes gradients for ``W1``, ``b1``, ``W2`` and ``b2`` from the values
    produced by ``forward`` and subtracts ``lr`` times each gradient from the
    corresponding parameter in place. Returns nothing.

    Args:
        X: Input array that was fed to ``forward``.
        H: Hidden activations returned by ``forward``.
        Yp: Predictions returned by ``forward``.
        Y_true: Targets the predictions are compared against.
        lr: Step size multiplied into every gradient.
    """
    n_rows = X.shape[0]

    # Prediction error, scaled by the number of rows in X.
    grad_out = (Yp - Y_true) / n_rows

    # Output layer gradients.
    grad_W2 = cp.dot(H.T, grad_out)
    grad_b2 = grad_out.sum(axis=0)

    # Back through tanh: with H = tanh(z), the derivative is 1 - H**2.
    tanh_slope = 1 - H**2
    grad_hidden = cp.dot(grad_out, W2.T) * tanh_slope

    # Hidden layer gradients.
    grad_W1 = cp.dot(X.T, grad_hidden)
    grad_b1 = grad_hidden.sum(axis=0)

    # Every gradient above is computed before any parameter changes, so W2
    # is still the pre-update value where grad_hidden needs it.
    updates = ((W1, grad_W1), (b1, grad_b1), (W2, grad_W2), (b2, grad_b2))
    for param, grad in updates:
        # Augmented assignment on the array mutates the shared global in place.
        param -= lr * grad

# Training loop with timing
epochs = 10000

# CuPy launches GPU work asynchronously: a call can return before its kernel
# has finished. Drain any pending work (e.g. the random initialisation) so it
# is not charged to the training loop.
cp.cuda.get_current_stream().synchronize()
start_time = time.perf_counter()  # monotonic clock intended for intervals

for epoch in range(epochs):
    H, Yp = forward(X)
    backward(X, H, Yp, Y_true)

# Wait for every queued kernel to complete before stopping the clock;
# otherwise the measurement could end while the GPU is still working.
cp.cuda.get_current_stream().synchronize()
end_time = time.perf_counter()

print(f"Training completed in {end_time - start_time:.4f} seconds for {epochs} epochs")


Training completed in 12.3252 seconds for 10000 epochs
