In [58]:
%load_ext autoreload
%autoreload 2
import sys
import os
sys.path.append(os.path.abspath(".."))  

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [59]:
import numpy as np

from models.MLP.model import MLP
from utils.loss.MSELoss import MSELoss
from utils.loss.BCELoss import BCELoss
from utils.optimizers.SGD import SGD
from utils.optimizers.Adam import Adam

In [72]:
# Toy regression problem: one sample with three features and a scalar target.
x = np.array([[1.0, 2.0, 3.0]])
target = np.array([[1.0]])

# Run settings, named so they can be tuned in one place.
n_steps = 20
learning_rate = 0.001

# Network layout: 3 inputs -> hidden layers of sizes [4, 4] -> 1 output.
net = MLP(input_dim=3, hidden_dims=[4, 4], output_dim=1, activation='relu')
loss_fn = MSELoss()
optimizer = Adam(net.parameters(), lr=learning_rate)
# To compare against SGD, replace the optimizer above with:
#     SGD(net.parameters(), lr=learning_rate)

for step in range(n_steps):
    # Reset the gradients before computing new ones for this iteration.
    optimizer.zero_grad()

    # Forward pass: prediction for the sample, then its loss against the target.
    pred = net(x)
    loss = loss_fn(pred, target)

    # Report the prediction and loss as they stand before this step's update.
    print(f"Step {step} — pred: {pred}, loss: {loss}")

    # Backward pass: take the gradient from the loss and feed it back through
    # the network.
    grad_loss = loss_fn.backward()
    net.backward(grad_loss)

    # Let the optimizer update the network parameters.
    optimizer.step()

Step 0 — pred: [[1.09096374]], loss: 0.00827440160326588
Step 1 — pred: [[1.0770202]], loss: 0.005932110488498562
Step 2 — pred: [[1.06325652]], loss: 0.004001387753709555
Step 3 — pred: [[1.04977698]], loss: 0.0024777473142753408
Step 4 — pred: [[1.03671444]], loss: 0.0013479499306883033
Step 5 — pred: [[1.02423392]], loss: 0.0005872828335342635
Step 6 — pred: [[1.012532]], loss: 0.00015705110531867233
Step 7 — pred: [[1.00182879]], loss: 3.3444906198828073e-06
Step 8 — pred: [[0.99234958]], loss: 5.8528917951390226e-05
Step 9 — pred: [[0.98429699]], loss: 0.00024658467892490257
Step 10 — pred: [[0.97782029]], loss: 0.0004919397133732196
Step 11 — pred: [[0.97299261]], loss: 0.0007293992532504533
Step 12 — pred: [[0.96980429]], loss: 0.0009117809551820699
Step 13 — pred: [[0.9681727]], loss: 0.0010129770138910259
Step 14 — pred: [[0.96796162]], loss: 0.0010264580270945656
Step 15 — pred: [[0.96900177]], loss: 0.0009608900215214957
Step 16 — pred: [[0.97110723]], loss: 0.00083479231010

In [None]:
# Toy binary-classification problem: one sample with three features, labelled 1.
x = np.array([[1.0, 2.0, 3.0]])
target = np.array([[1.0]])

# Run settings, named so they can be tuned in one place.
n_steps = 20
learning_rate = 0.001

# Same layout as a 3 -> [4, 4] -> 1 network, here with sigmoid activation and
# trained with binary cross-entropy using SGD.
net = MLP(input_dim=3, hidden_dims=[4, 4], output_dim=1, activation='sigmoid')
loss_fn = BCELoss()
optimizer = SGD(net.parameters(), lr=learning_rate)

for step in range(n_steps):
    # Reset the gradients before computing new ones for this iteration.
    optimizer.zero_grad()

    # Forward pass: prediction for the sample, then its loss against the label.
    pred = net(x)
    loss = loss_fn(pred, target)

    # Report the prediction and loss as they stand before this step's update.
    print(f"Step {step} — pred: {pred}, loss: {loss}")

    # Backward pass: take the gradient from the loss and feed it back through
    # the network.
    grad_loss = loss_fn.backward()
    net.backward(grad_loss)

    # Let the optimizer update the network parameters.
    optimizer.step()

Step 0 — pred: [[0.44363045]], loss: 0.8127633578290376
Step 1 — pred: [[0.4439937]], loss: 0.8119448883576643
Step 2 — pred: [[0.44435676]], loss: 0.8111275048323091
Step 3 — pred: [[0.44471964]], loss: 0.8103112058631455
Step 4 — pred: [[0.44508233]], loss: 0.8094959900611297
Step 5 — pred: [[0.44544483]], loss: 0.8086818560380049
Step 6 — pred: [[0.44580715]], loss: 0.8078688024063062
Step 7 — pred: [[0.44616928]], loss: 0.8070568277793634
Step 8 — pred: [[0.44653122]], loss: 0.8062459307713071
Step 9 — pred: [[0.44689298]], loss: 0.8054361099970702
Step 10 — pred: [[0.44725455]], loss: 0.8046273640723951
Step 11 — pred: [[0.44761593]], loss: 0.8038196916138357
Step 12 — pred: [[0.44797712]], loss: 0.8030130912387622
Step 13 — pred: [[0.44833813]], loss: 0.8022075615653647
Step 14 — pred: [[0.44869894]], loss: 0.8014031012126585
Step 15 — pred: [[0.44905957]], loss: 0.8005997088004865
Step 16 — pred: [[0.44942001]], loss: 0.7997973829495244
Step 17 — pred: [[0.44978025]], loss: 0.79