In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

Imports:
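- `torch.nn` (imported as `nn`): building blocks for neural networks (layers, activations, loss functions). [PyTorch docs: torch.nn](https://docs.pytorch.org/docs/stable/nn.html)
- `torch.optim` (imported as `optim`): [PyTorch docs: torch.optim](https://docs.pytorch.org/docs/stable/optim.html#module-torch.optim)
    - a package implementing various optimization algorithms.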

Containers :
- `nn.Module`: Base class for all neural network modules.
- `nn.Sequential()`: A container that runs the modules passed to it one after another, in order. [PyTorch docs: Sequential](https://docs.pytorch.org/docs/stable/generated/torch.nn.Sequential.html#torch.nn.Sequential)

Non-Linear Activations :
- nn.ReLU
- nn.Sigmoid

Loss Functions :
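- `nn.BCELoss()`: Creates a criterion that measures the Binary Cross Entropy between the target and the input probabilities. The input must already be a probability, e.g. the output of a sigmoid.
- `nn.BCEWithLogitsLoss()`: This loss combines a Sigmoid layer and the BCELoss in one single class. It takes raw logits as input and is more numerically stable than a separate sigmoid followed by `nn.BCELoss()`.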

# torch.tensor([])

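```python
# NumPy version:   X = np.array([...], dtype=np.float32)
# PyTorch version: swap np.array for torch.tensor and np.float32 for torch.float32
X = torch.tensor([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1]
], dtype=torch.float32)

y = torch.tensor([
    [0],
    [1],
    [1],
    [0]
], dtype=torch.float32)
```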

In [None]:
# XOR truth table. Each row of X is one (a, b) input pair; the row of y at the
# same index is the expected result a XOR b. Float literals are used so both
# tensors get PyTorch's default floating dtype, and y is kept 2-D with one
# column so that it lines up with a one-value-per-sample model output.
X = torch.tensor([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])  # shape (4, 2)
y = torch.tensor([[0.], [1.], [1.], [0.]])                  # shape (4, 1)

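Since `nn.BCEWithLogitsLoss()` has a built-in sigmoid layer, we can leave the sigmoid out of the model. The model therefore outputs raw logits, and `torch.sigmoid` has to be applied to them whenever probabilities are needed (e.g. when making predictions).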

In [None]:
class XORNet_simple(nn.Module):
    """Small fully connected network that returns raw logits.

    Architecture: ``Linear -> ReLU -> Linear``, stored as an ``nn.Sequential``
    in ``self.net``. There is deliberately no final sigmoid: the output is a
    logit, which is the input ``nn.BCEWithLogitsLoss`` expects. Apply
    ``torch.sigmoid`` to the output to obtain probabilities.

    Args:
        in_features: Number of input features per sample. Defaults to 2.
        hidden_features: Width of the hidden layer. Defaults to 3.
        out_features: Number of logits produced per sample. Defaults to 1.

    With the default arguments the network is the 2 -> 3 -> 1 XOR model.
    """

    def __init__(self, in_features: int = 2, hidden_features: int = 3,
                 out_features: int = 1) -> None:
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_features, hidden_features),   # input -> hidden
            nn.ReLU(),
            nn.Linear(hidden_features, out_features),  # hidden -> output (logits)
        )

    def forward(self, x):
        """Run ``x`` through the layer stack and return the resulting logits.

        ``x`` must have ``in_features`` values in its last dimension; the
        result has ``out_features`` values in its last dimension.
        """
        return self.net(x)
    

# The layer weights are initialised randomly. Seed PyTorch's RNG right before
# building the model so that a fresh Restart & Run All starts from the same
# weights and reproduces the same outputs. The seed value itself is arbitrary:
# if the training loss plateaus instead of heading towards 0, try another one.
torch.manual_seed(42)

model = XORNet_simple()
print(model)
print()
# Raw logits of the still-untrained network for the four XOR inputs.
print(model(X))


In [None]:
# Plain SGD over every parameter of the model, with a learning rate of 0.1.
optimizer = optim.SGD(params=model.parameters(), lr=0.1)
# BCEWithLogitsLoss applies the sigmoid internally (the numerically stable
# form of BCE), so it is fed the model's raw logits.
loss_fn = nn.BCEWithLogitsLoss()
# Show the optimizer's hyperparameters together with the tensors it updates.
print(optimizer.param_groups)

In [None]:
epochs = 3000
log_every = 200  # print the loss once every this many epochs

# Full-batch training: every epoch runs all of X through the model once.
for epoch in range(epochs):
    # Gradients accumulate across backward() calls, so clear them first.
    optimizer.zero_grad()

    output = model(X)          # raw logits (no sigmoid inside the model)
    loss = loss_fn(output, y)

    loss.backward()            # compute gradients of the loss
    optimizer.step()           # apply one SGD update

    # Log on the regular schedule and always on the last epoch: with 3000
    # epochs the every-200 schedule stops at epoch 2800, which would leave the
    # final loss unreported. The value shown is the loss computed before this
    # epoch's parameter update.
    if epoch % log_every == 0 or epoch == epochs - 1:
        print(f"Epoch {epoch}: Loss = {loss.item():.6f}")


In [None]:
# Print each parameter tensor of the model next to its qualified name.
for param_name, tensor in model.named_parameters():
    print(f"name {param_name} : params: {tensor.data}")

In [None]:
# Inference only: run the forward pass without gradient tracking.
with torch.no_grad():
    logits = model(X)
    preds = torch.sigmoid(logits)  # logits -> probabilities in (0, 1)

# Report one line per sample, using plain Python values for formatting.
print("\nPredictions:")
for sample, prob in zip(X.tolist(), preds.flatten().tolist()):
    print(f"Input: {sample} -> Prediction: {prob:.4f}")