<a href="https://colab.research.google.com/github/ketanp23/sit-neuralnetworks-class/blob/main/Heurestics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Let's train a simple network on XOR (from last time) using several training heuristics: He (Kaiming) initialization, the Adam optimizer, batch normalization (BN), and dropout.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# XOR truth table: each row of X is one input pair, and the matching row of y
# is its XOR target.
X = torch.tensor(
    [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]],
    dtype=torch.float32,
)
y = torch.tensor([[0.0], [1.0], [1.0], [0.0]], dtype=torch.float32)

class ImprovedMLP(nn.Module):
    """One-hidden-layer MLP combining He init, batch norm and dropout.

    The forward pass is
    ``Linear -> BatchNorm1d -> ReLU -> Dropout -> Linear -> sigmoid``,
    producing one value between 0 and 1 per input row.

    Batch norm and dropout behave differently in training and evaluation
    mode, so call ``model.eval()`` before using the network for inference.

    Args:
        in_features: Number of input features per sample.
        hidden_features: Width of the hidden layer (and of the batch-norm
            layer that follows it).
        dropout_p: Probability of zeroing a hidden activation during
            training.

    The defaults reproduce the original 2-4-1 XOR network with dropout 0.2.
    """

    def __init__(
        self,
        in_features: int = 2,
        hidden_features: int = 4,
        dropout_p: float = 0.2,
    ) -> None:
        super().__init__()
        self.hidden = nn.Linear(in_features, hidden_features)
        self.bn = nn.BatchNorm1d(hidden_features)
        self.dropout = nn.Dropout(dropout_p)
        self.out = nn.Linear(hidden_features, 1)
        # Visit every submodule (and self) so all Linear layers get He init.
        self.apply(self.init_weights)

    def init_weights(self, m: nn.Module) -> None:
        """Apply He (Kaiming) normal init to the weight of a Linear layer.

        Modules that are not ``nn.Linear`` are left untouched, and biases
        keep whatever values they already had.
        """
        if isinstance(m, nn.Linear):
            nn.init.kaiming_normal_(m.weight, nonlinearity='relu')

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return sigmoid outputs for a batch of inputs.

        Args:
            x: Input batch; expected shape is ``(batch, in_features)``.

        Returns:
            Tensor with one sigmoid-activated value per input row.
        """
        # Normalise the pre-activations before the ReLU non-linearity.
        x = torch.relu(self.bn(self.hidden(x)))
        x = self.dropout(x)
        return torch.sigmoid(self.out(x))

# Build the model, its optimizer and the loss. BCELoss expects probabilities,
# which the model's final sigmoid provides.
model = ImprovedMLP()
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.BCELoss()

# Training mode: dropout is active and batch norm normalises with the
# statistics of the current batch.
model.train()
for epoch in range(5000):
    optimizer.zero_grad()
    output = model(X)
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()

# Predictions (approximate after training)
# Switch to evaluation mode first: otherwise dropout keeps randomly zeroing
# hidden units and batch norm keeps using batch statistics, which makes the
# predictions non-deterministic.
model.eval()
with torch.no_grad():
    pred = model(X).round()
    print(pred)  # Should be close to [[0], [1], [1], [0]]

tensor([[0.],
        [1.],
        [1.],
        [0.]])


These tweaks make training faster and more robust!