# MLP in PyTorch - Architecture & Assumptions

**Architecture:** Input(4) -> Hidden1(64, ReLU) -> Hidden2(32, ReLU) -> Output(3).

**Loss:** Cross-entropy loss (nn.CrossEntropyLoss).

**Training:** 5 epochs on dummy data (100 samples). Each epoch is a single full-batch gradient step using Adam (learning rate 0.01).

**Evaluation metric:** Accuracy computed from softmax probabilities (for reporting).

**Assumptions / Notes:**
- We keep the model output as raw logits because `nn.CrossEntropyLoss` expects logits (it applies LogSoftmax internally).
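- For reporting / predictions we apply `softmax` to the logits to obtain class probabilities and then take `argmax` to get predicted labels. Because softmax preserves the ordering of the logits, taking `argmax` directly on the logits yields the same labels; softmax is only needed when the probabilities themselves are of interest.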
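- The notebook uses randomly generated dummy data, so the labels carry no signal and the reported accuracy only shows that the training loop runs; it is not a measure of model quality. Replace `inputs` and `targets` with your dataset when available.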

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Reproducibility (optional): seed PyTorch's global RNG so that the random
# weight initialisation and the randomly generated dummy data repeat across runs.
torch.manual_seed(0)

# 1. Define the Architecture
class MLP(nn.Module):
    """Multi-layer perceptron with two ReLU hidden layers that returns logits.

    With the default arguments the layout is
    Input(4) -> Hidden1(64, ReLU) -> Hidden2(32, ReLU) -> Output(3).
    The sizes are parameters so the same class can be reused when the dummy
    data is replaced by a dataset with a different number of features/classes.

    Args:
        in_features: Number of features in each input sample.
        hidden1_size: Number of units in the first hidden layer.
        hidden2_size: Number of units in the second hidden layer.
        num_classes: Number of output logits (one per class).
    """

    def __init__(
        self,
        in_features: int = 4,
        hidden1_size: int = 64,
        hidden2_size: int = 32,
        num_classes: int = 3,
    ) -> None:
        super().__init__()
        # Input -> first hidden layer
        self.hidden1 = nn.Linear(in_features, hidden1_size)
        # First hidden -> second hidden layer
        self.hidden2 = nn.Linear(hidden1_size, hidden2_size)
        # Second hidden -> output layer (one logit per class)
        self.output = nn.Linear(hidden2_size, num_classes)

        # ReLU holds no parameters, so one instance serves both hidden layers.
        self.relu = nn.ReLU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw (un-normalised) class logits for a batch of inputs.

        Args:
            x: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor of logits whose last dimension equals ``num_classes``.
            No softmax is applied here because ``nn.CrossEntropyLoss``
            expects logits.
        """
        x = self.relu(self.hidden1(x))
        x = self.relu(self.hidden2(x))
        return self.output(x)

# 2. Initialize Model, Loss, and Optimizer
model = MLP()
# CrossEntropyLoss takes raw logits; it combines LogSoftmax and NLLLoss.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# 3. Generate Dummy Data (Assumption)
# 100 samples, 4 features each
inputs = torch.randn(100, 4)
# 100 target labels (classes 0, 1, or 2), stored as int64 class indices
targets = torch.randint(0, 3, (100,), dtype=torch.long)

# 4. Training Loop (5 Epochs)
# Every epoch is one full-batch gradient step over all 100 samples.
num_epochs = 5
softmax = nn.Softmax(dim=1)  # for reporting / predictions only

print("Starting Training...")
for epoch in range(num_epochs):
    # --- Forward Pass ---
    outputs = model(inputs)  # logits
    loss = criterion(outputs, targets)

    # --- Backward Pass & Optimization ---
    optimizer.zero_grad()  # Clear previous gradients
    loss.backward()        # Compute gradients
    optimizer.step()       # Update weights

    # --- Evaluation Metric: Accuracy ---
    # `outputs` was produced before optimizer.step(), so the loss and the
    # accuracy printed below both describe the weights *before* this update.
    # The metric is reporting-only, so autograd tracking is switched off to
    # avoid building a graph that is never back-propagated.
    with torch.no_grad():
        probs = softmax(outputs)          # class probabilities
        predicted = probs.argmax(dim=1)   # most probable class per sample
        # Fraction of samples whose predicted class matches the target
        accuracy = (predicted == targets).float().mean().item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy*100:.2f}%')

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# 1. Define the MLP Architecture
class MLP(nn.Module):
    """Two-hidden-layer perceptron classifier that emits raw logits.

    Default layout: 4 input features -> 64 units (ReLU) -> 32 units (ReLU)
    -> 3 logits. All sizes can be overridden through keyword arguments, which
    lets the class be reused once the dummy data is swapped for a real dataset.

    Args:
        in_features: Number of features in each input sample.
        hidden1_size: Number of neurons in the first hidden layer.
        hidden2_size: Number of neurons in the second hidden layer.
        num_classes: Number of output neurons (one logit per class).
    """

    def __init__(
        self,
        in_features: int = 4,
        hidden1_size: int = 64,
        hidden2_size: int = 32,
        num_classes: int = 3,
    ) -> None:
        super().__init__()
        # Three fully connected layers: input -> hidden1 -> hidden2 -> output.
        self.hidden1 = nn.Linear(in_features, hidden1_size)
        self.hidden2 = nn.Linear(hidden1_size, hidden2_size)
        self.output = nn.Linear(hidden2_size, num_classes)

        # Activation function defined once and reused after each hidden layer.
        self.relu = nn.ReLU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute class logits for a batch.

        Args:
            x: Tensor whose last dimension equals ``in_features``.

        Returns:
            Raw logits with last dimension ``num_classes``. They are left
            un-normalised because ``nn.CrossEntropyLoss`` applies LogSoftmax
            itself; applying softmax here as well would be incorrect.
        """
        first_hidden = self.relu(self.hidden1(x))
        second_hidden = self.relu(self.hidden2(first_hidden))
        return self.output(second_hidden)

# 2. Model, loss and optimizer
model = MLP()

# Cross-entropy on raw logits (LogSoftmax + NLLLoss are applied inside the loss).
criterion = nn.CrossEntropyLoss()

# Adam over every trainable parameter of the model.
optimizer = optim.Adam(model.parameters(), lr=0.01)

# 3. Dummy data (assumption)
# Feature matrix: 100 samples x 4 features.
inputs = torch.randn(100, 4)
# One random class label (0, 1 or 2) per sample.
targets = torch.randint(low=0, high=3, size=(100,))

# 4. Training loop: 5 epochs, each a single full-batch update.
num_epochs = 5

print("Starting Training...")
for epoch in range(num_epochs):
    # Drop gradients left over from the previous step before computing new ones.
    optimizer.zero_grad()

    # Forward pass: logits for the whole batch and the resulting loss.
    outputs = model(inputs)
    loss = criterion(outputs, targets)

    # Backward pass followed by the parameter update.
    loss.backward()
    optimizer.step()

    # Accuracy: the predicted class is the index of the largest logit.
    # max(dim=1) yields (values, indices); only the indices are needed.
    _, predicted = outputs.max(dim=1)

    # Correct predictions divided by the number of samples.
    correct = predicted.eq(targets).sum().item()
    accuracy = correct / targets.shape[0]

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy*100:.2f}%')

Starting Training...
Epoch [1/5], Loss: 1.0848, Accuracy: 44.00%
Epoch [2/5], Loss: 1.0557, Accuracy: 42.00%
Epoch [3/5], Loss: 1.0332, Accuracy: 46.00%
Epoch [4/5], Loss: 1.0116, Accuracy: 48.00%
Epoch [5/5], Loss: 0.9938, Accuracy: 49.00%
