<a href="https://colab.research.google.com/github/mspatke/Deep-Learning-with-Pytorch/blob/main/14_L1_Regularization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import os
import torch
import torch.nn as nn
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
from torchvision import transforms

In [5]:
class MLP(nn.Module):
  '''
    Multilayer Perceptron.
  '''
  def __init__(self):
    super().__init__()
    self.layers = nn.Sequential(
      nn.Flatten(),
      nn.Linear(28 * 28 * 1, 64),
      nn.ReLU(),
      nn.Linear(64, 32),
      nn.ReLU(),
      nn.Linear(32, 10)
    )


  def forward(self, x):
    '''Forward pass'''
    return self.layers(x)
  
  def compute_l1_loss(self, w):
    return torch.abs(w).sum()

In [6]:
if __name__ == '__main__':
  
  # Set fixed random number seed
  torch.manual_seed(42)
  
  # Prepare CIFAR-10 dataset
  dataset = MNIST(os.getcwd(), download=True, transform=transforms.ToTensor())
  trainloader = torch.utils.data.DataLoader(dataset, batch_size=10, shuffle=True, num_workers=1)
  
  # Initialize the MLP
  mlp = MLP()
  
  # Define the loss function and optimizer
  loss_function = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)
  
  # Run the training loop
  for epoch in range(0, 5): # 5 epochs at maximum
    
    # Print epoch
    print(f'Starting epoch {epoch+1}')
    
    # Iterate over the DataLoader for training data
    for i, data in enumerate(trainloader, 0):
      
      # Get inputs
      inputs, targets = data
      
      # Zero the gradients
      optimizer.zero_grad()
      
      # Perform forward pass
      outputs = mlp(inputs)
      
      # Compute loss
      loss = loss_function(outputs, targets)
      
      # Compute L1 loss component
      l1_weight = 1.0
      l1_parameters = []
      for parameter in mlp.parameters():
          l1_parameters.append(parameter.view(-1))
      l1 = l1_weight * mlp.compute_l1_loss(torch.cat(l1_parameters))
      
      # Add L1 loss component
      loss += l1
      
      # Perform backward pass
      loss.backward()
      
      # Perform optimization
      optimizer.step()
      
      # Print statistics
      minibatch_loss = loss.item()
      if i % 500 == 499:
          print('Loss after mini-batch %5d: %.5f (of which %.5f L1 loss)' %
                (i + 1, minibatch_loss, l1))
          current_loss = 0.0

  # Process is complete.
  print('Training process has finished.')

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /content/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting /content/MNIST/raw/train-images-idx3-ubyte.gz to /content/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /content/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting /content/MNIST/raw/train-labels-idx1-ubyte.gz to /content/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /content/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting /content/MNIST/raw/t10k-images-idx3-ubyte.gz to /content/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /content/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting /content/MNIST/raw/t10k-labels-idx1-ubyte.gz to /content/MNIST/raw

Starting epoch 1
Loss after mini-batch   500: 65.46844 (of which 63.18564 L1 loss)
Loss after mini-batch  1000: 13.96767 (of which 11.65208 L1 loss)
Loss after mini-batch  1500: 3.68786 (of which 1.38293 L1 loss)
Loss after mini-batch  2000: 2.93956 (of which 0.63697 L1 loss)
Loss after mini-batch  2500: 2.93945 (of which 0.63686 L1 loss)
Loss after mini-batch  3000: 2.94011 (of which 0.63752 L1 loss)
Loss after mini-batch  3500: 2.93958 (of which 0.63699 L1 loss)
Loss after mini-batch  4000: 2.93800 (of which 0.63541 L1 loss)
Loss after mini-batch  4500: 2.94033 (of which 0.63774 L1 loss)
Loss after mini-batch  5000: 2.94021 (of which 0.63762 L1 loss)
Loss after mini-batch  5500: 2.93896 (of which 0.63638 L1 loss)
Loss after mini-batch  6000: 2.93839 (of which 0.63581 L1 loss)
Starting epoch 2
Loss after mini-batch   500: 2.93858 (of which 0.63600 L1 loss)
Loss after mini-batch  1000: 2.93701 (of which 0.63