<a href="https://colab.research.google.com/github/kongwanbianjinyu/Deep-Learning-Tutorial/blob/main/Activation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
## Standard libraries
import os
import json
import math
import numpy as np

## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf') # For export
import seaborn as sns
sns.set()

## Progress bar
from tqdm.notebook import tqdm

## PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim

In [None]:
# Directory passed as `root` to the FashionMNIST dataset (data is downloaded there if missing).
# NOTE(review): relative path — presumably resolved against a mounted Google Drive in Colab; confirm.
DATASET_PATH = "drive/MyDrive/deep learning tutorial/data"
# Directory holding the downloaded pretrained files and the checkpoints written by training
# (one `<model_name>.config` and one `<model_name>.tar` per model).
CHECKPOINT_PATH = "drive/MyDrive/deep learning tutorial/saved_models/Activation"

# Function for setting the seed
def set_seed(seed):
    """Seed the NumPy and PyTorch random number generators with `seed`.

    When CUDA is available, the GPU generators are seeded as well.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        return
    # GPU operations keep their own generators, so they are seeded separately
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
set_seed(42)

# Some GPU operations are implemented stochastically for efficiency. Force the
# deterministic variants (and switch off benchmarking) so that runs on a GPU
# are reproducible.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Device used throughout this notebook: the first CUDA GPU when one is
# available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Using device", device)

Using device cuda:0


# Download Models

In [None]:
# Create the checkpoint directory if it does not exist yet; exist_ok=True
# makes re-running this cell harmless when the directory is already there.
os.makedirs(CHECKPOINT_PATH, exist_ok=True)


In [None]:
import urllib.request
from urllib.error import HTTPError

# GitHub URL under which the saved models of this tutorial are stored
base_url = "https://raw.githubusercontent.com/phlippe/saved_models/main/tutorial3/"
# Files to download: a ".config" and a ".tar" file per activation function
pretrained_files = [
    f"FashionMNIST_{act_name}.{extension}"
    for act_name in ("elu", "leakyrelu", "relu", "sigmoid", "swish", "tanh")
    for extension in ("config", "tar")
]

In [None]:
# For each file, check whether it already exists. If not, try downloading it.
for file_name in pretrained_files:
    file_path = os.path.join(CHECKPOINT_PATH, file_name)
    if os.path.isfile(file_path):
        continue  # already present, nothing to download
    file_url = base_url + file_name
    print(f"Downloading {file_url}...")
    try:
        urllib.request.urlretrieve(file_url, file_path)
    except HTTPError as e:
        # Best effort: continue with the remaining files, but report which
        # file failed and why instead of discarding the error.
        print("Something went wrong. Please try to download the file from the GDrive folder")
        print(f"  Failed file: {file_name} ({e})")

# Activation Function Layer

In [None]:
class ActivationFunction(nn.Module):
  """Base class for the activation functions of this notebook.

  Stores the concrete class name in `self.name` and starts a `self.config`
  dict (initially only {"name": ...}) that subclasses can extend.
  """
  def __init__(self):
    super().__init__()
    class_name = type(self).__name__
    self.name = class_name
    self.config = {"name": class_name}

In [None]:
class Sigmoid(ActivationFunction):
  """Sigmoid activation: 1 / (1 + exp(-x)), computed element-wise."""
  def forward(self, x):
    denominator = 1 + torch.exp(-x)
    return 1 / denominator

In [None]:
class LeakyReLU(ActivationFunction):
  """Leaky ReLU activation: x where x > 0, alpha * x elsewhere."""
  def __init__(self, alpha = 0.1):
    super().__init__()
    # the slope is kept in the config dict next to the activation's name
    self.config["alpha"] = alpha

  def forward(self, x):
    negative_slope = self.config["alpha"]
    # torch.where(condition, a, b): element i is a_i where the condition holds, else b_i
    return torch.where(x > 0, x, negative_slope * x)

In [None]:
# Instantiate both activation functions and show their name and config
act_func1 = Sigmoid()
act_func2 = LeakyReLU()

for demo_act in (act_func1, act_func2):
  print(demo_act.name)
  print(demo_act.config)

Sigmoid
{'name': 'Sigmoid'}
LeakyReLU
{'name': 'LeakyReLU', 'alpha': 0.1}


In [None]:
# Lookup table from lower-case activation name to its class. load_model uses it
# to rebuild the activation from a saved config, and the training loop further
# down uses the keys to build the model file names.
activation_function_dict = {"sigmoid":Sigmoid,"leakyrelu":LeakyReLU}

# Base Network 

## Model

In [None]:
# basic linear network for classifying images(as 1D tensor) to 10 classes
class BaseNetwork(nn.Module):
  """Fully connected classifier: flattens each sample and maps it to class scores.

  Args:
    act_func: activation module placed after every hidden linear layer. It must
      expose a `config` dict (as ActivationFunction subclasses do). The same
      instance is reused for all hidden layers.
    input_size: number of features per sample after flattening.
    num_class: number of outputs of the last linear layer.
    hidden_sizes: sizes of the hidden layers, in order (any sequence of ints).

  Attributes:
    layers: nn.Sequential holding the linear layers and activations.
    config: hyperparameters; apart from "activation", the keys equal the
      constructor's keyword names so the dict can be passed back as **kwargs.
  """
  def __init__(self,act_func, input_size = 784, num_class = 10, hidden_sizes = [512, 256, 256, 128]):
    super().__init__()

    # Work on a copy: the config must not alias the shared default list or a
    # list owned by the caller. This also lets tuples be passed.
    hidden_sizes = list(hidden_sizes)

    # Linear -> activation for every hidden layer, then the output layer
    layers = []
    layer_sizes = [input_size] + hidden_sizes
    for in_features, out_features in zip(layer_sizes[:-1], layer_sizes[1:]):
      layers += [nn.Linear(in_features, out_features), act_func]
    layers += [nn.Linear(layer_sizes[-1], num_class)]

    # * unpacks the list into positional arguments; nn.Sequential chains them into one module
    self.layers = nn.Sequential(*layers)

    # Save the hyperparameters. The key must be exactly "hidden_sizes" (no
    # surrounding whitespace), otherwise it cannot be used as a keyword argument.
    self.config = {"activation": act_func.config, "input_size": input_size, "num_class": num_class, "hidden_sizes": hidden_sizes}

  def forward(self, x):
    x = x.view(x.size(0),-1) # flatten every sample to a 1D tensor
    return self.layers(x)



In [None]:
def save_model(model, model_path, model_name):
  """Store a model's hyperparameters and parameters inside `model_path`.

  Writes `<model_name>.config` (the model's `config` dict, as JSON) and
  `<model_name>.tar` (the model's `state_dict()`, via torch.save). The
  directory is created when it does not exist.
  """
  hyperparameters = model.config
  os.makedirs(model_path, exist_ok = True)
  file_stem = os.path.join(model_path, model_name)

  # hyperparameters -> JSON file
  with open(file_stem + ".config", "w") as config_handle:
    json.dump(hyperparameters, config_handle)
  # parameters -> torch file
  torch.save(model.state_dict(), file_stem + ".tar")

In [None]:
def load_model(model_path, model_name, net = None):
  """Load a model from `<model_name>.config` and `<model_name>.tar` in `model_path`.

  Args:
    model_path: directory containing the two files.
    model_name: file name without extension.
    net: optional network to load the parameters into. When None, a
      BaseNetwork is rebuilt from the stored config first.

  Returns:
    The network with the stored parameters loaded.
  """
  config_file = os.path.join(model_path, model_name + ".config")
  model_file = os.path.join(model_path, model_name + ".tar")

  # load config(hyperparameter) from file using json
  with open(config_file, "r") as f:
    config_dict = json.load(f)

  if net is None:
    # NOTE(review): this expects the config layout written by BaseNetwork in
    # this notebook; the downloaded pretrained configs may use other keys — confirm.
    # "activation" describes the activation function and is not a BaseNetwork
    # argument, so take it out before forwarding the rest as keyword arguments.
    act_config = dict(config_dict.pop("activation"))
    act_func_name = act_config.pop("name").lower() # get activation function name
    # whatever remains (e.g. "alpha") are the activation's own hyperparameters
    act_func = activation_function_dict[act_func_name](**act_config)
    # Saved configs may contain the key " hidden_sizes" (with a leading space);
    # normalise the keys so they match the constructor's keyword names.
    net_kwargs = {key.strip(): value for key, value in config_dict.items()}
    net = BaseNetwork(act_func = act_func, **net_kwargs)

  # load parameters(state_dict) from file using torch.load(), mapped onto the
  # notebook's device, and copy them into the network
  net.load_state_dict(torch.load(model_file, map_location = device))

  return net


## Dataset

In [46]:
import torchvision
from torchvision.datasets import FashionMNIST
from torchvision import transforms

# Transformations applied to every image:
#   transforms.ToTensor()  : [0,255] -> float values in [0,1], layout (C,H,W)
#   transforms.Normalize() : (pixel_value - mean) / std; with mean = std = 0.5
#                            this maps [0,1] to [(0-0.5)/0.5, (1-0.5)/0.5] = [-1,1]
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.5,), std = (0.5,)),
])

# Training data, split into a training part (50000) and a validation part (10000)
train_dataset = FashionMNIST(root = DATASET_PATH, train = True, transform = transform, download = True)
train_set, val_set = data.random_split(train_dataset, [50000, 10000])

# Test data
test_set = FashionMNIST(root = DATASET_PATH, train = False, transform = transform, download = True)

# Data loaders: only the training loader shuffles and uses pinned memory
train_loader = data.DataLoader(
    train_set, batch_size = 256, shuffle = True, drop_last = False, pin_memory = True
)
val_loader = data.DataLoader(
    val_set, batch_size = 256, shuffle = False, drop_last = False
)
test_loader = data.DataLoader(
    test_set, batch_size = 256, shuffle = False, drop_last = False
)

# Train with different hyper-parameters

Train the network and save the model with the best validation accuracy to files (a different file name for each network).

Test the network by loading the saved model from those files.

In [None]:
def train_model(net, model_name, train_loader, val_loader, test_loader, epoch_num = 50, overwrite = False):
  """Train `net`, keep the checkpoint with the best validation accuracy, then test it.

  Training is skipped when `<model_name>.tar` already exists in CHECKPOINT_PATH
  and `overwrite` is False. In every case the stored checkpoint is afterwards
  loaded into `net` and evaluated on `test_loader`.

  Args:
    net: network to train; its parameters are updated in place.
    model_name: file name (without extension) of the checkpoint.
    train_loader: batches (X, y) used for the parameter updates.
    val_loader: batches used to pick the best epoch.
    test_loader: batches used for the final accuracy.
    epoch_num: number of training epochs.
    overwrite: train even when a checkpoint already exists.

  Returns:
    Accuracy of the stored checkpoint on `test_loader`, as a fraction.
  """
  is_file_exist = os.path.isfile(os.path.join(CHECKPOINT_PATH, model_name + ".tar"))
  if (is_file_exist and not overwrite):
    print("Model exist, skipping training.")
  else:
    if is_file_exist:
      print("Model exist, overwriting model, start training...")

    # Training has to run in this branch whether or not a checkpoint exists;
    # only the message above depends on the file being there.
    optimizer = optim.SGD(net.parameters(), lr = 0.01, momentum=0.9)
    loss_func = nn.CrossEntropyLoss()

    best_val_acc = None # best validation accuracy seen so far (None: nothing saved yet)
    for epoch in range(epoch_num):
      net.train()

      ########## Train ##########
      true_preds_num, all_preds_num = 0, 0
      for X, y in tqdm(train_loader, desc = f"Epoch {epoch + 1} : " ,leave = False):
        # move the batch (inputs X, labels y) to the device in use
        X = X.to(device)
        y = y.to(device)

        # forward pass and loss
        preds = net(X)
        loss = loss_func(preds,y)

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # statistics during training
        true_preds_num += (preds.argmax(dim = -1) == y).sum().item()
        all_preds_num += y.shape[0]
      # train accuracy
      train_acc = true_preds_num / all_preds_num

      ######## Validation ############
      val_acc = eval_model(net, val_loader)
      print(f"[Epoch {epoch +1 : 2d}]: Train Accuarcy: {train_acc*100:5.2f}%, Validation Accuracy:{val_acc*100:5.2f}%")

      # save the model whenever the validation accuracy improves
      # (the first epoch always saves)
      if best_val_acc is None or val_acc > best_val_acc:
        print("New best model accuracy, saving model...")

        save_model(net, CHECKPOINT_PATH, model_name)
        best_val_acc = val_acc

  ############ Test ##########
  # load the stored (best) parameters back into net before testing
  load_model(CHECKPOINT_PATH,model_name, net = net)
  test_acc = eval_model(net,test_loader)

  print((f"Test Accuracy:{test_acc*100:5.2f}%").center(50, "=") + "\n")
  return test_acc




In [None]:
def eval_model(net, data_loader):
  """Return the accuracy of `net` on `data_loader` as a fraction (correct / total)."""
  net.eval() # switch the network to evaluation mode

  correct_count = 0
  sample_count = 0
  for X, y in data_loader:
    X, y = X.to(device), y.to(device)

    # no gradients are needed to compute predictions
    with torch.no_grad():
      predicted_labels = net(X).argmax(dim = -1)
      correct_count += (predicted_labels == y).sum().item()
      sample_count += y.shape[0]
  return correct_count / sample_count

Try the network with different activation functions and compare the accuracy on the FashionMNIST test dataset.

In [None]:
for act_func_name in activation_function_dict:
  act_func = activation_function_dict[act_func_name]
  set_seed(42)  # reset the RNGs so every activation function starts from the same seed
  network = BaseNetwork(act_func = act_func()).to(device)
  train_model(
      net = network,
      model_name = f"FashionMNIST_{act_func_name}",
      train_loader = train_loader,
      val_loader = val_loader,
      test_loader = test_loader,
      overwrite = False,
  )
  
  

Model exist, skipping training.

Model exist, skipping training.



In [None]:
# Example: train a LeakyReLU network yourself for 5 epochs (epoch_num = 5),
# replacing the stored model (overwrite = True)
network = BaseNetwork(act_func = LeakyReLU()).to(device)
train_model(
    net = network,
    model_name = "FashionMNIST_leakyrelu",
    train_loader = train_loader,
    val_loader = val_loader,
    test_loader = test_loader,
    epoch_num = 5,
    overwrite = True,
)

Model exist, overwriting model, start training...


Epoch 1 :   0%|          | 0/196 [00:00<?, ?it/s]

[Epoch  1]: Train Accuarcy: 50.30%, Validation Accuracy:75.68%
New best model accuracy, saving model...


Epoch 2 :   0%|          | 0/196 [00:00<?, ?it/s]

[Epoch  2]: Train Accuarcy: 79.10%, Validation Accuracy:82.33%
New best model accuracy, saving model...


Epoch 3 :   0%|          | 0/196 [00:00<?, ?it/s]

[Epoch  3]: Train Accuarcy: 82.69%, Validation Accuracy:84.10%
New best model accuracy, saving model...


Epoch 4 :   0%|          | 0/196 [00:00<?, ?it/s]

[Epoch  4]: Train Accuarcy: 84.42%, Validation Accuracy:85.02%
New best model accuracy, saving model...


Epoch 5 :   0%|          | 0/196 [00:00<?, ?it/s]

[Epoch  5]: Train Accuarcy: 85.58%, Validation Accuracy:85.32%
New best model accuracy, saving model...



0.8443