# Discovering activation functions between layers

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

import matplotlib.pyplot as plt
import numpy as np

ModuleNotFoundError: No module named 'matplotlib'

## Implementing ReLU

In [None]:
# ReLU module under test.
relu_pytorch = nn.ReLU()

# Scalar negative input; gradient tracking is switched on in place so that
# backward() can populate x.grad.
x = torch.tensor(-1.0).requires_grad_()

# Forward pass, then backpropagate from the scalar output.
y = relu_pytorch(x)
y.backward()

# d(relu(x))/dx evaluated at x = -1.
gradient = x.grad
print(gradient)

tensor(0.)


## Implementing leaky ReLU

In [None]:
# Leaky ReLU with a slope of 0.05 applied to negative inputs.
leaky_relu_pytorch = nn.LeakyReLU(negative_slope=0.05)

# Negative scalar input: the expected result is -2.0 * 0.05 = -0.1.
x = torch.tensor(-2.0)
output = leaky_relu_pytorch(x)

print(output)

tensor(-0.1000)


# A deeper dive into neural network architecture

## Counting the number of parameters

In [None]:
# Three stacked linear layers: 16 -> 4 -> 2 -> 1.
model = nn.Sequential(
    nn.Linear(in_features=16, out_features=4),
    nn.Linear(in_features=4, out_features=2),
    nn.Linear(in_features=2, out_features=1),
)

def calculate_capacity(model):
    """Return the total element count across all of *model*'s parameters.

    Every tensor yielded by ``model.parameters()`` contributes its
    ``numel()``; a model without parameters yields 0.
    """
    return sum(tensor.numel() for tensor in model.parameters())

# Weights plus biases per layer: (16*4 + 4) + (4*2 + 2) + (2*1 + 1) = 81.
print(calculate_capacity(model))

81


## Manipulating the capacity of a network

In [None]:
# Input width and number of output classes for the network below.
n_features = 8
n_classes = 2

# A single sample (batch of 1) with n_features values. Built with
# torch.tensor, as in the earlier cells, rather than the legacy torch.Tensor
# constructor; the float literals keep the tensor floating-point so it can be
# fed straight into nn.Linear.
input_tensor = torch.tensor([[3.0, 4.0, 6.0, 2.0, 3.0, 6.0, 8.0, 9.0]])

# Four linear layers: n_features -> 20 -> 10 -> 6 -> n_classes.
model = nn.Sequential(
    nn.Linear(n_features, 20),
    nn.Linear(20, 10),
    nn.Linear(10, 6),
    nn.Linear(6, n_classes),
)

# Forward pass; the result has one row with n_classes values.
output = model(input_tensor)

# Weights plus biases per layer:
# (8*20 + 20) + (20*10 + 10) + (10*6 + 6) + (6*2 + 2) = 470.
print(calculate_capacity(model))

470


# Learning rate and momentum

## Experimenting with learning rate

## Experimenting with momentum

# Layer initialization and transfer learning

## Freezing layers of a model

In [2]:
# Three linear layers: 20 -> 10 -> 6 -> 2.
model = nn.Sequential(
    nn.Linear(20, 10),
    nn.Linear(10, 6),
    nn.Linear(6, 2),
)

# Freeze the weight and bias of the first two layers (indices 0 and 1) so
# they no longer require gradients; the last layer is left untouched.
for name, param in model.named_parameters():
    if name in ('0.weight', '0.bias', '1.weight', '1.bias'):
        param.requires_grad = False

## Layer initialization

In [3]:
# Two linear layers, 16 -> 32 -> 64, composed into one sequential model.
layer0 = nn.Linear(16, 32)
layer1 = nn.Linear(32, 64)
model = nn.Sequential(layer0, layer1)

# Re-initialise only the weights, in place, from the uniform distribution on
# [0, 1] (the default bounds of nn.init.uniform_); biases are not touched.
nn.init.uniform_(layer0.weight, a=0.0, b=1.0)
nn.init.uniform_(layer1.weight, a=0.0, b=1.0)