In [1]:
# Laurent LEQUIEVRE
# Research Engineer, CNRS (France)
# Institut Pascal UMR6602
# laurent.lequievre@uca.fr

A REMINDER : CLASS torch.nn.Linear(in_features, out_features, bias=True)

Applies a linear transformation to the incoming data: y = x*W^T + b

Parameters:

in_features -> size of each input sample (i.e. size of x)

out_features -> size of each output sample (i.e. size of y)

bias -> If set to False, the layer will not learn an additive bias. Default: True
    
Note that the weights W have shape (out_features, in_features) and biases b have shape (out_features). 
They are initialized randomly and can be changed later 
(e.g. during the training of a Neural Network they are updated by some optimization algorithm).

In [2]:
import torch
import torch.nn as nn

In [3]:
# Concrete example of nn.Linear
# Create a tensor x of size 3 x 2
# Where x contains three inputs (i.e. the batch size is 3), x[0], x[1] and x[3], each of size 2
x = torch.tensor([[1.0, -1.0],
                  [0.0,  1.0],
                  [0.0,  0.0]])

in_features = x.shape[1]  # = 2
out_features = 5

m = nn.Linear(in_features, out_features)

y = m(x)

# create a fully connected linear layer, which takes input x of shape (batch_size, in_features), 
# where batch size is the number of inputs (each of size in_features) 
# which are passed to the network at once (as a single tensor), 
# and transforms it by the linear equation y = x*W^T + b into a tensor y of shape (batch_size, out_features).

# Internal parameters :
# -> Weights W have shape (out_features, in_features) -> W(5,2)
# -> Biases b have shape (out_features) -> b(5)

# Output :
# y is going to be of shape (batch size, out_features) -> y(3, 5).

# and (behind the scenes) it is computed as:
# y = x.matmul(m.weight.t()) + m.bias  # y = x*W^T + b
# ==> y(3,5) = x(3,2) * W^T(2,5) + b(5)

In [4]:
# m.weight and m.bias were initialized randomly.
print(m.weight.size())
print(m.weight)

torch.Size([5, 2])
Parameter containing:
tensor([[ 0.0619, -0.1240],
        [ 0.3286, -0.3996],
        [-0.1813, -0.2766],
        [-0.4727,  0.4290],
        [-0.7036,  0.3730]], requires_grad=True)


In [5]:
print(m.bias.size())
print(m.bias)

torch.Size([5])
Parameter containing:
tensor([-0.1196,  0.3142, -0.4347,  0.2551, -0.6858], requires_grad=True)


In [6]:
# The output y is :
print(y.size())
print(y)

torch.Size([3, 5])
tensor([[ 0.0663,  1.0424, -0.3395, -0.6467, -1.7625],
        [-0.2436, -0.0855, -0.7112,  0.6841, -0.3128],
        [-0.1196,  0.3142, -0.4347,  0.2551, -0.6858]],
       grad_fn=<AddmmBackward>)


In [7]:
import torch
import torch.nn as nn  # define a shortcut 'nn' to avoid always use 'torch.nn'
import torch.nn.functional as F # define a shortcut 'F' to avoid always use 'torch.nn.functional' 

<img src="relu.png">

In [8]:
# Example 1 :
# =========
# A simple network : 2 Inputs, 3 outputs and 1 relu activation Function
# Defined step by step with a Linear Module

x = torch.tensor([1.0, -1.0]) # define a tensor torch.Size([2])

net = nn.Linear(2, 3) # define a Linear network with in_features=2, out_features=3, bias=True (by default)

print("input : {}".format(x))
print("--------------------------------")

print("Weight : {}".format(net.weight))
print("Bias : {}".format(net.bias))
print("--------------------------------")

output = net.forward(x) # forward tensor into the net and get the output
y = x.matmul(net.weight.t()) + net.bias # just to verify the equation : y = x*W^T + b

print("output : {}".format(output))
print("y : {}".format(y))
print("--------------------------------")

# ReLU: ReLU stands for Rectified Linear Unit. 
# It takes a real-valued input and thresholds it at zero (replaces negative values with zero)
# f(x) = max(0, x)
activation = F.relu(output) # compute activation of output

print("activation : {}".format(activation))

input : tensor([ 1., -1.])
--------------------------------
Weight : Parameter containing:
tensor([[ 0.2480, -0.6331],
        [-0.4280, -0.3598],
        [-0.6689,  0.4010]], requires_grad=True)
Bias : Parameter containing:
tensor([0.1330, 0.6197, 0.4933], requires_grad=True)
--------------------------------
output : tensor([ 1.0141,  0.5516, -0.5766], grad_fn=<AddBackward0>)
y : tensor([ 1.0141,  0.5516, -0.5766], grad_fn=<AddBackward0>)
--------------------------------
activation : tensor([1.0141, 0.5516, 0.0000], grad_fn=<ReluBackward0>)


In [9]:
# Example 2 :
# =========
# The same network defined with a Sequential container
# A sequential container. Modules will be added to it in the order they are passed in the constructor.

# nn.ReLU() creates an nn.Module which you can be added to an nn.Sequential model.
# nn.functional.relu is just the functional API call to the relu function.

x = torch.tensor([1.0, -1.0]) # define a tensor torch.Size([2])

net = nn.Sequential(
                    nn.Linear(2, 3),
                    nn.ReLU()
                    );

print("input : {}".format(x))
print("--------------------------------")

print("network structure : {}".format(net))

print("--------------------------------")
print("Weight : {}".format(net[0].weight))  # net parameters contained at index 0
print("Bias : {}".format(net[0].bias)) # net parameters contained at index 0
print("Activation function : {}".format(net[1]))
print("--------------------------------")

activation = net.forward(x) # forward tensor into the net and get the output
print("activation : {}".format(activation))

print("--------------------------------")
output = torch.matmul(x,net[0].weight.t()) + net[0].bias;  # Just to verify
print("internal output : {}".format(output))
print("ReLU(output) :{}".format(torch.nn.ReLU().forward(output))) # Just to verify


input : tensor([ 1., -1.])
--------------------------------
network structure : Sequential(
  (0): Linear(in_features=2, out_features=3, bias=True)
  (1): ReLU()
)
--------------------------------
Weight : Parameter containing:
tensor([[-0.5955, -0.6533],
        [-0.4863,  0.3000],
        [-0.3434,  0.2724]], requires_grad=True)
Bias : Parameter containing:
tensor([-0.3994, -0.2261, -0.1660], requires_grad=True)
Activation function : ReLU()
--------------------------------
activation : tensor([0., 0., 0.], grad_fn=<ReluBackward0>)
--------------------------------
internal output : tensor([-0.3416, -1.0125, -0.7817], grad_fn=<AddBackward0>)
ReLU(output) :tensor([0., 0., 0.], grad_fn=<ReluBackward0>)


In [10]:
# Example 3 :
# =========
# The same network defined as a custom Module subclass without Sequantial container

class NeuralNetwork(nn.Module):
    def __init__(self, n_inputs, n_outputs):
        super().__init__()  # Call parent nn.Module constructor
        self.l1 = nn.Linear(n_inputs, n_outputs)
        self.act1 = nn.ReLU()

    def forward(self, x):
        x = self.l1(x)
        x = self.act1(x)
        return x


    
model = NeuralNetwork(2, 3)
x = torch.tensor([1.0, -1.0]) # define a tensor torch.Size([2])
activation = model.forward(x)

print("input : {}".format(x))
print("--------------------------------")
print("Weight : {}".format(model.l1.weight)) 
print("Bias : {}".format(model.l1.bias))
print("Activation function : {}".format(model.act1))
print("--------------------------------")
print("activation : {}".format(activation))
print("--------------------------------")
output = torch.matmul(x,model.l1.weight.t()) + model.l1.bias;  # Just to verify
print("internal output : {}".format(output))
print("ReLU(output) :{}".format(torch.nn.ReLU().forward(output))) # Just to verify


input : tensor([ 1., -1.])
--------------------------------
Weight : Parameter containing:
tensor([[-0.1354,  0.0456],
        [-0.5058,  0.6184],
        [-0.5801,  0.7063]], requires_grad=True)
Bias : Parameter containing:
tensor([-0.6495,  0.6350,  0.2916], requires_grad=True)
Activation function : ReLU()
--------------------------------
activation : tensor([0., 0., 0.], grad_fn=<ReluBackward0>)
--------------------------------
internal output : tensor([-0.8306, -0.4893, -0.9948], grad_fn=<AddBackward0>)
ReLU(output) :tensor([0., 0., 0.], grad_fn=<ReluBackward0>)


In [11]:
# Example 4 :
# =========
# The same network defined as a custom Module subclass with Sequantial container

class NeuralNetworkSequential(nn.Module):
    def __init__(self, n_inputs, n_outputs):
        super().__init__()  # Call parent nn.Module constructor
        self.net = nn.Sequential(
                    nn.Linear(n_inputs, n_outputs),
                    nn.ReLU()
                    );

    def forward(self, x):
        x = self.net(x)
        return x


    
model = NeuralNetworkSequential(2, 3)

print("--------------------------------")
print("network structure : {}".format(net))
print("--------------------------------")

x = torch.tensor([1.0, -1.0]) # define a tensor torch.Size([2])
activation = model.forward(x)

print("input : {}".format(x))
print("--------------------------------")
print("Weight : {}".format(model.net[0].weight)) 
print("Bias : {}".format(model.net[0].bias))
print("Activation function : {}".format(model.net[1]))
print("--------------------------------")
print("activation : {}".format(activation))
print("--------------------------------")
output = torch.matmul(x,model.net[0].weight.t()) + model.net[0].bias;  # Just to verify
print("internal output : {}".format(output))
print("ReLU(output) :{}".format(torch.nn.ReLU().forward(output))) # Just to verify


--------------------------------
network structure : Sequential(
  (0): Linear(in_features=2, out_features=3, bias=True)
  (1): ReLU()
)
--------------------------------
input : tensor([ 1., -1.])
--------------------------------
Weight : Parameter containing:
tensor([[ 0.1587, -0.3943],
        [ 0.2709,  0.5950],
        [ 0.1807,  0.2263]], requires_grad=True)
Bias : Parameter containing:
tensor([ 0.5144, -0.2386, -0.3587], requires_grad=True)
Activation function : ReLU()
--------------------------------
activation : tensor([1.0673, 0.0000, 0.0000], grad_fn=<ReluBackward0>)
--------------------------------
internal output : tensor([ 1.0673, -0.5627, -0.4043], grad_fn=<AddBackward0>)
ReLU(output) :tensor([1.0673, 0.0000, 0.0000], grad_fn=<ReluBackward0>)


The basic unit of computation in a neural network is the neuron, often called node or unit.
A network is built from individual nodes. Each nodes has some number of weighted inputs (weight and bias). 
These weighted inputs are summed together (a linear combination) then passed through an activation function to get the node’s output.

Input Nodes :
- The Input nodes provide information from the outside world to the network and are together referred to as the “Input Layer”. 
- No computation is performed in any of the Input nodes. 
- They just pass on the information to the hidden nodes.

Hidden Nodes :
- The Hidden nodes have no direct connection with the outside world (hence the name “hidden”). 
- They perform computations and transfer information from the input nodes to the output nodes. 
- A collection of hidden nodes forms a “Hidden Layer”. 
- While a feedforward network will only have a single input layer and a single output layer, it can have zero or multiple Hidden Layers.

Output Nodes :
- The Output nodes are collectively referred to as the “Output Layer” and are responsible for computations and transferring information from the network to the outside world.


In [12]:
# An example of a Feed Forward Network : 3 Layers
# -> 2 Hidden Layers : 4 and 6 Nodes, and 1 Output Node

<img src="feed_forward_net_sigmoid.png">

In [19]:
# Sigmoid : takes a real-valued input and squashes it to range between 0 and 1
# σ(x) = 1 / (1 + exp(−x))

<img src="sigmoid.jpeg">

In [47]:
# Example 4 :
# =========
# An example of a Feed Forward Network : 3 Layers
# -> 2 Hidden Layers : 4 and 6 Nodes, and 1 Output Node

class FeedForwardSequential(nn.Module):
    def __init__(self):
        super().__init__()  # Call parent nn.Module constructor
        self.net = nn.Sequential(
                    nn.Linear(1,4),  # Hidden layer of 4 Nodes -> for input of 1 element (or batch of 1 element)
                    nn.Sigmoid(),
                    nn.Linear(4,6),  # Hidden layer of 6 Nodes
                    nn.Sigmoid(),
                    nn.Linear(6,1)  # Output layer of 1 Node -> for output of 1 element (or batch of 1 element)
                    );

    def forward(self, x):
        x = self.net(x)
        return x


# Function to calculate the nb parameters of a network model (parameters with a gradient)
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


model = FeedForwardSequential()

print("--------------------------------")
print("network structure : {}".format(model))
print("nb parameters : {}".format(count_parameters(model)))  # 45 parameters (1*4 + 4 + 4*6 + 6 + 6*1 +1)
print("--------------------------------")

x = torch.tensor([[1.0],
                  [2.0],
                  [3.0]
                 ]) # define a tensor torch.Size([3, 1]) (a batchsize 3 of 1 element)

activation = model.forward(x)

print("input : {} of size {}".format(x, x.size()))
print("--------------------------------")
print("[0] Weight : {}".format(model.net[0].weight)) 
print("[0] Bias : {}".format(model.net[0].bias))
print("--------------------------------")
print("[1] Activation function : {}".format(model.net[1]))
print("--------------------------------")
print("[2] Weight : {}".format(model.net[2].weight)) 
print("[2] Bias : {}".format(model.net[2].bias))
print("--------------------------------")
print("[3] Activation function : {}".format(model.net[3]))
print("--------------------------------")
print("[4] Weight : {}".format(model.net[2].weight)) 
print("[4] Bias : {}".format(model.net[2].bias))
print("--------------------------------")
print("activation : {} of size {}".format(activation, activation.size()))

# Another solution to calculate the number of model parameters
sum(p.numel() for p in model.parameters())
print(n)

--------------------------------
network structure : FeedForwardSequential(
  (net): Sequential(
    (0): Linear(in_features=1, out_features=4, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=4, out_features=6, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=6, out_features=1, bias=True)
  )
)
nb parameters : 45
--------------------------------
input : tensor([[1.],
        [2.],
        [3.]]) of size torch.Size([3, 1])
--------------------------------
[0] Weight : Parameter containing:
tensor([[ 0.2679],
        [-0.4267],
        [ 0.3336],
        [-0.7076]], requires_grad=True)
[0] Bias : Parameter containing:
tensor([-0.8901,  0.0423, -0.2154,  0.1240], requires_grad=True)
--------------------------------
[1] Activation function : Sigmoid()
--------------------------------
[2] Weight : Parameter containing:
tensor([[ 0.4293, -0.4396, -0.3325, -0.1437],
        [ 0.3676,  0.3900, -0.1149,  0.0108],
        [-0.0282, -0.2403, -0.1280,  0.4183],
        [ 0.0581,

In [52]:
print(torch.linspace(-1, 1, 100))

x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)
print(x)

tensor([-1.0000, -0.9798, -0.9596, -0.9394, -0.9192, -0.8990, -0.8788, -0.8586,
        -0.8384, -0.8182, -0.7980, -0.7778, -0.7576, -0.7374, -0.7172, -0.6970,
        -0.6768, -0.6566, -0.6364, -0.6162, -0.5960, -0.5758, -0.5556, -0.5354,
        -0.5152, -0.4949, -0.4747, -0.4545, -0.4343, -0.4141, -0.3939, -0.3737,
        -0.3535, -0.3333, -0.3131, -0.2929, -0.2727, -0.2525, -0.2323, -0.2121,
        -0.1919, -0.1717, -0.1515, -0.1313, -0.1111, -0.0909, -0.0707, -0.0505,
        -0.0303, -0.0101,  0.0101,  0.0303,  0.0505,  0.0707,  0.0909,  0.1111,
         0.1313,  0.1515,  0.1717,  0.1919,  0.2121,  0.2323,  0.2525,  0.2727,
         0.2929,  0.3131,  0.3333,  0.3535,  0.3737,  0.3939,  0.4141,  0.4343,
         0.4545,  0.4747,  0.4949,  0.5152,  0.5354,  0.5556,  0.5758,  0.5960,
         0.6162,  0.6364,  0.6566,  0.6768,  0.6970,  0.7172,  0.7374,  0.7576,
         0.7778,  0.7980,  0.8182,  0.8384,  0.8586,  0.8788,  0.8990,  0.9192,
         0.9394,  0.9596,  0.9798,  1.00