# Linear Layer

In [1]:
import torch

$$\begin{gathered}
y=x\cdot{W}+b, \\
\text{where }x\in\mathbb{R}^{N\times{n}}\text{, }y\in\mathbb{R}^{N\times{m}}. \\
\\
\text{Thus, }W\in\mathbb{R}^{n\times{m}}\text{ and }b\in\mathbb{R}^m.
\end{gathered}$$

In [2]:
W = torch.FloatTensor([[1, 2],
                       [3, 4],
                       [5, 6]])
b = torch.FloatTensor([2, 2])

In [3]:
print(W.size())
print(b.size())

torch.Size([3, 2])
torch.Size([2])


In [4]:
def linear(x, W, b):
    y = torch.matmul(x, W) + b
    
    return y

In [5]:
x = torch.FloatTensor([[1, 1, 1],
                       [2, 2, 2],
                       [3, 3, 3],
                       [4, 4, 4]])

print(x.size())

torch.Size([4, 3])


In [6]:
y = linear(x, W, b)

In [7]:
print(y.size())

torch.Size([4, 2])


## nn.Linear

In [8]:
import torch.nn as nn

In [9]:
linear = nn.Linear(3, 2)

y = linear(x)

In [10]:
print(y.size())

torch.Size([4, 2])


In [11]:
for p in linear.parameters():
    print(p)

Parameter containing:
tensor([[-0.3220, -0.5204,  0.1594],
        [ 0.4107, -0.1088, -0.4184]], requires_grad=True)
Parameter containing:
tensor([-0.5699,  0.4515], requires_grad=True)


## nn.Module

In [12]:
class MyLinear(nn.Module):

    def __init__(self, input_dim=3, output_dim=2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        self.W = torch.FloatTensor(input_dim, output_dim)
        self.b = torch.FloatTensor(output_dim)

    # You should override 'forward' method to implement detail.
    # The input arguments and outputs can be designed as you wish.
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = torch.matmul(x, self.W) + self.b
        # |y| = (batch_size, input_dim) * (input_dim, output_dim)
        #     = (batch_size, output_dim)
        
        return y

In [13]:
linear = MyLinear(3, 2)

y = linear(x)

In [14]:
print(y.size())

torch.Size([4, 2])


In [15]:
for p in linear.parameters():
    print(p)

You can see that there is no weight parameters to learn.
Above way can forward(or calculate) values, but it cannot be trained.

### Correct way: nn.Parameter

In [16]:
class MyLinear(nn.Module):

    def __init__(self, input_dim=3, output_dim=2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        self.W = nn.Parameter(torch.FloatTensor(input_dim, output_dim))
        self.b = nn.Parameter(torch.FloatTensor(output_dim))
        
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = torch.matmul(x, self.W) + self.b
        # |y| = (batch_size, input_dim) * (input_dim, output_dim)
        #     = (batch_size, output_dim)
        
        return y

Reference: https://pytorch.org/docs/stable/nn.html#torch.nn.Parameter

A kind of Tensor that is to be considered a module parameter.

Parameters are Tensor subclasses, that have a very special property when used with Module s - when they’re assigned as Module attributes they are automatically added to the list of its parameters, and will appear e.g. in parameters() iterator. Assigning a Tensor doesn’t have such effect. This is because one might want to cache some temporary state, like last hidden state of the RNN, in the model. If there was no such class as Parameter, these temporaries would get registered too.

In [17]:
linear = MyLinear(3, 2)

y = linear(x)

In [18]:
print(y.size())

torch.Size([4, 2])


In [19]:
for p in linear.parameters():
    print(p)

Parameter containing:
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]], requires_grad=True)
Parameter containing:
tensor([0., 0.], requires_grad=True)


### nn.Module can contain other nn.Module's child classes.

In [20]:
class MyLinear(nn.Module):

    def __init__(self, input_dim=3, output_dim=2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        self.linear = nn.Linear(input_dim, output_dim)
        
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = self.linear(x)
        # |y| = (batch_size, output_dim)
        
        return y

In [21]:
linear = MyLinear(3, 2)

y = linear(x)

In [22]:
print(y.size())

torch.Size([4, 2])


In [23]:
for p in linear.parameters():
    print(p)

Parameter containing:
tensor([[ 0.3287, -0.1179,  0.3303],
        [-0.0360, -0.5105, -0.5319]], requires_grad=True)
Parameter containing:
tensor([-0.1162, -0.4966], requires_grad=True)
