# Linear Layer

In [1]:
import torch

## Raw Linear Layer

$$\begin{gathered}
y=x\cdot{W}+b, \\
\text{where }x\in\mathbb{R}^{N\times{n}}\text{, }y\in\mathbb{R}^{N\times{m}}. \\
\\
\text{Thus, }W\in\mathbb{R}^{n\times{m}}\text{ and }b\in\mathbb{R}^m.
\end{gathered}$$

In [2]:
W = torch.FloatTensor([[1, 2],
                       [3, 4],
                       [5, 6]])
b = torch.FloatTensor([2, 2])

In [3]:
print(W.size()) ## nxm n(feature number)
print(b.size())

torch.Size([3, 2])
torch.Size([2])


In [4]:
def linear(x, W, b):  # x-data, W,b-parameter 
    y = torch.matmul(x, W) + b
    
    return y

In [5]:
x = torch.FloatTensor([[1, 1, 1],
                       [2, 2, 2],
                       [3, 3, 3],
                       [4, 4, 4]])
print(x.size())

torch.Size([4, 3])


In [7]:
y = linear(x, W, b)

In [8]:
print(y.size())

torch.Size([4, 2])


---
# nn.Module

In [9]:
import torch.nn as nn

In [20]:
class MyLinear(nn.Module):
    
    def __init__(self, input_dim=5, output_dim=1):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        self.W = torch.FloatTensor(input_dim, output_dim)
        self.b = torch.FloatTensor(output_dim)
        
    ## You should override 'forward' method to implement detail.
    ## The input arguments and outputs can be designed as you wish.
    def forward(self, x):
        ## |x| = (batch_size, input_dim)
        y = torch.matmul(x, self.W) + self.b
        ## |y| = (batch_size, input_dim) * (input_dim, output_dim)
        ##     = (batch_size, output_dim)
        
        return y

In [21]:
x = torch.FloatTensor([[1, 1, 1, 1, 1],
                       [2, 2, 2, 2, 2],
                       [3, 3, 3, 3, 3]])

In [22]:
print(x.size())

torch.Size([3, 5])


In [23]:
linear = MyLinear()

In [25]:
y = linear(x)

In [26]:
print(y.size())

torch.Size([3, 1])


In [27]:
linear.parameters()

<generator object Module.parameters at 0x7ff7a63f3970>

In [29]:
for p in linear.parameters():
    print(p)
    
## You can see that there is no weight parameters to learn. Above way can 
## forward(or calculate) values, but it cannot be trained.

---
## Correct way: nn.Parameter

In [34]:
class MyLinear(nn.Module):
    
    def __init__(self, input_dim=4, output_dim=2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        self.W = nn.Parameter(torch.FloatTensor(input_dim, output_dim))
        self.b = nn.Parameter(torch.FloatTensor(output_dim))
        
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = torch.matmul(x, self.W) + self.b
        # |y| = (batch_size, input_dim) * (input_dim, output_dim)
        #     = (batch_size, output_dim)
        
        return y

In [35]:
x = torch.FloatTensor([[1, 1, 1, 1],
                       [2, 2, 2, 2],
                       [3, 3, 3, 3],
                       [4, 4, 4, 4],
                       [5, 5, 5, 5]])
print(x.size())

torch.Size([5, 4])


In [36]:
linear = MyLinear()

In [38]:
y = linear(x)

In [39]:
print(y)
print(y.size())

tensor([[1.3984e+35, 5.0487e-29],
        [2.7969e+35, 7.5731e-29],
        [4.1953e+35, 1.0097e-28],
        [5.5937e+35, 1.2622e-28],
        [6.9921e+35, 1.5146e-28]], grad_fn=<AddBackward0>)
torch.Size([5, 2])


In [41]:
for p in linear.parameters():
    print(p)
    
## W, b

Parameter containing:
tensor([[0.0000e+00, 2.5244e-29],
        [1.3984e+35, 2.8685e-42],
        [8.4078e-45, 0.0000e+00],
        [0.0000e+00, 0.0000e+00]], requires_grad=True)
Parameter containing:
tensor([0.0000e+00, 2.5244e-29], requires_grad=True)


---
## nn.Linear

In [42]:
x = torch.FloatTensor([[1, 1, 1],
                       [2, 2, 2],
                       [3, 3, 3],
                       [4, 4, 4],
                       [5, 5, 5]])
print(x.size())

torch.Size([5, 3])


In [44]:
linear = nn.Linear(3, 2)
y = linear(x)

In [45]:
print(y)
print(y.size())

tensor([[-0.7747,  0.3063],
        [-1.0327,  0.9135],
        [-1.2906,  1.5207],
        [-1.5485,  2.1279],
        [-1.8065,  2.7351]], grad_fn=<AddmmBackward>)
torch.Size([5, 2])


In [46]:
for p in linear.parameters():
    print(p)

Parameter containing:
tensor([[ 0.2039,  0.0039, -0.4658],
        [ 0.5749, -0.1423,  0.1746]], requires_grad=True)
Parameter containing:
tensor([-0.5168, -0.3010], requires_grad=True)


---
## nn.Module can contain other nn.Module's child classes.


In [47]:
class MyLinear(nn.Module):
    
    def __init__(self, input_dim=3, output_dim=2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        self.linear = nn.Linear(input_dim, output_dim) # return Tensor
        
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = self.linear(x)
        # |y| = (batch_size, output_dim)
        
        return y

In [55]:
linear = MyLinear(3, 2)

y = linear(x)

In [56]:
print(y)
print(y.size())

tensor([[-0.2935, -0.3266],
        [-0.6891, -0.7678],
        [-1.0847, -1.2089],
        [-1.4804, -1.6501],
        [-1.8760, -2.0912]], grad_fn=<AddmmBackward>)
torch.Size([5, 2])


In [61]:
for p in linear.parameters():
    print(p)

Parameter containing:
tensor([[-0.4285,  0.0406, -0.0077],
        [ 0.4476, -0.5698, -0.3189]], requires_grad=True)
Parameter containing:
tensor([0.1022, 0.1145], requires_grad=True)
