In [1]:
import re
import torch
from torch.utils.data import DataLoader, Dataset
import random
import numpy as np
import pandas as pd
from torch.nn.utils.rnn import pad_sequence
import platform
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import random_split
from sklearn.model_selection import KFold

# Dropout
Dropout을 적용하고 싶은 레이어 전에 Dropout을 넣어줘야 함<br>
train시에는 꺼지지 않은 값이 2배가 됨

In [2]:
x = torch.Tensor([[1,2,3,4], [4,5,6,7]])
nn.Dropout(0.5).forward(x)

tensor([[0., 0., 0., 0.],
        [8., 0., 0., 0.]])

# .nonzero() 

In [3]:
torch.tensor([[0, 1, 0], [1, 1, 1]]).nonzero()

tensor([[0, 1],
        [1, 0],
        [1, 1],
        [1, 2]])

# nn.CrossEntropyLoss()
logsoftmax + nllloss이기 때문에 모델 안에 softmax레이어를 넣을 필요 없음<br>
target은 long이어야 하고 input은 데이터 개수 x output_dim(=카테고리개수)여야 함<br>
output은 데이터개수 차원이고 안의 내용은 카테고리 숫자여야함 <br>

In [4]:
>>> loss = nn.CrossEntropyLoss()
>>> input = torch.randn(3, 5, requires_grad=True)
>>> target = torch.empty(3, dtype=torch.long).random_(5)
>>> output = loss(input, target)
>>> output.backward()

In [5]:
target.shape, input.shape

(torch.Size([3]), torch.Size([3, 5]))

In [6]:
input

tensor([[-0.5201,  0.9689,  0.0953, -1.2881,  0.0179],
        [-0.5532, -1.0832, -1.5432, -2.9757, -0.6711],
        [ 1.0784,  0.6373,  0.2693,  0.7127,  0.7822]], requires_grad=True)

In [7]:
target

tensor([2, 0, 1])

# inplace operation

In [8]:
x.add_(1)
x

tensor([[2., 3., 4., 5.],
        [5., 6., 7., 8.]])

# repeat

In [9]:
>>> x = torch.tensor([1, 2, 3])
>>> x.repeat(4, 2)

tensor([[1, 2, 3, 1, 2, 3],
        [1, 2, 3, 1, 2, 3],
        [1, 2, 3, 1, 2, 3],
        [1, 2, 3, 1, 2, 3]])

In [10]:
torch.arange(0, 10).unsqueeze(0).repeat(3, 1)

tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

## permute
(*dims) → Tensor
Returns a view of the original tensor with its dimensions permuted.

Parameters
*dims (int...) – The desired ordering of dimensions

In [11]:

>>> x = torch.randn(2, 3, 5)
>>> x.size()
torch.Size([2, 3, 5])
>>> x.permute(2, 0, 1).size()
torch.Size([5, 2, 3])

torch.Size([5, 2, 3])


# product
- `*` : elementwise product 
- `@` : inner product or matmul 


In [65]:
v = torch.Tensor([1, 2, 3])
print(v.shape)

torch.Size([3])


In [66]:
v * v # hadamard product

tensor([1., 4., 9.])

In [67]:
v * v.T # hadamard product

tensor([1., 4., 9.])

In [68]:
v.T * v

tensor([1., 4., 9.])

In [69]:
v @ v # inner product

tensor(14.)

In [70]:
v.T @ v

tensor(14.)

In [71]:
v @ v.T

tensor(14.)

In [72]:
v.unsqueeze_(0)
v.shape

torch.Size([1, 3])

In [78]:
v + v.T # broad casting 

tensor([[2., 3., 4.],
        [3., 4., 5.],
        [4., 5., 6.]])

In [77]:
v * v.T # (1 x 3) * (3 x 1) -> broad casting 

tensor([[1., 2., 3.],
        [2., 4., 6.],
        [3., 6., 9.]])

In [79]:
v.T * v # (3 x 1) * (1 x 3) -> broad casting

tensor([[1., 2., 3.],
        [2., 4., 6.],
        [3., 6., 9.]])

In [76]:
v @ v.T # 1 x 3 @ 3 x 1 -> 1 x 1 

tensor([[14.]])

In [75]:
v @ v # 1 x 3 @ 1 x 3 -> caanot multiply!

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x3 and 1x3)

# bmm
Performs a batch matrix-matrix product of matrices stored in input and mat2.

input and mat2 must be 3-D tensors each containing the same number of matrices.

In [12]:
input = torch.randn(10, 3, 4)
mat2 = torch.randn(10, 4, 5)
res = torch.bmm(input, mat2)
res.size()
torch.Size([10, 3, 5])

torch.Size([10, 3, 5])

# matmul
https://pytorch.org/docs/stable/generated/torch.matmul.html#torch.matmul

broadcasting된다는 것이 bmm과의 차이

In [13]:
input = torch.randn(10, 3, 4)
mat2 = torch.randn(10, 4, 5)
res = input.matmul(mat2)
res.size()

torch.Size([10, 3, 5])

In [14]:
>>> # batched matrix x broadcasted matrix
>>> tensor1 = torch.randn(10, 3, 4)
>>> tensor2 = torch.randn(4, 5)
>>> torch.matmul(tensor1, tensor2).size()

torch.Size([10, 3, 5])

In [15]:
torch.bmm(tensor1, tensor2)

RuntimeError: batch2 must be a 3D tensor

# tril

In [16]:
torch.tril(torch.Tensor(3, 4))

tensor([[ 4.4842e-44,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [-2.1006e+23,  3.0787e-41,  0.0000e+00,  0.0000e+00],
        [ 7.0065e-45,  0.0000e+00,  2.3354e-07,  0.0000e+00]])

# masked_fill

In [17]:
x = torch.arange(10)
mask = x < 5 

In [18]:
mask

tensor([ True,  True,  True,  True,  True, False, False, False, False, False])

In [19]:
x.masked_fill(mask, 3)

tensor([3, 3, 3, 3, 3, 5, 6, 7, 8, 9])

# layer norm

In [22]:
>>> input = torch.randn(3, 4)
>>> # With Learnable Parameters
>>> m = nn.LayerNorm(input.size()[1:])

In [23]:
input.shape, m(input).shape

(torch.Size([3, 4]), torch.Size([3, 4]))

In [24]:
m(input).mean(dim = 1)

tensor([-2.9802e-08, -2.2352e-08,  5.9605e-08], grad_fn=<MeanBackward1>)

In [25]:
m(input).std(dim = 1)

tensor([1.1547, 1.1546, 1.1547], grad_fn=<StdBackward0>)

# view vs. transpose

In [26]:
x = torch.arange(100)
x.view(2, 5, 10)

tensor([[[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
         [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
         [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]],

        [[50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
         [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
         [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
         [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
         [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]]])

In [27]:
x = torch.arange(100)
x.view(5, 2, 10)

tensor([[[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]],

        [[20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34, 35, 36, 37, 38, 39]],

        [[40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
         [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]],

        [[60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
         [70, 71, 72, 73, 74, 75, 76, 77, 78, 79]],

        [[80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
         [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]]])

In [28]:
x = torch.arange(10)
xx = x.view(5, 2)

In [29]:
xx

tensor([[0, 1],
        [2, 3],
        [4, 5],
        [6, 7],
        [8, 9]])

In [30]:
xx.view(2, 5)

tensor([[0, 1, 2, 3, 4],
        [5, 6, 7, 8, 9]])

In [31]:
xx.transpose(1, 0)

tensor([[0, 2, 4, 6, 8],
        [1, 3, 5, 7, 9]])

# scatter
https://yuyangyy.medium.com/understand-torch-scatter-b0fd6275331c<br>
Writes all values from the tensor src into self at the indices specified in the index tensor. For each value in src, its output index is specified by its index in src for dimension != dim and by the corresponding value in index for dimension = dim.

In [32]:
src = torch.arange(1, 11).reshape((2, 5))
src

tensor([[ 1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10]])

In [33]:
index = torch.tensor([[0, 1, 2, 0]])
index

tensor([[0, 1, 2, 0]])

In [34]:
torch.zeros(3, 5, dtype=src.dtype).shape, src.shape, index.shape

(torch.Size([3, 5]), torch.Size([2, 5]), torch.Size([1, 4]))

In [35]:
torch.zeros(3, 5, dtype=src.dtype).scatter_(0, index, src)

tensor([[1, 0, 0, 4, 0],
        [0, 2, 0, 0, 0],
        [0, 0, 3, 0, 0]])

In [36]:
# [1, 2, 3, 0, 0] -> [4, 2, 3, 0, 0]으로 된듯
torch.zeros(3, 5, dtype=src.dtype).scatter_(1, index, src)

tensor([[4, 2, 3, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])

In [37]:
src = torch.from_numpy(np.arange(1, 11)).float().view(2, 5)
print(src)

tensor([[ 1.,  2.,  3.,  4.,  5.],
        [ 6.,  7.,  8.,  9., 10.]])


In [38]:
input_tensor = torch.zeros(3, 5)
print(input_tensor)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])


In [39]:
index_tensor = torch.tensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]])
print(index_tensor)

tensor([[0, 1, 2, 0, 0],
        [2, 0, 0, 1, 2]])


In [40]:
## try to manually work out the result 
dim = 0
print(input_tensor.scatter_(dim, index_tensor, src))

tensor([[ 1.,  7.,  8.,  4.,  5.],
        [ 0.,  2.,  0.,  9.,  0.],
        [ 6.,  0.,  3.,  0., 10.]])


one-hot encoding할 때 쓰인다고 함
https://aigong.tistory.com/35

In [41]:
label = torch.Tensor([3, 6, 7]).long()
one_hot = torch.zeros(5, 10)

In [42]:
label = label.unsqueeze(1)

In [43]:
label.shape

torch.Size([3, 1])

In [44]:
one_hot.scatter_(1, label, 1)

tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

# einsum

# where

In [45]:
>>> x = torch.randn(3, 2)
>>> y = torch.ones(3, 2)

In [46]:
>>> x

tensor([[-1.6403,  0.8030],
        [ 2.4286, -0.2958],
        [ 1.3324,  0.3157]])

In [47]:
>>> torch.where(x > 0, x, y)

tensor([[1.0000, 0.8030],
        [2.4286, 1.0000],
        [1.3324, 0.3157]])

# pad

In [53]:
import torch.nn.functional as F

input_tensor = torch.Tensor(3, 1)
print(input_tensor)
padded_tensor = F.pad(input_tensor, (0, 2), mode='replicate')
padded_tensor

tensor([[-1.3055e-27],
        [ 4.5845e-41],
        [-2.4656e+23]])


tensor([[-1.3055e-27, -1.3055e-27, -1.3055e-27],
        [ 4.5845e-41,  4.5845e-41,  4.5845e-41],
        [-2.4656e+23, -2.4656e+23, -2.4656e+23]])

In [54]:
pad = nn.ConstantPad2d((0, 2), 0)
pad(input_tensor)

tensor([[-1.3055e-27,  0.0000e+00,  0.0000e+00],
        [ 4.5845e-41,  0.0000e+00,  0.0000e+00],
        [-2.4656e+23,  0.0000e+00,  0.0000e+00]])