In [1]:
import torch

# Toy self-attention inputs.
# `x` is written as 3 rows of 4 features; each projection matrix below is
# written as 4 rows of 3 columns, so `x @ w_*` yields a 3x3 result.
x = torch.tensor([
    [1., 0., 1., 0.],
    [0., 2., 0., 2.],
    [1., 1., 1., 1.],
])

# Query projection weights.
w_query = torch.tensor([
    [1., 0., 1.],
    [1., 0., 0.],
    [0., 0., 1.],
    [0., 1., 1.],
])

# Key projection weights.
w_key = torch.tensor([
    [0., 0., 1.],
    [1., 1., 0.],
    [0., 1., 0.],
    [1., 1., 0.],
])

# Value projection weights.
w_value = torch.tensor([
    [0., 2., 0.],
    [0., 3., 0.],
    [1., 0., 3.],
    [1., 1., 0.],
])

In [3]:
# Build the queries, keys, and values by projecting the input `x`
# through each weight matrix (the three products are independent).
querys = x @ w_query
keys = x @ w_key
values = x @ w_value

In [5]:
# Compute the attention scores: dot product of every query row with
# every key row (queries times the transposed keys).
attn_scores = querys @ keys.T
print(attn_scores)

tensor([[ 2.,  4.,  4.],
        [ 4., 16., 12.],
        [ 4., 12., 10.]])


In [8]:
# Convert the attention scores into softmax probabilities.
import numpy as np
from torch.nn.functional import softmax

# Scale factor: square root of the size of the last axis of `keys`.
key_dim_sqrt = np.sqrt(keys.size(-1))

# Divide the scores by the scale factor, then normalize along the last
# axis so that each row sums to 1.
attn_probs = softmax(
    attn_scores / key_dim_sqrt,
    dim=-1,
)

print(attn_probs)

tensor([[1.3613e-01, 4.3194e-01, 4.3194e-01],
        [8.9045e-04, 9.0884e-01, 9.0267e-02],
        [7.4449e-03, 7.5471e-01, 2.3785e-01]])


In [9]:
# Weighted sum of the values: each output row mixes the rows of `values`
# using the corresponding row of softmax probabilities as weights.
weighted_values = attn_probs @ values
print(weighted_values)

tensor([[1.8639, 6.3194, 1.7042],
        [1.9991, 7.8141, 0.2735],
        [1.9926, 7.4796, 0.7359]])


In [1]:
# Feed-forward neural network calculation example (1): inputs and parameters.
import torch

# Input vector with two integer features.
x = torch.tensor([2, 1])

# First layer: weights written as 2 rows x 3 columns, plus a scalar bias.
w1 = torch.tensor([
    [3, 2, -4],
    [2, -3, 1],
])
b1 = 1

# Second layer: weights written as 3 rows x 2 columns, plus a scalar bias.
w2 = torch.tensor([
    [-1, 1],
    [1, 2],
    [3, 1],
])
b2 = -1

In [2]:
# Feed-forward neural network calculation example (2): forward pass.
# First layer pre-activation: x times w1, plus the scalar bias b1.
h_preact = x @ w1 + b1
# ReLU replaces negative pre-activations with zero.
h = torch.nn.functional.relu(h_preact)
# Second layer: hidden activations times w2, plus the scalar bias b2.
y = h @ w2 + b2

# Show the pre-activation, the hidden activation, and the output, one per line.
print(h_preact, h, y, sep="\n")

tensor([ 9,  2, -6])
tensor([9, 2, 0])
tensor([-8, 12])


In [3]:
# Layer normalization example.
import torch

# NOTE(review): the name `input` shadows Python's builtin `input()` for the
# rest of the kernel session; it is kept here because other cells may use it.
input = torch.tensor([
    [1.0, 2.0, 3.0],
    [1.0, 1.0, 1.0],
])

# Normalize over the last axis, whose size is taken from the tensor itself.
m = torch.nn.LayerNorm(input.size(-1))
output = m(input)

# The all-equal second row has zero variance, so it normalizes to zeros.
print(output)

tensor([[-1.2247,  0.0000,  1.2247],
        [ 0.0000,  0.0000,  0.0000]], grad_fn=<NativeLayerNormBackward0>)


In [4]:
# Dropout example.
import torch

# NOTE(review): no random seed is set in this cell, so both the random input
# and the dropped positions differ between runs unless seeded elsewhere.
# `input` also shadows Python's builtin of the same name.
m = torch.nn.Dropout(0.2)  # drop probability p = 0.2
input = torch.randn((1, 10))

# Entries are either zeroed out or rescaled by the dropout layer.
output = m(input)

print(output)

tensor([[ 0.0000,  0.1267,  0.1578, -0.7918,  2.6585, -0.9415, -0.5692, -2.5978,
          1.1143,  0.3630]])


In [None]:
# Adam optimizer (reference-only code).
# NOTE(review): `model` is not defined in the cells shown here; this cell
# presumably expects a model object exposing `parameters()` and a
# `learning_rate` attribute -- confirm before running.
from torch.optim import Adam

optimizer = Adam(
    params=model.parameters(),
    lr=model.learning_rate,
)