In [1]:
import torch
import torch.nn.functional as F

In [2]:
# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
device

'cuda'

In [3]:
# Sizes used when slicing the token stream into training examples.
batch_size = 4  # presumably the number of sequences per batch — confirm against the batching code
block_size = 8  # number of tokens in one context window

In [4]:
# Read the full corpus into a single string (text mode is open()'s default).
with open('wizard_of_oz.txt', encoding='utf-8') as f:
    text = f.read()

# Vocabulary: every distinct character in the corpus, in sorted order.
chars = list(set(text))
chars.sort()
print(chars)

['\n', ' ', '!', '"', '&', "'", '(', ')', '*', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [5]:
# Character-level tokenizer lookup tables built from the sorted vocabulary.
s2i = {ch: i for i, ch in enumerate(chars)}  # character -> integer id
# enumerate yields (index, character) pairs, so dict(enumerate(...)) is the
# inverse table: integer id -> character.
i2s = dict(enumerate(chars))


def encode(x):
    """Map each character of `x` to its integer id.

    Returns a list of ints, one per character.
    Raises KeyError for a character that is not in `chars`.
    """
    return [s2i[c] for c in x]


def decode(x):
    """Map each integer id in `x` back to its character.

    Returns a list of single-character strings; wrap the result in
    ''.join(...) to obtain a str.
    Raises KeyError for an id that is not in the vocabulary.
    """
    return [i2s[i] for i in x]


encode('hello')

[61, 58, 65, 65, 68]

In [6]:
# Encode the whole corpus and hold the ids in a 1-D tensor of 64-bit integers
# (torch.int64 is the same dtype as torch.long).
data = torch.tensor(encode(text), dtype=torch.int64)
# Peek at the first 100 token ids.
print(data[0:100])

tensor([28, 39, 42, 39, 44, 32, 49,  1, 25, 38, 28,  1, 44, 32, 29,  1, 47, 33,
        50, 25, 42, 28,  1, 33, 38,  1, 39, 50,  0,  0,  1,  1, 26, 49,  0,  0,
         1,  1, 36, 11,  1, 30, 42, 25, 38, 35,  1, 26, 25, 45, 37,  0,  0,  1,
         1, 25, 45, 44, 32, 39, 42,  1, 39, 30,  1, 44, 32, 29,  1, 47, 33, 50,
        25, 42, 28,  1, 39, 30,  1, 39, 50,  9,  1, 44, 32, 29,  1, 36, 25, 38,
        28,  1, 39, 30,  1, 39, 50,  9,  1, 39])


In [7]:
# Split point at 80% of the token stream: the leading part goes to train_data,
# the remaining 20% to test_data.
n = int(0.8 * data.shape[0])
train_data, test_data = data[:n], data[n:]

In [8]:

# Inputs are the first block_size tokens; targets are the same window shifted by one.
x, y = train_data[:block_size], train_data[1:block_size + 1]

# Every prefix of x is a context, and its target is the token that follows it.
for t in range(block_size):
    context, target = x[:t + 1], y[t]
    print(f'context: {context}, target: {target}')

context: tensor([28]), target: 39
context: tensor([28, 39]), target: 42
context: tensor([28, 39, 42]), target: 39
context: tensor([28, 39, 42, 39]), target: 44
context: tensor([28, 39, 42, 39, 44]), target: 32
context: tensor([28, 39, 42, 39, 44, 32]), target: 49
context: tensor([28, 39, 42, 39, 44, 32, 49]), target: 1
context: tensor([28, 39, 42, 39, 44, 32, 49,  1]), target: 25


In [9]:
# Softmax demo on a 1-D tensor: the outputs are positive and sum to 1.
# torch.tensor with float literals replaces the legacy torch.Tensor(...)
# constructor while keeping the same default floating-point dtype.
tensor = torch.tensor([1.0, 2.0, 3.0])
F.softmax(tensor, dim=0)

tensor([0.0900, 0.2447, 0.6652])

In [12]:
# Matrix-product demo: a is 3x2 and b is 2x3, so the product is 3x3.
# torch.tensor with float literals replaces the legacy torch.Tensor(...)
# constructor while keeping the same default floating-point dtype.
a = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
b = torch.tensor([[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]])

# The @ operator and torch.matmul compute the same product.
a @ b, torch.matmul(a, b)

(tensor([[ 27.,  30.,  33.],
         [ 61.,  68.,  75.],
         [ 95., 106., 117.]]),
 tensor([[ 27.,  30.,  33.],
         [ 61.,  68.,  75.],
         [ 95., 106., 117.]]))