In [10]:
import numpy as np
import torch

In [11]:
# read the train_ids and val_ids
train_ids = torch.from_numpy(np.fromfile('data/train.bin', dtype=np.int32))
val_ids = torch.from_numpy(np.fromfile('data/val.bin', dtype=np.int32))

In [12]:
len(train_ids)

501927

In [14]:
s = torch.tensor([0, 2], dtype=torch.int32)

In [17]:
block_size = 8 # like the context length
batch_size = 4

def get_batch(split) -> torch.Tensor:
    '''Get a training batch
    Shape of tensor
    '''
    data = train_ids if split == 'train' else val_ids
    n = len(data)
    start_id = torch.randint(0, n - block_size, size=(batch_size,))
    # Say (55, 56, 57, 58) is the block size
    x = torch.stack([data[s:s+block_size] for s in start_id])
    y = torch.stack([data[s+1:s+block_size+1] for s in start_id])
    return x, y


In [28]:
xb, yb = get_batch('train')
print (xb.shape, yb.shape)
print (f'xb: {xb}')

print (f'yb: {yb}')

# what is the input and output that this sample covers
b = 0
for i in range(0, block_size):
    print (f'for input: {xb[b, :i + 1]}, target is {yb[b, i]}')

torch.Size([4, 8]) torch.Size([4, 8])
xb: tensor([[  65593, 3407911,   65578, 3014714, 3080235,   65592, 2555945, 2818104],
        [3801144, 3801089, 2818094, 3670063, 3735553, 3276853, 3080234, 3670059],
        [2818104, 3735608, 2555905,   65593, 3473471, 3670075, 2818105, 3932210],
        [  65579, 2883637, 3801089, 4128814, 3276801, 3932213,  327723,   65593]],
       dtype=torch.int32)
yb: tensor([[3407911,   65578, 3014714, 3080235,   65592, 2555945, 2818104,  393273],
        [3801089, 2818094, 3670063, 3735553, 3276853, 3080234, 3670059,   65542],
        [3735608, 2555905,   65593, 3473471, 3670075, 2818105, 3932210, 3735595],
        [2883637, 3801089, 4128814, 3276801, 3932213,  327723,   65593, 2555948]],
       dtype=torch.int32)
for input: tensor([65593], dtype=torch.int32), target is 3407911
for input: tensor([  65593, 3407911], dtype=torch.int32), target is 65578
for input: tensor([  65593, 3407911,   65578], dtype=torch.int32), target is 3014714
for input: tensor([ 

In [61]:
torch.manual_seed(1337)
block_size = 4
xb = torch.randint(1, 10, size=(1, block_size, 3)).float()
xb, xb.shape

(tensor([[[2., 5., 6.],
          [8., 9., 8.],
          [3., 9., 1.],
          [9., 3., 7.]]]),
 torch.Size([1, 4, 3]))

In [95]:
import torch.nn.functional as F

In [99]:
wei = torch.zeros((block_size, block_size)) # make it upper triangular
tril = torch.tril(torch.ones((block_size, block_size)))
wei = F.softmax(wei.masked_fill(tril == 0, float('-inf')), -1) # future cannot be seen

wei

tensor([[1.0000, 0.0000, 0.0000, 0.0000],
        [0.5000, 0.5000, 0.0000, 0.0000],
        [0.3333, 0.3333, 0.3333, 0.0000],
        [0.2500, 0.2500, 0.2500, 0.2500]])

In [101]:
wei @ xb

tensor([[[2.0000, 5.0000, 6.0000],
         [5.0000, 7.0000, 7.0000],
         [4.3333, 7.6667, 5.0000],
         [5.5000, 6.5000, 5.5000]]])