In [32]:
import os

from tinygrad import Context, nn, Tensor
from tinygrad.nn.optim import AdamW
from tinygrad import dtypes

import matplotlib.pyplot as plt # for making figures
%matplotlib inline

# Backend flags passed to tinygrad through the process environment.
# NOTE(review): these are set after `tinygrad` has been imported above; this
# presumably still takes effect because the flags are read when the device is
# first opened -- confirm against the installed tinygrad version.
# METAL_XCODE presumably selects the Xcode toolchain for Metal kernels -- TODO confirm.
os.environ['METAL_XCODE'] = '1'
# DISABLE_COMPILER_CACHE presumably forces kernels to be recompiled each run -- TODO confirm.
os.environ['DISABLE_COMPILER_CACHE'] = '1'

In [33]:
# Read the whole corpus into memory. The context manager closes the file
# handle deterministically, and the explicit encoding keeps the result
# independent of the platform's default locale.
with open('shakespeare.txt', 'r', encoding='utf-8') as f:
    text = f.read()

print(text[:20])
len(text)

First Citizen:
Befor


1115394

In [34]:
# Vocabulary: every distinct character that occurs in the corpus, in sorted order.
chars = sorted(set(text))
vocab_size = len(chars)

print(chars)
print(vocab_size)

['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
65


In [35]:
# Lookup tables between characters and their integer ids (position in `chars`).
stoi = {symbol: idx for idx, symbol in enumerate(chars)}
itos = dict(enumerate(chars))


def encode(s):
    """Map a string to the list of integer ids of its characters."""
    return [stoi[c] for c in s]


def decode(l):
    """Map an iterable of integer ids back to the string they spell."""
    return ''.join(itos[i] for i in l)


print(encode("encode test"))
print(decode(encode("encode test")))

[43, 52, 41, 53, 42, 43, 1, 58, 43, 57, 58]
encode test


In [36]:
# Encode the entire corpus once and keep it as an int64 tensor of character ids.
data = Tensor(encode(text), dtype=dtypes.int64)
print(data.shape, data.dtype)
# Peek at the first 300 ids, copied to the host as a numpy array.
print(data[:300].numpy())

(1115394,) dtypes.long
[18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44 53 56 43  1 61 43
  1 54 56 53 41 43 43 42  1 39 52 63  1 44 59 56 58 46 43 56  6  1 46 43
 39 56  1 51 43  1 57 54 43 39 49  8  0  0 13 50 50 10  0 31 54 43 39 49
  6  1 57 54 43 39 49  8  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10
  0 37 53 59  1 39 56 43  1 39 50 50  1 56 43 57 53 50 60 43 42  1 56 39
 58 46 43 56  1 58 53  1 42 47 43  1 58 46 39 52  1 58 53  1 44 39 51 47
 57 46 12  0  0 13 50 50 10  0 30 43 57 53 50 60 43 42  8  1 56 43 57 53
 50 60 43 42  8  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 18 47
 56 57 58  6  1 63 53 59  1 49 52 53 61  1 15 39 47 59 57  1 25 39 56 41
 47 59 57  1 47 57  1 41 46 47 43 44  1 43 52 43 51 63  1 58 53  1 58 46
 43  1 54 43 53 54 50 43  8  0  0 13 50 50 10  0 35 43  1 49 52 53 61  5
 58  6  1 61 43  1 49 52 53 61  5 58  8  0  0 18 47 56 57 58  1 15 47 58
 47 64 43 52 10  0 24 43 58  1 59 57]


In [37]:
# Split point: the leading 90% of the ids is used for training, the tail for validation.
n = int(0.9 * len(data))
train_data, val_data = data[:n], data[n:]

In [80]:
def stack_data(data, ix, block_size, offset):
    """Cut one fixed-length window out of `data` per start index and stack them.

    Args:
        data: token tensor to slice from; each slice is copied to the host
            with `.numpy()`.
        ix: tensor of window start positions (get_batch passes a
            `(batch_size,)` tensor from `Tensor.randint`).
        block_size: number of tokens in each window.
        offset: shift added to every start position (get_batch uses 0 for the
            inputs and 1 for the targets).

    Returns:
        An int64 Tensor with one row per entry of `ix`, where each row is
        `data[start + offset : start + offset + block_size]`.
    """
    # Fetch all start positions in a single host transfer instead of calling
    # `.item()` on every element (twice) inside the loop.
    starts = ix.tolist()
    rows = [data[s + offset:s + offset + block_size].numpy() for s in starts]
    return Tensor(rows, dtype=dtypes.int64)

In [81]:
# Fix tinygrad's random seed so the batch sampled below can be reproduced.
Tensor.manual_seed(1337)
batch_size = 4 # how many independent sequences will we process in parallel?
block_size = 8 # what is the maximum context length for predictions?

def get_batch(split):
    """Sample a random batch of input windows and their shifted-by-one targets.

    Args:
        split: 'train' selects `train_data`; any other value selects `val_data`.

    Returns:
        (x, y): two tensors built by `stack_data` from the same random start
        positions, with `y` offset one position to the right of `x`.
    """
    source = val_data if split != 'train' else train_data
    # Upper bound leaves room for a full window plus the one-step target shift.
    starts = Tensor.randint((batch_size,), high=len(source) - block_size)
    inputs = stack_data(source, starts, block_size, offset=0)
    targets = stack_data(source, starts, block_size, offset=1)
    return inputs, targets

# Draw one training batch and print the inputs followed by their targets.
xb, yb = get_batch('train')
for heading, batch in (('inputs:', xb), ('targets:', yb)):
    print(heading)
    print(batch.shape)
    print(batch.numpy())

print('----')

# Optional per-position walkthrough of context -> target pairs (left disabled):
# for b in range(batch_size): # batch dimension
#     for t in range(block_size): # time dimension
#         context = xb[b, :t+1]
#         target = yb[b,t]
#         print(f"when input is {context.tolist()} the target: {target}")

inputs:
(4, 8)
[[58  1 40 56 43 39 49 57]
 [42  1 46 47 51  1 58 46]
 [39 59 57 43  0 14 59 58]
 [59 56  1 46 47 45 46 52]]
targets:
(4, 8)
[[ 1 40 56 43 39 49 57  1]
 [ 1 46 47 51  1 58 46 39]
 [59 57 43  0 14 59 58  1]
 [56  1 46 47 45 46 52 43]]
----
