In [1]:
import tensorflow as tf
from bigram import BigramLanguageModel
from gpt import GPT

## Loading data

In [2]:
# Read the entire corpus file into memory as one string.
with open('input.txt', mode='r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

In [3]:
# Vocabulary: every distinct character that occurs in the corpus, sorted.
chars = sorted(set(text))
vocab_size = len(chars)

# Show the vocabulary as one string, followed by its size.
print(''.join(chars), vocab_size, sep='\n')


 !$&',-.3:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
65


## Char mapping

In [4]:
# Lookup tables between characters and integer ids; a character's id is its
# position in the sorted `chars` list.
itos = dict(enumerate(chars))
stoi = {ch: i for i, ch in itos.items()}


def encode(s):
    """Return the list of integer ids for the characters of string `s`."""
    return [stoi[c] for c in s]


def decode(l):
    """Return the string spelled by the iterable of integer ids `l`."""
    return ''.join(itos[i] for i in l)


# Round-trip check on a short sample word.
print(encode("hello"))
print(decode(encode("hello")))

[46, 43, 50, 50, 53]
hello


In [5]:
# Convert the whole corpus into a single tensor of integer character ids.
data = tf.convert_to_tensor(encode(text))

# Inspect the tensor's shape and its first 100 ids.
print(data.shape, data[:100], sep='\n')

(1115393,)
tf.Tensor(
[18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44 53 56 43  1 61 43
  1 54 56 53 41 43 43 42  1 39 52 63  1 44 59 56 58 46 43 56  6  1 46 43
 39 56  1 51 43  1 57 54 43 39 49  8  0  0 13 50 50 10  0 31 54 43 39 49
  6  1 57 54 43 39 49  8  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10
  0 37 53 59], shape=(100,), dtype=int32)


In [6]:
# The first 90% of the ids form the training split; the remainder is held
# out for validation.
n = int(len(data) * 0.9)
train_data, val_data = data[:n], data[n:]

## Baseline: bigram-model

In [7]:
# Training settings; they are handed to `m.train(...)` in a later cell.
n_steps = 15_000
batch_size = 32
block_size = 256

# Baseline model, sized to the character vocabulary.
m = BigramLanguageModel(vocab_size=vocab_size)

In [8]:
# Generate 100 new tokens from the bigram model before any training, starting
# from a single token with id 0, and print the decoded text.
idx = tf.zeros(shape=(1, 1), dtype=tf.int32)
print(
    decode(
        m.generate(idx, max_new_tokens=100)[0].numpy()
    )
)


g ;t.b!WX$PwwfOfx$ndq&ZFJRciRfj-mlgROZR3''pNAX'p-xXxNjirN!bbI;SOyq!Ts
eTBKkSHdo3D,YlRozE:ZG
mUcL.cuR


In [9]:
# Train the bigram baseline; both the training and validation splits are
# passed so the model can report a loss on each.
m.train(
    train_data,
    val_data,
    n_steps=n_steps,
    batch_size=batch_size,
    block_size=block_size,
)

step 1000: train loss 3.7293, validation loss 3.4579
step 2000: train loss 3.3397, validation loss 3.2696
step 3000: train loss 3.2214, validation loss 3.1994
step 4000: train loss 3.1719, validation loss 3.1643
step 5000: train loss 3.1456, validation loss 3.1450
step 6000: train loss 3.1307, validation loss 3.1328
step 7000: train loss 3.1210, validation loss 3.1269
step 8000: train loss 3.1149, validation loss 3.1228
step 9000: train loss 3.1112, validation loss 3.1195
step 10000: train loss 3.1092, validation loss 3.1172
step 11000: train loss 3.1077, validation loss 3.1160
step 12000: train loss 3.1063, validation loss 3.1157
step 13000: train loss 3.1058, validation loss 3.1149
step 14000: train loss 3.1056, validation loss 3.1156
step 15000: train loss 3.1056, validation loss 3.1155


In [10]:
# Generate 500 new tokens from the trained bigram model, starting from a
# single token with id 0, and print the decoded text.
idx = tf.zeros(shape=(1, 1), dtype=tf.int32)
print(
    decode(
        m.generate(idx, max_new_tokens=500)[0].numpy()
    )
)


Yrp!
K,RNIBty ojUCpiwJvid whIUiU$omentolyouwOGHEwou:e,Whtp-;jRobyvqu wCLI-minZAavkstibIfriPaUJTrjDyB'JgELBl XjC'llPThRirK? aIl FuO;uf h'?'blBO-Of .leFzoussgunX-dc.wnPYoAgKcrsx m!Zk3?q:ivGOxAaWqF? oiRbBYAUhMerYT'lirdVchrmpGRk'Tzdwuy!SotUBNKIY'tgSTgaCA!v? moAdlNi,vigva sha!hay;x-i.
Ok saT--corywsRITHesFK!;

PsSONV-votNOnMEN:Y pXreEXFoowiEx-ucSmpooUiBdiGARCAk U.K$Yap!m asitXEZDDxAD!JgLOF?Vro;hocorop$l, UTZGo&ChlotoQheG glicuB$a''ltloW&CkxK:Hon,E$lurNgealPSfcUn.ME 
&Ca hyEndoouBh tWeECHF:Xa?un iuqSe


## GPT

In [11]:
# Model settings passed to the GPT constructor below.
n_embd = 198
n_heads = 6
n_layers = 4
dropout = 0.2
block_size = 256

# Training settings; they are handed to `m.train(...)` in a later cell.
batch_size = 32
n_steps = 15_000

# Rebind `m` to the GPT model (this replaces the bigram baseline).
m = GPT(
    vocab_size,
    n_embd,
    block_size,
    n_layers,
    n_heads,
    dropout,
)

In [12]:
# Generate 100 new tokens from the GPT model before any training, starting
# from a single token with id 0, and print the decoded text.
idx = tf.zeros(shape=(1, 1), dtype=tf.int32)
print(
    decode(
        m.generate(idx, max_new_tokens=100)[0].numpy()
    )
)



--ZzMq;aC;zvZ;ZyRmiXDAZtDPUZDAVgtLNyDmz.NMnNK?aDRHZMNtSpvCnyFh$NGNPq NzMSaCpVXNNtVyxSoNLSNNNCpDivhy


In [13]:
# Train the GPT model (`m` now refers to the GPT, not the bigram baseline);
# both the training and validation splits are passed to `train`.
m.train(
    train_data,
    val_data,
    n_steps=n_steps,
    batch_size=batch_size,
    block_size=block_size,
)

step 1000: train loss 2.3048, validation loss 1.8889
step 2000: train loss 1.6958, validation loss 1.6924
step 3000: train loss 1.5336, validation loss 1.6036
step 4000: train loss 1.4500, validation loss 1.5586
step 5000: train loss 1.4009, validation loss 1.5240
step 6000: train loss 1.3618, validation loss 1.5120
step 7000: train loss 1.3394, validation loss 1.4878
step 8000: train loss 1.3207, validation loss 1.4759
step 9000: train loss 1.3082, validation loss 1.4718
step 10000: train loss 1.2951, validation loss 1.4667
step 11000: train loss 1.2872, validation loss 1.4700
step 12000: train loss 1.2763, validation loss 1.4606
step 13000: train loss 1.2698, validation loss 1.4599
step 14000: train loss 1.2612, validation loss 1.4505
step 15000: train loss 1.2567, validation loss 1.4475


In [16]:
# Generate 500 new tokens from the trained GPT model, starting from a single
# token with id 0, and print the decoded text.
idx = tf.zeros(shape=(1, 1), dtype=tf.int32)
print(
    decode(
        m.generate(idx, max_new_tokens=500)[0].numpy()
    )
)


Both sir, what much a life wants. Yield we three
In some unclearious man, richardspects?
A Prettiest and time to weep our clocks,
With both tarrooks riteen in the dug.

CAMILLO:
Sawders that?

MARCIUS:
A loss town.

MARCIUS:
Come, scandalous:
Is the best that but which their bapes, you
shall take po endurer wroughther in rescuessant, it
was found. Who?

MARCIUS:
Pray, make me faceth?
Faith, come, this best o' them.

MARCIUS:
I'll has it.

First Senator:
He cannot speak now for me.

MENENIUS:
For


In [23]:
# Manual sampling loop with temperature scaling.
#
# Temperature follows the usual convention: logits are DIVIDED by it, so
# values below 1 sharpen the distribution (more conservative text) and values
# above 1 flatten it (more random text). The earlier version multiplied the
# logits by 5, which is the same as dividing by 0.2; the value below keeps
# that sampling behaviour while making the name match its meaning.
temperature = 0.2
max_new_tokens = 500

# Start from a single token with id 0.
idx = tf.zeros((1, 1), dtype=tf.int32)

for _ in range(max_new_tokens):
    # Crop the context to the last `block_size` tokens (the value the model
    # was constructed with) instead of a hard-coded 256.
    idx_cond = idx[:, -block_size:]
    # Get the predictions.
    # NOTE(review): confirm that calling `m(...)` directly runs with dropout
    # disabled; the GPT class is defined outside this notebook.
    logits = m(idx_cond)
    # Keep only the logits of the last position and apply the temperature.
    next_logits = logits[:, -1, :] / temperature
    # `tf.random.categorical` takes unnormalised logits and draws one id per row.
    id_next = tf.random.categorical(next_logits, 1, dtype=tf.int32)
    # Append the sampled id to the running sequence.
    idx = tf.concat((idx, id_next), axis=-1)

print(decode(idx[0].numpy()))


I have a servant of the sea of his head,
And he shall be the subject of the death.

KING RICHARD III:
I will not be a subject to the grace.

QUEEN ELIZABETH:
What say you that?

KING RICHARD III:
No, my lord, I would not be so brought to me.

QUEEN ELIZABETH:
The duke is a soldier than the king's parliament.

KING RICHARD III:
So will I see thee to the world of thee.

QUEEN ELIZABETH:
And then thou art a bawdy for thy cousin,
And thou art a man of death and many more
Than thou hast not thy son a
