In [1]:
import tensorflow as tf
from bigram import BigramLanguageModel
from gpt import GPT

## Loading data

In [2]:
# Read the whole training corpus into memory as one string.
with open('input.txt', mode='r', encoding='utf-8') as f:
    text = f.read()

In [3]:
# The vocabulary is the sorted set of distinct characters found in the corpus.
chars = sorted(set(text))
vocab_size = len(chars)

# Show the vocabulary itself, then its size on the next line.
print(''.join(chars), vocab_size, sep='\n')


 !$&',-.3:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
65


## Char mapping

In [4]:
# Lookup tables between characters and their integer ids (position in `chars`).
stoi = {ch: i for i, ch in enumerate(chars)}
itos = dict(enumerate(chars))


def encode(s):
    """Convert a string into a list of integer ids, one per character.

    Raises KeyError if `s` contains a character that is not a key of `stoi`.
    """
    return [stoi[c] for c in s]


def decode(l):
    """Convert an iterable of integer ids back into the string they spell.

    Raises KeyError if an id is not a key of `itos`.
    """
    return ''.join(itos[i] for i in l)


# Round-trip sanity check: ids for "hello", then the decoded text.
print(encode("hello"))
print(decode(encode("hello")))

[46, 43, 50, 50, 53]
hello


In [5]:
# Encode the full corpus and hold it as a single 1-D tensor of token ids.
data = tf.convert_to_tensor(encode(text))

# Print the tensor shape, then the first 100 ids as a quick look at the encoding.
print(data.shape, data[:100], sep='\n')

(1115393,)
tf.Tensor(
[18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44 53 56 43  1 61 43
  1 54 56 53 41 43 43 42  1 39 52 63  1 44 59 56 58 46 43 56  6  1 46 43
 39 56  1 51 43  1 57 54 43 39 49  8  0  0 13 50 50 10  0 31 54 43 39 49
  6  1 57 54 43 39 49  8  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10
  0 37 53 59], shape=(100,), dtype=int32)


In [6]:
# First 90% of the token stream is used for training, the remainder for validation.
n = int(0.9 * len(data))
train_data, val_data = data[:n], data[n:]

## Baseline: bigram-model

In [7]:
# Settings passed to the training call for the bigram baseline.
batch_size, block_size = 32, 256
n_steps = 15_000

# Baseline model; it is constructed from the vocabulary size alone.
m = BigramLanguageModel(vocab_size=vocab_size)

In [8]:
# Sample 100 new tokens from the not-yet-trained bigram model,
# starting from a (1, 1) context that holds token id 0.
idx = tf.zeros((1, 1), dtype=tf.int32)
generated = m.generate(idx, max_new_tokens=100)
print(decode(generated[0].numpy()))


 enpMzrMCvSTEolnmHA.QpdBzpH
-&SHqEjDup:::sAXRPvHJQD&fnoqjzm3Jwyy'.fY?3htIhgOZm'Qn-Z?JKrbVcs3UiFs;3',


In [9]:
# Train the bigram baseline; the call reports train/validation loss as it runs.
m.train(
    train_data,
    val_data,
    n_steps=n_steps,
    batch_size=batch_size,
    block_size=block_size,
)

step 1000: train loss 3.7276, validation loss 3.4572
step 2000: train loss 3.3389, validation loss 3.2690
step 3000: train loss 3.2216, validation loss 3.1994
step 4000: train loss 3.1717, validation loss 3.1648
step 5000: train loss 3.1461, validation loss 3.1448
step 6000: train loss 3.1301, validation loss 3.1341
step 7000: train loss 3.1206, validation loss 3.1269
step 8000: train loss 3.1149, validation loss 3.1220
step 9000: train loss 3.1113, validation loss 3.1199
step 10000: train loss 3.1089, validation loss 3.1169
step 11000: train loss 3.1074, validation loss 3.1166
step 12000: train loss 3.1061, validation loss 3.1147
step 13000: train loss 3.1052, validation loss 3.1146
step 14000: train loss 3.1055, validation loss 3.1155
step 15000: train loss 3.1055, validation loss 3.1144


In [10]:
# Sample 500 new tokens from the trained bigram model, again starting from token id 0.
idx = tf.zeros(shape=(1, 1), dtype=tf.int32)
sample_ids = m.generate(idx, max_new_tokens=500)[0].numpy()
print(decode(sample_ids))


ZhSusOwicSqPcacruWug'CgebrG3oY.;nek:koNDQK!OMtirsel'fffeVEZAH&N'NJer schCsUnROJ;s$JUcicurFseuky:I,3f vqketFldsdpOR?
.;wBBbom;Cenks acCERmbyl:mtteaschw, akiIxRASpl;oME:'ayziceatcIr
Tw Tnlpr'esc:jzaGCWA3BEUmxtTR
QKI:
Ana
&xy hUC.'jRCo.fda ;rk,UC-cHDemW wnMcWMaShEOvpW&CldjZbjXx crLBenrin,urcPZv$.hagAofilatlaVIb-rgplomboIRLZJFaLATELiTIIr,Bos,flmmezDerPa&&Cifz$Xd;zoqzy&wETy:
cuausHhuyDNUqDURt;VJENj3PfwPTour aT:GS:xkTlll loCOraiQOXquaxpuC?,Nuck.V rlletILIUlSJj,yonMETZDrasumyTE!MiSPU
 mcau.
NGfMjoERat-


## GPT

In [11]:
# Settings passed to the training call (same values as the bigram baseline).
batch_size, block_size, n_steps = 32, 256, 15_000

# Model settings, handed positionally to the GPT constructor below.
n_embd = 198
n_heads, n_layers = 6, 4
dropout = 0.2

# NOTE: this rebinds `m`, replacing the bigram model created earlier.
m = GPT(vocab_size, n_embd, block_size, n_layers, n_heads, dropout)

In [12]:
# Sample 100 new tokens from the GPT before any training, starting from token id 0.
idx = tf.zeros((1, 1), dtype=tf.int32)
untrained_ids = m.generate(idx, max_new_tokens=100)[0]
print(decode(untrained_ids.numpy()))


wyKILRJBrLRamXTh-JjLJJ.XBHhrJPGeJJ3SmvJmTPkEwayjT
$PrrnJJ.$:eTFPnkoMAJwzmtpk,rn?P?em:zJ:LCRGB?tSKbr-


In [13]:
# Train the GPT model; the call reports train/validation loss as it runs.
m.train(
    train_data,
    val_data,
    n_steps=n_steps,
    batch_size=batch_size,
    block_size=block_size,
)

step 1000: train loss 2.3091, validation loss 1.8663
step 2000: train loss 1.6876, validation loss 1.6742
step 3000: train loss 1.5196, validation loss 1.5844
step 4000: train loss 1.4357, validation loss 1.5439
step 5000: train loss 1.3884, validation loss 1.5180
step 6000: train loss 1.3566, validation loss 1.5068
step 7000: train loss 1.3345, validation loss 1.5055
step 8000: train loss 1.3187, validation loss 1.4868
step 9000: train loss 1.3063, validation loss 1.4845
step 10000: train loss 1.2953, validation loss 1.4857
step 11000: train loss 1.2870, validation loss 1.4669
step 12000: train loss 1.2772, validation loss 1.4795
step 13000: train loss 1.2723, validation loss 1.4671
step 14000: train loss 1.2649, validation loss 1.4850
step 15000: train loss 1.2612, validation loss 1.4649


In [16]:
# Optional: uncomment the line below to write the current weights of `m` to "nanoGPT.h5".
# m.save_weights("nanoGPT.h5")

In [14]:
# Sample 500 new tokens from the trained GPT, starting from token id 0.
idx = tf.zeros(shape=(1, 1), dtype=tf.int32)
gpt_sample = m.generate(idx, max_new_tokens=500)
print(decode(gpt_sample[0].numpy()))


To Pause, that full of their voices?

PETRUCHIO:
Friar Claudio, hold Signior Peter Froublius' death;
For this is son, or and so oft is as this business
You gracious a kneel to God's man of your custom's defence!

ISABELLA:
I can wish eyes in content in his grace,
He has that arrived laid, drew'd they speak and death.
If not find your friends, then he shed you'll be husband'd:
It is unkindly provosted, as too you,
Together, methinks, even, drawer, you have required our tomb,
And curds by his eyes


In [15]:
# Sampling with an explicit temperature.
# TODO: expose temperature as an argument of the model's `generate` method.

idx = tf.zeros((1,1), dtype=tf.int32)
max_new_tokens = 500
# Softmax temperature: the logits are divided by it, so values below 1 sharpen
# the distribution (safer, more repetitive text) and values above 1 flatten it
# (more random text). 0.2 is the same scaling as multiplying the logits by 5.
temperature = 0.2
for _ in range(max_new_tokens):
    # crop idx to the last block_size tokens
    idx_cond = idx[:, -block_size:]
    # get the predictions
    logits = m(idx_cond)
    # keep only the last position and rescale its logits by the temperature
    next_logits = logits[:, -1, :] / temperature
    # sampling next id - `tf.random.categorical` takes logits as argument
    id_next = tf.random.categorical(next_logits, 1, dtype=tf.int32)
    # appending to the idx
    idx = tf.concat((idx, id_next), axis=-1)

print(decode(idx[0].numpy()))


The sea of the story of the time
That we have set down to the sea,
And pardon the sea of the story the city
And the contract of the profit of the story,
The state of the sea of the state of the soldiers,
That the present stone of the story stars,
Which we are not so seen to the rest of the steel,
Which we have strong'd to the fair of the world,
And the sense of the truth of the state of the world,
The provost of the consul of the death,
And the world of the state of the world,
Who was the world 
