In [1]:
import sys
sys.path.append("..")

In [2]:
import bytelevel

In [3]:
from sklearn.datasets import fetch_20newsgroups
from pprint import pprint
import numpy as np
from numpy.linalg import norm
import pickle

In [4]:
from keras.layers import Embedding, Conv1D, Input, GRU, LSTM, Bidirectional, Dense, UpSampling1D, Dropout, TimeDistributed, RepeatVector
from keras.models import Model

Using TensorFlow backend.


In [5]:
from hyper_params import *
import text_encoder as te
import text_decoder as td
from data_set import *

In [6]:
# Read the whole corpus file into memory as a single string.
# NOTE(review): no explicit encoding is passed, so the platform default is used — confirm it matches the file.
with open('../data/norvig/big.txt') as f:
    data = f.read()

In [7]:
# Split the corpus text into chunks; make_chunks is not defined in this notebook
# (presumably it arrives through one of the star imports above — verify).
chunks = make_chunks(data)

In [8]:
# Random generator with a fixed seed (42); it is passed to the shuffle and to
# every SlicedData.Random call below, so its state advances as cells are run.
r = np.random.RandomState(42)

In [9]:
# Shuffle the chunks in place with the seeded generator, then split them into
# training and test text. Re-running this cell reshuffles the already shuffled
# list with an advanced generator state, so the split is only reproducible when
# the notebook is run top to bottom from a fresh kernel.
r.shuffle(chunks)
train_text, test_text = make_train_test(chunks)

In [19]:
# Hyper-parameters for the encoder; the *Hyper classes come from the project
# imports above and their argument meanings are not visible in this notebook.
# NOTE(review): presumably EmbeddingHyper(vocabulary size, embedding width) and
# ConvHyper(filters, kernel size, stride) — confirm against hyper_params.
embed_h = EmbeddingHyper(256, 64)
conv_h = ConvHyper(128, 6, 4)
# Non-LSTM, bidirectional RNN with return_sequences=False.
rnn_h = RnnHyper(256, is_lstm=False, is_bidirectional=True, return_sequences=False)
# The encoder is described by the embedding settings plus an ordered list of layer settings.
encoder_h = te.Hyper(embed_h, [conv_h, rnn_h])

In [20]:
# Hyper-parameters for the decoder: a unidirectional, non-LSTM RNN that returns
# the full sequence (unroll=True), followed by a deconvolution stage.
dernn_h = RnnHyper(64, is_lstm=False, is_bidirectional=False, return_sequences=True, unroll=True)
dec_h = DeconvHyper(128, 6, 4)
# NOTE(review): the leading 256 is presumably the size of the output distribution — confirm in text_decoder.
decoder_h = td.Hyper(256, [dernn_h, dec_h])

In [21]:
# Build the encoder and decoder once; make_model below calls these same two
# objects for every input length.
encoder = encoder_h.make_layer()
decoder = decoder_h.make_layer()

In [22]:
def make_model(max_len):
    """Wire the module-level ``encoder`` and ``decoder`` into a compiled Keras model.

    Args:
        max_len: Length of the input sequence; used for the input shape and
            forwarded to ``decoder`` as its second argument.

    Returns:
        A ``Model`` from the text input to the decoder output, compiled with
        the Adam optimizer, categorical cross-entropy loss and the
        categorical-accuracy metric.
    """
    text_input = Input(name='text_input', shape=(max_len,))
    outputs = decoder(encoder(text_input), max_len)

    compile_options = {
        'optimizer': 'adam',
        'loss': 'categorical_crossentropy',
        'metrics': ['categorical_accuracy'],
    }
    net = Model(text_input, outputs)
    net.compile(**compile_options)
    return net

In [23]:
# One model per supported input length (64, 128 and 256), all built by make_model.
model64, model128, model256 = make_model(64), make_model(128), make_model(256)

In [24]:
# Lookup from input length to model; training_round selects its model from this dict.
models = {64: model64, 128: model128, 256: model256 }

In [16]:
def training_round(max_len, history=None, epochs_per_round=5, batch_size=100):
    """Run one more round of training on ``models[max_len]`` and return the merged history.

    Each call draws a fresh train/test sample with ``SlicedData.Random`` (using
    the module-level ``train_text``, ``test_text`` and random state ``r``) and
    continues fitting from the epoch at which ``history`` stops.

    Args:
        max_len: Key into ``models``; also forwarded to ``SlicedData.Random``.
        history: Dict of per-epoch metric lists accumulated by earlier rounds
            (the ``.history`` attribute of a Keras ``History``). ``None`` or an
            empty dict starts the epoch count at 0.
        epochs_per_round: Number of epochs to train in this call.
        batch_size: Mini-batch size handed to ``model.fit``.

    Returns:
        On the first round, the history dict produced by ``model.fit``;
        afterwards a new dict with the keys of ``history``, each list extended
        by this round's values.

    Raises:
        KeyError: If a non-empty ``history`` has no ``'loss'`` entry, or if one
            of its keys is missing from this round's results.
    """
    # Resume epoch numbering where the accumulated history stops, so Keras logs
    # "Epoch 6/10", "Epoch 11/15", ... across successive rounds.
    initial_epoch = len(history['loss']) if history else 0

    # NOTE(review): 10000 is presumably the number of training slices to draw
    # (the fit log reports "Train on 10000 samples") — confirm in data_set.
    train, test = SlicedData.Random(train_text, test_text, max_len, 10000, r)

    model = models[max_len]
    # Keras treats `epochs` as the index of the final epoch, not a count, hence
    # the addition of initial_epoch.
    round_history = model.fit(x=train.x, y=train.y,
                              epochs=initial_epoch + epochs_per_round,
                              batch_size=batch_size,
                              validation_data=(test.x, test.y),
                              initial_epoch=initial_epoch).history
    if not history:
        return round_history
    return {key: history[key] + round_history[key] for key in history.keys()}

In [25]:
# No accumulated history yet: the next training_round call starts at epoch 0.
history = None

In [26]:
# Up to 500 training rounds on the length-64 model; each round returns the
# merged history, which is fed back in so epoch numbering keeps increasing.
for i in range(500):
    print(i)
    history = training_round(64, history)

0
Train on 10000 samples, validate on 1000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
1
Train on 10000 samples, validate on 1000 samples
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
2
Train on 10000 samples, validate on 1000 samples
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
3


KeyboardInterrupt: 

In [91]:
# Second experiment: build every layer explicitly instead of going through
# te.Hyper / td.Hyper, so the same layer objects can be wired into several models.
embed_h = EmbeddingHyper(256, 64)
conv_h = ConvHyper(128, 6, 4)
rnn_h = RnnHyper(256, is_lstm=False, is_bidirectional=True, return_sequences=False)
dec_h = DeconvHyper(128, 6, 4)

# Encoder layers: embedding, three convolution layers from the same settings, then the RNN.
emb = embed_h.make_layer('')
cnn0 = conv_h.make_layer('cnn0')
cnn1 = conv_h.make_layer('cnn1')
cnn2 = conv_h.make_layer('cnn2')
rnn = rnn_h.make_layer('encoder_rnn')

# Decoder layers: a plain Keras GRU (not built from RnnHyper), three deconvolution
# layers, and a softmax Dense layer with 256 outputs.
dernn = GRU(256, return_sequences=True, unroll=True)
# make_layers returns a pair; only the second element of the first call (`up`)
# is kept and it is reused before each of dec0, dec1 and dec2 below.
dec0, up = dec_h.make_layers('dcnn0')
dec1, _ = dec_h.make_layers('dcnn1')
dec2, _ = dec_h.make_layers('dcnn2')
dense = Dense(256, activation='softmax', name='probs')

In [92]:
# Exploratory wiring of the layers for a single input length, printing the
# tensor shape after every step to check that the output length matches the input.
max_len = 256
x = Input(shape=(max_len,), name='text_input')
h = emb(x)
print(h.shape)
# Per the printed shapes, each of the three convolutions shortens the sequence by a factor of 4.
h = cnn0(h)
print(h.shape)
h = cnn1(h)
print(h.shape)
h = cnn2(h)
print(h.shape)
h = rnn(h)
print(h.shape)
# Repeat the RNN output once per time step of the shortened sequence
# (max_len / 64, where 64 = 4 * 4 * 4 from the three convolutions).
h = RepeatVector(int(max_len / 64))(h)
print(h.shape)
h = dernn(h)
print(h.shape)
# Three rounds of `up` followed by a deconvolution layer; per the printed
# shapes each `up` multiplies the sequence length by 4.
h = up(h)
print(h.shape)
h = dec0(h)
print(h.shape)
h = up(h)
print(h.shape)
h = dec1(h)
print(h.shape)
h = up(h)
print(h.shape)
h = dec2(h)
print(h.shape)
# Apply the softmax Dense layer independently at every time step.
h = TimeDistributed(dense)(h)
print(h.shape)
# NOTE(review): the cells visible below train model64/model128/model256 from
# make_model; this `model` is not referenced again in the visible notebook.
model = Model(x, h)
model.compile(optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'])


(?, 256, 64)
(?, 64, 128)
(?, 16, 128)
(?, 4, 128)
(?, 512)
(?, 4, 512)
(?, 4, 256)
(?, 16, 256)
(?, 16, 128)
(?, 64, 128)
(?, 64, 128)
(?, 256, 128)
(?, 256, 128)
(?, 256, 256)


In [93]:
def make_model(max_len):
    """Assemble and compile a model for inputs of length ``max_len`` from the module-level layers.

    The input passes through ``emb``, ``cnn0``..``cnn2`` and ``rnn``; the
    result is repeated ``int(max_len / 64)`` times, run through ``dernn``,
    then through three rounds of ``up`` followed by ``dec0``/``dec1``/``dec2``,
    and finally through ``dense`` applied per time step. The shape of every
    intermediate tensor is printed, in order.

    Args:
        max_len: Length of the input sequence.

    Returns:
        The ``Model`` compiled with the Adam optimizer, categorical
        cross-entropy loss and the categorical-accuracy metric.
    """
    def traced(layer, tensor):
        # Apply one layer and echo the resulting shape as a wiring check.
        result = layer(tensor)
        print(result.shape)
        return result

    text_input = Input(shape=(max_len,), name='text_input')

    # Encoder side.
    h = traced(emb, text_input)
    for conv in (cnn0, cnn1, cnn2):
        h = traced(conv, h)
    h = traced(rnn, h)

    # Decoder side: the same `up` layer precedes each deconvolution layer.
    h = traced(RepeatVector(int(max_len / 64)), h)
    h = traced(dernn, h)
    for deconv in (dec0, dec1, dec2):
        h = traced(up, h)
        h = traced(deconv, h)
    probs = traced(TimeDistributed(dense), h)

    net = Model(text_input, probs)
    net.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['categorical_accuracy'])
    return net

In [94]:
# One model per input length. All three are wired from the same layer objects
# (emb, cnn0..cnn2, rnn, dernn, up, dec0..dec2, dense), which in Keras means
# they share those layers' weights.
model64, model128, model256 = make_model(64), make_model(128), make_model(256)

(?, 64, 64)
(?, 16, 128)
(?, 4, 128)
(?, 1, 128)
(?, 512)
(?, 1, 512)
(?, 1, 256)
(?, 4, 256)
(?, 4, 128)
(?, 16, 128)
(?, 16, 128)
(?, 64, 128)
(?, 64, 128)
(?, 64, 256)
(?, 128, 64)
(?, 32, 128)
(?, 8, 128)
(?, 2, 128)
(?, 512)
(?, 2, 512)
(?, 2, 256)
(?, 8, 256)
(?, 8, 128)
(?, 32, 128)
(?, 32, 128)
(?, 128, 128)
(?, 128, 128)
(?, 128, 256)
(?, 256, 64)
(?, 64, 128)
(?, 16, 128)
(?, 4, 128)
(?, 512)
(?, 4, 512)
(?, 4, 256)
(?, 16, 256)
(?, 16, 128)
(?, 64, 128)
(?, 64, 128)
(?, 256, 128)
(?, 256, 128)
(?, 256, 256)


In [95]:
# Lookup from input length to model; training_round selects its model from this dict.
models = {64: model64, 128: model128, 256: model256 }

In [96]:
def check_weights(m0, m1):
    """Print, as one list, the norm of the difference between corresponding weight arrays.

    Weight arrays are paired in the order returned by ``get_weights()``;
    pairing stops at the shorter of the two lists.

    Args:
        m0: First object exposing ``get_weights()``.
        m1: Second object exposing ``get_weights()``.
    """
    distances = []
    for first, second in zip(m0.get_weights(), m1.get_weights()):
        distances.append(norm(second - first))
    print(distances)

In [100]:
def training_round(max_len, history=None, epochs_per_round=5, batch_size=32):
    """Run one more round of training on ``models[max_len]`` and return the merged history.

    Each call draws a fresh train/test sample with ``SlicedData.Random`` (using
    the module-level ``train_text``, ``test_text`` and random state ``r``) and
    continues fitting from the epoch at which ``history`` stops.

    Args:
        max_len: Key into ``models``; also forwarded to ``SlicedData.Random``.
        history: Dict of per-epoch metric lists accumulated by earlier rounds
            (the ``.history`` attribute of a Keras ``History``). ``None`` or an
            empty dict starts the epoch count at 0.
        epochs_per_round: Number of epochs to train in this call.
        batch_size: Mini-batch size handed to ``model.fit``.

    Returns:
        On the first round, the history dict produced by ``model.fit``;
        afterwards a new dict with the keys of ``history``, each list extended
        by this round's values.

    Raises:
        KeyError: If a non-empty ``history`` has no ``'loss'`` entry, or if one
            of its keys is missing from this round's results.
    """
    # Resume epoch numbering where the accumulated history stops, so Keras logs
    # "Epoch 6/10", "Epoch 11/15", ... across successive rounds.
    initial_epoch = len(history['loss']) if history else 0

    # NOTE(review): 10000 is presumably the number of training slices to draw
    # (the fit log reports "Train on 10000 samples") — confirm in data_set.
    train, test = SlicedData.Random(train_text, test_text, max_len, 10000, r)

    model = models[max_len]
    # Keras treats `epochs` as the index of the final epoch, not a count, hence
    # the addition of initial_epoch.
    round_history = model.fit(x=train.x, y=train.y,
                              epochs=initial_epoch + epochs_per_round,
                              batch_size=batch_size,
                              validation_data=(test.x, test.y),
                              initial_epoch=initial_epoch).history
    if not history:
        return round_history
    return {key: history[key] + round_history[key] for key in history.keys()}

In [99]:
# First five epochs for the length-256 model, run directly (batch size 50)
# rather than through training_round.
max_len = 256
train, test = SlicedData.Random(train_text, test_text, max_len, 10000, r)
history = model256.fit(x=train.x, 
        y=train.y,
        epochs=5, batch_size=50,
        validation_data=(test.x, test.y))
# Keep only the metrics dict: training_round expects a dict with a 'loss' list,
# not the Keras History object. Note that `history` changes type on this line.
history = history.history

Train on 10000 samples, validate on 1000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [101]:
# Ten further training rounds on the length-256 model, continuing from the
# history dict produced by the direct fit above.
for i in range(10):
    print(i)
    history = training_round(256, history)

0
Train on 10000 samples, validate on 1000 samples
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1
Train on 10000 samples, validate on 1000 samples
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
2
Train on 10000 samples, validate on 1000 samples
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
3
Train on 10000 samples, validate on 1000 samples
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
4
Train on 10000 samples, validate on 1000 samples
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
5
Train on 10000 samples, validate on 1000 samples
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
6
Train on 10000 samples, validate on 1000 samples
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
7
Train on 10000 samples, validate on 1000 samples
Epoch 41/45
Epoch 42/45
Epoch 43/45
Epoch 44/45
Epoch 45/45
8
Train on 10000 samples, validate on 1000 samples
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
9
Tra

In [104]:
# Eighty more training rounds on the length-256 model; the accumulated history
# keeps the epoch numbering continuous with the earlier runs.
for i in range(80):
    print(i)
    history = training_round(256, history)

0
Train on 10000 samples, validate on 1000 samples
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
1
Train on 10000 samples, validate on 1000 samples
Epoch 61/65
Epoch 62/65
Epoch 63/65
Epoch 64/65
Epoch 65/65
2
Train on 10000 samples, validate on 1000 samples
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70
3
Train on 10000 samples, validate on 1000 samples
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75
4
Train on 10000 samples, validate on 1000 samples
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
5
Train on 10000 samples, validate on 1000 samples
Epoch 81/85
Epoch 82/85
Epoch 83/85
Epoch 84/85
Epoch 85/85
6
Train on 10000 samples, validate on 1000 samples
Epoch 86/90
Epoch 87/90
Epoch 88/90
Epoch 89/90
Epoch 90/90
7
Train on 10000 samples, validate on 1000 samples
Epoch 91/95
Epoch 92/95
Epoch 93/95
Epoch 94/95
Epoch 95/95
8
Train on 10000 samples, validate on 1000 samples
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100

Epoch 104/105
Epoch 105/105
10
Train on 10000 samples, validate on 1000 samples
Epoch 106/110
Epoch 107/110
Epoch 108/110
Epoch 109/110
Epoch 110/110
11
Train on 10000 samples, validate on 1000 samples
Epoch 111/115
Epoch 112/115
Epoch 113/115
Epoch 114/115
Epoch 115/115
12
Train on 10000 samples, validate on 1000 samples
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120
13
Train on 10000 samples, validate on 1000 samples
Epoch 121/125
Epoch 122/125
Epoch 123/125
Epoch 124/125
Epoch 125/125
14
Train on 10000 samples, validate on 1000 samples
Epoch 126/130
Epoch 127/130
Epoch 128/130
Epoch 129/130
Epoch 130/130
15
Train on 10000 samples, validate on 1000 samples
Epoch 131/135
Epoch 132/135
Epoch 133/135
Epoch 134/135
Epoch 135/135
16
Train on 10000 samples, validate on 1000 samples
Epoch 136/140
Epoch 137/140
Epoch 138/140
Epoch 139/140
Epoch 140/140
17
Train on 10000 samples, validate on 1000 samples
Epoch 141/145
Epoch 142/145
Epoch 143/145
Epoch 144/145
Epoch 145/

28
Train on 10000 samples, validate on 1000 samples
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
29
Train on 10000 samples, validate on 1000 samples
Epoch 201/205
Epoch 202/205
Epoch 203/205
Epoch 204/205
Epoch 205/205
30
Train on 10000 samples, validate on 1000 samples
Epoch 206/210
Epoch 207/210
Epoch 208/210
Epoch 209/210
Epoch 210/210
31
Train on 10000 samples, validate on 1000 samples
Epoch 211/215
Epoch 212/215
Epoch 213/215
Epoch 214/215
Epoch 215/215
32
Train on 10000 samples, validate on 1000 samples
Epoch 216/220
Epoch 217/220
Epoch 218/220
Epoch 219/220
Epoch 220/220
33
Train on 10000 samples, validate on 1000 samples
Epoch 221/225
Epoch 222/225
Epoch 223/225
Epoch 224/225
Epoch 225/225
34
Train on 10000 samples, validate on 1000 samples
Epoch 226/230
Epoch 227/230
Epoch 228/230
Epoch 229/230
Epoch 230/230
35
Train on 10000 samples, validate on 1000 samples
Epoch 231/235
Epoch 232/235
Epoch 233/235
Epoch 234/235
Epoch 235/235
36
Train on 10000 sample

Epoch 288/290
Epoch 289/290
Epoch 290/290
47
Train on 10000 samples, validate on 1000 samples
Epoch 291/295
Epoch 292/295
Epoch 293/295
Epoch 294/295
Epoch 295/295
48
Train on 10000 samples, validate on 1000 samples
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300
49
Train on 10000 samples, validate on 1000 samples
Epoch 301/305
Epoch 302/305
Epoch 303/305
Epoch 304/305
Epoch 305/305
50
Train on 10000 samples, validate on 1000 samples
Epoch 306/310
Epoch 307/310
Epoch 308/310
Epoch 309/310
Epoch 310/310
51
Train on 10000 samples, validate on 1000 samples
Epoch 311/315
Epoch 312/315
Epoch 313/315
Epoch 314/315
Epoch 315/315
52
Train on 10000 samples, validate on 1000 samples
Epoch 316/320
Epoch 317/320
Epoch 318/320
Epoch 319/320
Epoch 320/320
53
Train on 10000 samples, validate on 1000 samples
Epoch 321/325
Epoch 322/325
Epoch 323/325
Epoch 324/325
Epoch 325/325
54
Train on 10000 samples, validate on 1000 samples
Epoch 326/330
Epoch 327/330
Epoch 328/330
Epoch 329/

Epoch 380/380
65
Train on 10000 samples, validate on 1000 samples
Epoch 381/385
Epoch 382/385
Epoch 383/385
Epoch 384/385
Epoch 385/385
66
Train on 10000 samples, validate on 1000 samples
Epoch 386/390
Epoch 387/390
Epoch 388/390
Epoch 389/390
Epoch 390/390
67
Train on 10000 samples, validate on 1000 samples
Epoch 391/395
Epoch 392/395
Epoch 393/395
Epoch 394/395
Epoch 395/395
68
Train on 10000 samples, validate on 1000 samples
Epoch 396/400
Epoch 397/400
Epoch 398/400
Epoch 399/400
Epoch 400/400
69
Train on 10000 samples, validate on 1000 samples
Epoch 401/405
Epoch 402/405
Epoch 403/405
Epoch 404/405
Epoch 405/405
70
Train on 10000 samples, validate on 1000 samples
Epoch 406/410
Epoch 407/410
Epoch 408/410
Epoch 409/410
Epoch 410/410
71
Train on 10000 samples, validate on 1000 samples
Epoch 411/415
Epoch 412/415
Epoch 413/415
Epoch 414/415
Epoch 415/415
72
Train on 10000 samples, validate on 1000 samples
Epoch 416/420
Epoch 417/420
Epoch 418/420
Epoch 419/420
Epoch 420/420
73
Train o

In [None]:
# Same loop as the previous training cell; this cell has no execution count,
# i.e. it had not been run when the notebook was saved.
for i in range(80):
    print(i)
    history = training_round(256, history)

In [102]:
# Show the source text of the first four test items; `test` is the module-level
# sample drawn in the direct-fit cell (training_round's `test` is local to it).
test.text[:4]

['oss of St. George, awarded to soldiers for bravery in\naction, and in the company of well-known, elderly, and respected\nracing men was training a trotter of his own for a race. He knew a\nlady on one of the boulevards whom he visited of an evening. He led\nth',
 ' he frowned at his wife, the officers\ngrew still merrier, and some of them could not refrain from\nlaughter, for which they hurriedly sought plausible pretexts. When\nhe had gone, taking his wife with him, and had settled down with her\nin their covered cart,',
 'im that would explain to\nher what had happened and to which she could find no answer.\n\n"Natalie, just a word, only one!" he kept repeating, evidently not\nknowing what to say and he repeated it till Helene came up to them.\n\nHelene returned with Natasha to t',
 "was growing lighter and lighter. That curly\ngrass which always grows by country roadsides became clearly\nvisible, still wet with the night's rain; the drooping branches of the\nbirches, also wet, swa

In [103]:
# Model output for the same four test items, converted to strings by
# bytelevel.prediction2str, for side-by-side comparison with test.text[:4].
bytelevel.prediction2str(model256.predict(test.x[:4]))

['      iee.                                         he\nsoee                 ee        ee " er                       tes\naee e                                            e eies. Ie  e     \n                                                                     ',
 '                 hhh           ee ies\nto  ee                          hheeeeeeeee              oe\naer e         ttthhhhhe                                eeiis. The  eeee                                                          e\n\n\n\n\n\ne                     ',
 '                     ind\nhh                  ll nn    thhhhh                    tiee.\n\n"Io                          ie.                               he\n\naeaae                                                                                                 ',
 '                            eee. Hhe  eeeeee                                                          he\npere                 ttthhhh                                       iee\nan                   

In [105]:
# Same check as the previous prediction cell, executed later (higher execution
# count) to see how the output changed after more training.
bytelevel.prediction2str(model256.predict(test.x[:4]))

[' ee seen.. Ae  ee  eeeeeed oo ceatiies aoe seeaeed oe\nceneie, and on the rereent an tee,-aniees an eeee, and sereen ee\ncotiee aot ane cartinee of eereld ao his ane  os ahtie.. Pe  een e\n\na  eee                                                               ',
 'the eeeeees an his aaie, the serteee,\nten  eeate ceeteee, and aone of the  aeite wad cettinn thnt\nhane iee, fan whith  het has aeite seeiet so seenee coreeens. I\nn  ee wod thaed aating his see                                    e e                         ',
 'et theh shaed aereaee to\nhos waed aod eeteeeed the ie whinh the woted heey oo  oteee.\n\n"Ie teeed aaed a saeed haed hoe?" he want aereeteee, ah eeeted woe\nhen  en that ie wat  otthe serieted ao                                                                ',
 'iee sheeing hos aa  an  tet eee. The eeeen,\nthaed whatd an iee aeeae to ce eiee te eeeeed cerele ao thee\nceseete, aeeet wos waon the wighere feiee the setining te eeee, an eed\nwe thire aoee to         

In [106]:
# Persist the length-256 model to an HDF5 file; the ../models directory must already exist.
model256.save('../models/model256.h5')