In [1]:
import sys
# Add the parent directory to the module search path.
# NOTE(review): presumably this is where the local modules imported below
# (hyper_params, text_encoder, text_decoder, data_set, bowizer, tfidf) live — confirm.
# The path is relative, so it depends on the directory the kernel was started from.
sys.path.append("..")

In [2]:
from keras.models import Model

Using TensorFlow backend.


In [3]:
from sklearn.datasets import fetch_20newsgroups
from pprint import pprint
import numpy as np
from numpy.linalg import norm
import pickle

In [4]:
from hyper_params import *
import text_encoder as te
import text_decoder as td
from data_set import *

In [5]:
import bowizer
import tfidf

In [6]:
# Read the entire corpus file into one string; `norvig` is reused below for
# chunking (make_chunks) and for building the tokenizer (bowizer.TokenMaker).
with open('../data/norvig/big.txt') as corpus_file:
    norvig = corpus_file.read()

In [7]:
# Split the raw corpus into chunks. `make_chunks` is not defined in this notebook;
# presumably it comes from one of the star imports above — TODO confirm which module.
chunks = make_chunks(norvig)

In [8]:
# Seeded NumPy random generator shared by the whole notebook: it shuffles `chunks`
# below and is passed to bowizer.SlicedWordData.Random inside training_round.
r = np.random.RandomState(42)

In [9]:
# RandomState.shuffle reorders `chunks` in place, so re-running this cell without
# re-creating `r` and `chunks` yields a different order (and a different split).
r.shuffle(chunks)
# Split the shuffled chunks into training and held-out text.
# NOTE(review): the split ratio is decided inside make_train_test — not visible here.
train_text, test_text = make_train_test(chunks)

In [10]:
# Build the tokenizer from the full corpus string (i.e. including the text that
# ends up in test_text). `tm.vocab_size` sizes the encoder/decoder below.
# NOTE(review): 3000 is presumably the vocabulary size limit — confirm in bowizer.TokenMaker.
tm = bowizer.TokenMaker([norvig], 3000)

In [11]:
# Encoder configuration: an embedding followed by a conv stage and a recurrent stage.
# NOTE(review): the "+ 1" on the vocabulary size presumably reserves an extra index
# (e.g. padding/unknown) — confirm against TokenMaker.
embed_h = EmbeddingHyper(tm.vocab_size + 1, 256)
# NOTE(review): positional arguments (256, 6, 4) are defined by ConvHyper — meaning not visible here.
conv_h = ConvHyper(256, 6, 4)
# Bidirectional, non-LSTM recurrent stage that does not return sequences.
rnn_h = RnnHyper(512, is_lstm=False, is_bidirectional=True, return_sequences=False)
encoder_h = te.Hyper(embed_h, [conv_h, rnn_h])

In [12]:
# Decoder configuration: a unidirectional, non-LSTM recurrent stage that returns
# sequences (unrolled), followed by a deconv stage.
dernn_h = RnnHyper(512, is_lstm=False, is_bidirectional=False, return_sequences=True, unroll=True)
# NOTE(review): positional arguments (256, 6, 4) mirror ConvHyper(256, 6, 4) in the encoder cell;
# their meaning is defined by DeconvHyper — not visible here.
dec_h = DeconvHyper(256, 6, 4)
# The decoder is sized with the same vocab_size + 1 used for the encoder embedding.
decoder_h = td.Hyper(tm.vocab_size + 1, [dernn_h, dec_h])

In [13]:
# Instantiate the encoder and decoder once; make_model reuses these same two
# objects for every sequence length.
encoder = encoder_h.make_layer()
decoder = decoder_h.make_layer()

In [14]:
def make_model(max_len):
    """Build and compile a Keras model for inputs of length ``max_len``.

    The input is passed through the notebook-level ``encoder`` and then the
    notebook-level ``decoder``; every model built by this function reuses those
    same two objects.

    Args:
        max_len: Length of the input sequence; also forwarded to ``decoder``.

    Returns:
        The compiled ``Model`` (adam optimizer, categorical cross-entropy loss,
        categorical accuracy metric).
    """
    # NOTE(review): `Input` is not imported explicitly in this notebook; it is
    # presumably provided by one of the star imports — confirm.
    text_input = Input(shape=(max_len,), name='text_input')
    reconstruction = decoder(encoder(text_input), max_len)

    compiled = Model(text_input, reconstruction)
    compiled.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['categorical_accuracy'],
    )
    return compiled

In [15]:
# One compiled model per supported input length; all three are built on the same
# `encoder` and `decoder` objects (see make_model).
model64, model128, model256 = make_model(64), make_model(128), make_model(256)

In [16]:
# Lookup table from input length to model; training_round indexes it with max_len.
models = {64: model64, 128: model128, 256: model256 }

In [17]:
def training_round(max_len, history=None, epochs_per_round=5, batch_size=16):
    """Train ``models[max_len]`` for one round on freshly sampled data.

    Each call draws a new train/validation sample with
    ``bowizer.SlicedWordData.Random`` and continues the epoch numbering from
    where ``history`` left off. Reads the notebook-level globals ``train_text``,
    ``test_text``, ``r``, ``tm`` and ``models``.

    Args:
        max_len: Key into ``models``; also forwarded to the data sampler.
        history: Dict of per-epoch metric lists returned by a previous call, or
            ``None`` (or an empty dict) to start from epoch 0.
        epochs_per_round: Number of epochs to run in this call. Defaults to 5,
            the value that was previously hard-coded.
        batch_size: Batch size passed to ``fit``. Defaults to 16, the value
            that was previously hard-coded.

    Returns:
        Dict mapping each metric name to its per-epoch values, with this
        round's values appended to those in ``history``.

    Raises:
        KeyError: if ``max_len`` is not in ``models``, or if a non-empty
            ``history`` lacks the ``'loss'`` key.
    """
    # An empty history holds no epochs, so treat it like None instead of
    # failing on history['loss'] (and instead of merging into an empty dict).
    has_history = bool(history)
    initial_epoch = len(history['loss']) if has_history else 0

    # NOTE(review): 2000 and 10000 are sampler settings defined by
    # bowizer.SlicedWordData.Random — their exact meaning is not visible here.
    train, test = bowizer.SlicedWordData.Random(train_text, test_text, 2000, max_len, 10000, r, tm)
    model = models[max_len]

    # With initial_epoch set, Keras interprets `epochs` as the index of the
    # final epoch rather than a count, hence the offset.
    newhistory = model.fit(x=train.x, y=train.y,
                           epochs=initial_epoch + epochs_per_round,
                           batch_size=batch_size,
                           validation_data=(test.x, test.y),
                           initial_epoch=initial_epoch)

    if not has_history:
        return newhistory.history
    # Append this round's values to the running per-metric lists.
    return {key: history[key] + newhistory.history[key] for key in history}

In [18]:
# No accumulated history yet: the first training_round call starts at epoch 0.
history = None

In [19]:
# 20 rounds on the length-64 model; each training_round call runs 5 more epochs
# (100 in total) and returns the extended history, which is fed into the next round.
for i in range(20):
    print(i)
    history = training_round(64, history)

0
Train on 10000 samples, validate on 1000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
1
Train on 10000 samples, validate on 1000 samples
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
2
Train on 10000 samples, validate on 1000 samples
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
3
Train on 10000 samples, validate on 1000 samples
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
4
Train on 10000 samples, validate on 1000 samples
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
5
Train on 10000 samples, validate on 1000 samples
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
6
Train on 10000 samples, validate on 1000 samples
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
7
Train on 10000 samples, validate on 1000 samples
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
8
Train on 10000 samples, validate on 1000 samples
Epoch 41/45
Epoch 42/45
Epoch 43/45
Epoch 44/45
Epoch 45/45
9
Train on 1000

Epoch 48/50
Epoch 49/50
Epoch 50/50
10
Train on 10000 samples, validate on 1000 samples
Epoch 51/55
Epoch 52/55
Epoch 53/55
Epoch 54/55
Epoch 55/55
11
Train on 10000 samples, validate on 1000 samples
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
12
Train on 10000 samples, validate on 1000 samples
Epoch 61/65
Epoch 62/65
Epoch 63/65
Epoch 64/65
Epoch 65/65
13
Train on 10000 samples, validate on 1000 samples
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70
14
Train on 10000 samples, validate on 1000 samples
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75
15
Train on 10000 samples, validate on 1000 samples
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
16
Train on 10000 samples, validate on 1000 samples
Epoch 81/85
Epoch 82/85
Epoch 83/85
Epoch 84/85
Epoch 85/85
17
Train on 10000 samples, validate on 1000 samples
Epoch 86/90
Epoch 87/90
Epoch 88/90
Epoch 89/90
Epoch 90/90
18
Train on 10000 samples, validate on 1000 samples
Epoch 91/95
Epoc

Epoch 95/95
19
Train on 10000 samples, validate on 1000 samples
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [20]:
# Reset the history so epoch numbering restarts at 0 for the length-128 model;
# the length-64 history collected by the previous cell is discarded here.
history = None
# 20 rounds of training_round on the length-128 model.
for i in range(20):
    print(i)
    history = training_round(128, history)

0
Train on 10000 samples, validate on 1000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
1
Train on 10000 samples, validate on 1000 samples
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
2
Train on 10000 samples, validate on 1000 samples
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
3
Train on 10000 samples, validate on 1000 samples
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
4
Train on 10000 samples, validate on 1000 samples
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
5
Train on 10000 samples, validate on 1000 samples
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
6
Train on 10000 samples, validate on 1000 samples
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
7
Train on 10000 samples, validate on 1000 samples
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
8
Train on 10000 samples, validate on 1000 samples
Epoch 41/45
Epoch 42/45
Epoch 43/45
Epoch 44/45
Epoch 45/45
9
Train on 1000

Epoch 48/50
Epoch 49/50
Epoch 50/50
10
Train on 10000 samples, validate on 1000 samples
Epoch 51/55
Epoch 52/55
Epoch 53/55
Epoch 54/55
Epoch 55/55
11
Train on 10000 samples, validate on 1000 samples
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
12
Train on 10000 samples, validate on 1000 samples
Epoch 61/65
Epoch 62/65
Epoch 63/65
Epoch 64/65
Epoch 65/65
13
Train on 10000 samples, validate on 1000 samples
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70
14
Train on 10000 samples, validate on 1000 samples
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75
15
Train on 10000 samples, validate on 1000 samples
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
16
Train on 10000 samples, validate on 1000 samples
Epoch 81/85
Epoch 82/85
Epoch 83/85
Epoch 84/85
Epoch 85/85
17
Train on 10000 samples, validate on 1000 samples
Epoch 86/90
Epoch 87/90
Epoch 88/90
Epoch 89/90
Epoch 90/90
18
Train on 10000 samples, validate on 1000 samples
Epoch 91/95
Epoc

Epoch 95/95
19
Train on 10000 samples, validate on 1000 samples
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [21]:
# Reset the history again (the length-128 history is discarded) and run another
# 20 rounds on the length-64 model.
# NOTE(review): this is the second pass over max_len=64, and model256 is never
# trained in the cells shown. Confirm that returning to 64 is intentional
# rather than a copy-paste of the earlier cell that was meant to use 256.
history = None
for i in range(20):
    print(i)
    history = training_round(64, history)

0
Train on 10000 samples, validate on 1000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
1
Train on 10000 samples, validate on 1000 samples
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
2
Train on 10000 samples, validate on 1000 samples
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
3
Train on 10000 samples, validate on 1000 samples
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
4
Train on 10000 samples, validate on 1000 samples
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
5
Train on 10000 samples, validate on 1000 samples
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
6
Train on 10000 samples, validate on 1000 samples
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
7
Train on 10000 samples, validate on 1000 samples
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
8
Train on 10000 samples, validate on 1000 samples
Epoch 41/45
Epoch 42/45
Epoch 43/45
Epoch 44/45
Epoch 45/45
9
Train on 1000

Epoch 48/50
Epoch 49/50
Epoch 50/50
10
Train on 10000 samples, validate on 1000 samples
Epoch 51/55
Epoch 52/55
Epoch 53/55
Epoch 54/55
Epoch 55/55
11
Train on 10000 samples, validate on 1000 samples
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
12
Train on 10000 samples, validate on 1000 samples
Epoch 61/65
Epoch 62/65
Epoch 63/65
Epoch 64/65
Epoch 65/65
13
Train on 10000 samples, validate on 1000 samples
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70
14
Train on 10000 samples, validate on 1000 samples
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75
15
Train on 10000 samples, validate on 1000 samples
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
16
Train on 10000 samples, validate on 1000 samples
Epoch 81/85
Epoch 82/85
Epoch 83/85
Epoch 84/85
Epoch 85/85
17
Train on 10000 samples, validate on 1000 samples
Epoch 86/90
Epoch 87/90
Epoch 88/90
Epoch 89/90
Epoch 90/90
18
Train on 10000 samples, validate on 1000 samples
Epoch 91/95
Epoc

Epoch 95/95
19
Train on 10000 samples, validate on 1000 samples
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [22]:
# Persist the length-64 model. Only model64 is saved in the cells shown;
# model128 and model256 are not written out here.
model64.save('../models/wl_model64')