In [1]:
import sys
sys.path.append("..")

In [2]:
import bytelevel

In [3]:
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [4]:
from sklearn.datasets import fetch_20newsgroups
from pprint import pprint
import numpy as np
import pickle

In [5]:
from keras.layers import Embedding, Conv1D, Input, GRU, LSTM, Bidirectional, Dense, UpSampling1D, Dropout, TimeDistributed, RepeatVector
from keras.models import Model

In [6]:
r = np.random.RandomState(42)

In [7]:
r = np.random.RandomState(42)

def random_chop(s, r, m):
    n = len(s)
    if n <= m:
        return s
    k = r.randint(n - m)
    s = s[k:]
    return s[:m]

In [8]:
train = fetch_20newsgroups(subset='train')
test = fetch_20newsgroups(subset='test')

In [24]:
max_len = 64

def dataset(x):
    x = [random_chop(s, r, max_len + 1) for s in x]
    x = bytelevel.encode(x)
    x = pad_sequences(x, max_len + 1)
    return x

x_train = dataset(train['data'])
x_test = dataset(test['data'])
x_test = x_test[:2000]

In [6]:
class EmbeddingHyper(object):
    def __init__(self, vocab_size, embedding_dim):
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        
    @staticmethod
    def Random(r):
        embedding_dim = r.choice([2 ** i for i in range(6, 10)])
        return EmbeddingHyper(256, embedding_dim)
        
    def display(self):
        print("embedding")
        print("vocab size=%d" % self.vocab_size)
        print("embedding dimension=%d" % self.embedding_dim)
        
    def make_layer(self, name='embedder'):
        return Embedding(self.vocab_size, 
            self.embedding_dim , name=name)
    
class ConvHyper(object):
    def __init__(self, filters, kernel_size=3, stride=2):
        self.filters = filters
        self.kernel_size = kernel_size
        self.stride = stride
        
    @staticmethod
    def Random(r):
        filters = r.choice([2 ** i for i in range(6, 10)])
        kernel_size = r.randint(8) + 2
        stride = r.randint(4) + 1
        return ConvHyper(filters, kernel_size, stride)
        
    def display(self):
        print("conv 1d")
        print("filters=%d" % self.filters)
        print("kernel size=%d" % self.kernel_size)
        print("stride = %d" % self.stride)
        
    def make_layer(self, name):
        return Conv1D(self.filters, self.kernel_size, strides=self.stride, padding='causal', activation='relu', name=name)
    
class RnnHyper(object):
    def __init__(self, hidden_dim, is_lstm, is_bidirectional):
        self.hidden_dim = hidden_dim
        self.is_lstm = is_lstm
        self.is_bidirectional = is_bidirectional
        
    @staticmethod
    def Random(r):
        hidden_dim = r.choice([2 ** i for i in range(6, 10)])
        is_lstm = bool(r.randint(2))
        is_bidirectional = bool(r.randint(2))
        return RnnHyper(hidden_dim, is_lstm, is_bidirectional)

    def display(self):
        print("RNN")
        print("hidden dimension=%d" % self.hidden_dim)
        if self.is_bidirectional:
            print("bidirectional")
        if self.is_lstm:
            print("lstm")
        else:
            print("gru")            
        
    def make_layer(self, name, return_sequences):
        if self.is_lstm:
            make_rnn = LSTM
        else:
            make_rnn = GRU
        if self.is_bidirectional:
            return Bidirectional(make_rnn(self.hidden_dim, return_sequences=return_sequences), name=name)
        return make_rnn(self.hidden_dim, return_sequences=return_sequences, name=name)
    
class RnnCnnHyper(object):
    def __init__(self, embedder, conv, rnn):
        self.embedder = embedder
        self.conv = conv
        self.rnn = rnn
        
    @staticmethod
    def Random(r):
        embedder = EmbeddingHyper.Random(r)
        conv = ConvHyper.Random(r)
        rnn = RnnHyper.Random(r)
        
        return RnnCnnHyper(embedder, conv, rnn)

    def display(self):
        self.embedder.display()
        print()
        self.conv.display()
        print()
        self.rnn.display()
        print()
        
    def make_layers(self, name, return_sequences):
        if name is not None and len(name):
            prefix = '%s_' % name
        else:
            prefix = ''
        embedder = self.embedder.make_layer(name='%sembedder' % prefix)
        conv = self.conv.make_layer(name='%sconv' % prefix)
        rnn = self.rnn.make_layer(name='%srnn' % prefix, return_sequences=return_sequences)
        dense = Dense(self.embedder.vocab_size, activation='softmax', name='%sprobs' % prefix)
        return embedder, conv, rnn, dense
    
class DeconvHyper(object):
    def __init__(self, filters, kernel_size=3, upsample=2):
        self.filters = filters
        self.kernel_size = kernel_size
        self.upsample = upsample
        
    @staticmethod
    def Random(r, upsample=None):
        filters = r.choice([2 ** i for i in range(6, 10)])
        kernel_size = r.randint(8) + 2
        if upsample is None:
            upsample = r.randint(4) + 1
        return DeconvHyper(filters, kernel_size, upsample)
        
    def display(self):
        print("deconv 1d")
        print("filters=%d" % self.filters)
        print("kernel size=%d" % self.kernel_size)
        print("upsample = %d" % self.upsample)
        
    def make_layer(self, name):
        conv = Conv1D(self.filters, self.kernel_size, strides=1, padding='causal', activation='relu', name=name)
        return conv, UpSampling1D(self.upsample)
    

In [11]:
def explore(seed, count, max_len):
    r = np.random.RandomState(seed)
    histories = []
    
    def dataset(x):
        x = [random_chop(s, r, max_len + 1) for s in x]
        x = bytelevel.encode(x)
        x = pad_sequences(x, max_len + 1)
        return x

    x_train = dataset(train['data'])
    x_test = dataset(test['data'])
    x_test = x_test[:2000]

    for i in range(count):
        seed = r.randint(200000)
        name = "RnnCnn%d" % seed
        r_i = np.random.RandomState(seed)
        hyper = RnnCnnHyper.Random(r_i)
        hyper.display()

        embed, conv, rnn, pred = hyper.make_layers('', False)

        x = Input(shape=(max_len,), name='text_input')
        h = embed(x)
        print(h.shape)
        h = conv(h)
        print(h.shape)
        h = rnn(h)
        print(h.shape)
        h = pred(h)
        print(h.shape)

        model = Model(x, h)
        model.compile(optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['sparse_categorical_accuracy'])

        history = model.fit(x=x_train[:, :max_len], 
                     y=x_train[:, max_len],
                    epochs=3, batch_size=10,
                    validation_data=(x_test[:, :max_len], x_test[:, max_len]))

        histories.append([seed, history])
        
        with open("../models/%s.pkl" % name, 'wb') as f:
            pickle.dump([seed, hyper, history.history], f)
            
        model.save("../models/%s.h5" % name)
        
        print()
        print()
        print()
    
    return histories

In [12]:
histories = explore(42, 30, 50)

embedding
vocab size=256
embedding dimension=256

conv 1d
filters=256
kernel size=3
stride = 4

RNN
hidden dimension=256
lstm

(?, 50, 256)
(?, 12, 256)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=512

conv 1d
filters=512
kernel size=4
stride = 4

RNN
hidden dimension=512
lstm

(?, 50, 512)
(?, 12, 512)
(?, 512)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=64

conv 1d
filters=256
kernel size=3
stride = 3

RNN
hidden dimension=64
bidirectional
gru

(?, 50, 64)
(?, 16, 256)
(?, 128)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=128

conv 1d
filters=256
kernel size=6
stride = 2

RNN
hidden dimension=64
bidirectional
lstm

(?, 50, 128)
(?, 23, 256)
(?, 128)
(?, 256)
Train on 11314 samples, validate on 2000 sample

Epoch 3/3



embedding
vocab size=256
embedding dimension=256

conv 1d
filters=512
kernel size=6
stride = 2

RNN
hidden dimension=512
bidirectional
lstm

(?, 50, 256)
(?, 23, 512)
(?, 1024)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=512

conv 1d
filters=512
kernel size=5
stride = 4

RNN
hidden dimension=256
bidirectional
gru

(?, 50, 512)
(?, 12, 512)
(?, 512)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=64

conv 1d
filters=256
kernel size=2
stride = 2

RNN
hidden dimension=256
lstm

(?, 50, 64)
(?, 25, 256)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=256

conv 1d
filters=256
kernel size=2
stride = 4

RNN
hidden dimension=128
lstm

(?, 50, 256)
(?, 13, 256)
(?, 128)
(?, 256)
Train on 11314 samples, validat

In [13]:
histories[0][1].history

{'val_loss': [3.3535089254379273, 3.280325162410736, 3.2470242130756377],
 'val_sparse_categorical_accuracy': [0.15000000320374965,
  0.19150000415742396,
  0.19500000398606063],
 'loss': [3.4505941521179473, 3.296464189531442, 3.1968547680446657],
 'sparse_categorical_accuracy': [0.1695244864817325,
  0.17968888495453933,
  0.19895704868447978]}

In [14]:
val_acc = lambda x: x[1].history['val_sparse_categorical_accuracy'][-1]

In [16]:
histories.sort(key=val_acc, reverse=True)

In [17]:
[val_acc(x) for x in histories]

[0.3435000073164701,
 0.3425000065565109,
 0.33200000803917645,
 0.3285000066459179,
 0.3250000074505806,
 0.3230000077188015,
 0.3230000064894557,
 0.323000006377697,
 0.3175000072270632,
 0.3165000065788627,
 0.31350000761449337,
 0.3080000070855021,
 0.3080000065267086,
 0.307500006891787,
 0.305000006519258,
 0.30250000681728123,
 0.2980000066012144,
 0.2505000047758222,
 0.23150000479072333,
 0.22750000536441803,
 0.2275000049546361,
 0.22450000502169132,
 0.2230000051110983,
 0.21600000452250243,
 0.204000004529953,
 0.20300000436604024,
 0.19500000398606063,
 0.19000000402331352,
 0.18750000394880773,
 0.18250000409781933]

In [21]:
def ReHyper(seed):
    r = np.random.RandomState(seed)
    hyper = RnnCnnHyper.Random(r)
    return hyper, r

In [19]:
hyper, _ =ReHyper(histories[0][0])

In [20]:
for seed, _ in histories[:6]:
    hyper, _ = ReHyper(seed)
    print(seed)
    hyper.display()

154351
embedding
vocab size=256
embedding dimension=256

conv 1d
filters=512
kernel size=6
stride = 2

RNN
hidden dimension=512
bidirectional
lstm

150338
embedding
vocab size=256
embedding dimension=128

conv 1d
filters=128
kernel size=5
stride = 1

RNN
hidden dimension=128
bidirectional
gru

17970
embedding
vocab size=256
embedding dimension=256

conv 1d
filters=128
kernel size=2
stride = 2

RNN
hidden dimension=64
bidirectional
gru

154443
embedding
vocab size=256
embedding dimension=128

conv 1d
filters=512
kernel size=6
stride = 4

RNN
hidden dimension=256
bidirectional
gru

27315
embedding
vocab size=256
embedding dimension=128

conv 1d
filters=512
kernel size=8
stride = 3

RNN
hidden dimension=256
bidirectional
gru

62245
embedding
vocab size=256
embedding dimension=512

conv 1d
filters=128
kernel size=2
stride = 4

RNN
hidden dimension=128
gru



In [156]:
x = Input(shape=(int(max_len / 4), 16), name='seq_input')
up = UpSampling1D(size=4)
h = up(x)
deconv = Conv1D(16, 4, strides=2, name='deconv', padding='causal')
h = deconv(h)

In [157]:
h.shape

TensorShape([Dimension(None), Dimension(128), Dimension(16)])

In [137]:
foo = r.rand(2, 64, 16)

In [145]:
model = Model(x, up(x))

In [146]:
bar = model.predict(foo)

In [147]:
bar.shape

(2, 256, 16)

In [144]:
embed_h = EmbeddingHyper(256, 256)
conv_h = ConvHyper(512, 6, 2)
rnn_h = RnnHyper(512, is_lstm=True, is_bidirectional=True)

In [145]:
hyper = RnnCnnHyper(embed_h, conv_h, rnn_h)

In [146]:
max_len = 32

def dataset(x):
    x = [random_chop(s, r, max_len + 1) for s in x]
    x = bytelevel.encode(x)
    x = pad_sequences(x, max_len + 1)
    return x

x_train = dataset(train['data'])
x_test = dataset(test['data'])
x_test = x_test[:2000]

# first with no dropout
embed, conv0, rnn, pred = hyper.make_layers('', False)


x = Input(shape=(None,), name='text_input')
h = embed(x)
print(h.shape)
h = conv0(h)
print(h.shape)
h = rnn(h)
print(h.shape)
h = pred(h)
print(h.shape)

model = Model(x, h)
model.compile(optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'])

history = model.fit(x=x_train[:, :max_len], 
            y=x_train[:, max_len],
            epochs=5, batch_size=100,
            validation_data=(x_test[:, :max_len], x_test[:, max_len]))


(?, ?, 256)
(?, ?, 512)
(?, 1024)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [147]:
# refresh data
max_len = 32

x_train = dataset(train['data'])
x_test = dataset(test['data'])
x_test = x_test[:2000]

# add a bit of dropout
drop = Dropout(0.2)

h = embed(x)
print(h.shape)
h = conv0(h)
print(h.shape)
h = drop(h)
print(h.shape)
h = rnn(h)
print(h.shape)
h = pred(h)
print(h.shape)

model = Model(x, h)
model.compile(optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'])

history = model.fit(x=x_train[:, :max_len], 
            y=x_train[:, max_len],
            epochs=5, batch_size=100,
            validation_data=(x_test[:, :max_len], x_test[:, max_len]))



(?, ?, 256)
(?, ?, 512)
(?, ?, 512)
(?, 1024)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [148]:
# get some data from longer strings
max_len = 64

x_train = dataset(train['data'])
x_test = dataset(test['data'])
x_test = x_test[:2000]

# add another convolutional layer
conv1 = conv_h.make_layer('')

h = embed(x)
print(h.shape)
h = conv0(h)
print(h.shape)
h = drop(h)
print(h.shape)
h = conv1(h)
print(h.shape)
h = drop(h)
print(h.shape)
h = rnn(h)
print(h.shape)
h = pred(h)
print(h.shape)

model = Model(x, h)
model.compile(optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'])

history = model.fit(x=x_train[:, :max_len], 
            y=x_train[:, max_len],
            epochs=3, batch_size=100,
            validation_data=(x_test[:, :max_len], x_test[:, max_len]))


(?, ?, 256)
(?, ?, 512)
(?, ?, 512)
(?, ?, 512)
(?, ?, 512)
(?, 1024)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [161]:
def retrain(max_len):
    def dataset(x):
        x = [random_chop(s, r, max_len + 1) for s in x]
        x = bytelevel.encode(x)
        x = pad_sequences(x, max_len + 1)
        return x
    x_train = dataset(train['data'])
    x_test = dataset(test['data'])
    x_test = x_test[:2000]

    history = model.fit(x=x_train[:, :max_len], 
                y=x_train[:, max_len],
                epochs=3, batch_size=40,
                validation_data=(x_test[:, :max_len], x_test[:, max_len]))


In [162]:
x_test[:5, max_len]

array([115, 102,  48, 102,  97], dtype=int32)

In [163]:
x_test[:5, max_len-5:max_len]

array([[104, 101,  32,  98, 101],
       [ 98, 117, 116,  10,  97],
       [115,  32, 118,  49,  46],
       [ 32, 115, 116, 117, 102],
       [ 40,  78, 111, 114, 109]], dtype=int32)

In [164]:
model.predict(x_test[:5, :max_len]).argmax(axis=-1)

array([32, 32, 32, 32, 32])

In [153]:
# convolve a couple more times

h = embed(x)
print(h.shape)
h = conv0(h)
print(h.shape)
h = drop(h)
print(h.shape)
h = conv1(h)
print(h.shape)
h = drop(h)
print(h.shape)
h = conv1(h)
print(h.shape)
h = drop(h)
print(h.shape)
h = conv1(h)
print(h.shape)
h = drop(h)
print(h.shape)
h = rnn(h)
print(h.shape)
h = pred(h)
print(h.shape)

model = Model(x, h)
model.compile(optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'])


(?, ?, 256)
(?, ?, 512)
(?, ?, 512)
(?, ?, 512)
(?, ?, 512)
(?, ?, 512)
(?, ?, 512)
(?, ?, 512)
(?, ?, 512)
(?, 1024)
(?, 256)


In [165]:
for i in range(10):
    print(i)
    retrain(32)
    print()

0
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

1
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

2
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

3
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

4
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

5
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

6
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

7
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

8
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

9
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



In [166]:
for i in range(10):
    print(i)
    retrain(64)
    print()

0
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

1
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

2
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

3
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

4
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

5
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

6
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

7
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

8
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

9
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



In [167]:
for i in range(10):
    print(i)
    retrain(128)
    print()

0
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

1
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

2
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

3
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

4
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

5
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

6
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

7
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

8
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

9
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



Variable             Type              Data/Info
------------------------------------------------
Bidirectional        type              <class 'keras.layers.wrappers.Bidirectional'>
Conv1D               type              <class 'keras.layers.convolutional.Conv1D'>
ConvHyper            type              <class '__main__.ConvHyper'>
DeconvHyper          type              <class '__main__.DeconvHyper'>
Dense                type              <class 'keras.layers.core.Dense'>
Dropout              type              <class 'keras.layers.core.Dropout'>
Embedding            type              <class 'keras.layers.embeddings.Embedding'>
EmbeddingHyper       type              <class '__main__.EmbeddingHyper'>
GRU                  type              <class 'keras.layers.recurrent.GRU'>
Input                function          <function Input at 0x7f34fa45cc80>
LSTM                 type              <class 'keras.layers.recurrent.LSTM'>
Model                type              <class 'keras.engine.train

In [23]:
embed_h = EmbeddingHyper(256, 128)
conv_h = ConvHyper(128, 6, 4)
rnn_h = RnnHyper(128, is_lstm=False, is_bidirectional=False)
hyper = RnnCnnHyper(embed_h, conv_h, rnn_h)
dec_h = DeconvHyper(128, 6, 4)

In [24]:
dec, up = dec_h.make_layer('')

In [25]:
emb, con, rnn, den = hyper.make_layers('', return_sequences=False)
dernn = GRU(128, return_sequences=True, unroll=True)

In [26]:
tden = TimeDistributed(den)

In [27]:
x = Input(shape=(max_len,), name='text_input')
h = emb(x)
print(h.shape)
h = con(h)
print(h.shape)
h = rnn(h)
print(h.shape)
h = RepeatVector(16)(h)
print(h.shape)
h = dernn(h)
print(h.shape)
h = up(h)
print(h.shape)
h = dec(h)
print(h.shape)
h = tden(h)
print(h.shape)


(?, 64, 128)
(?, 16, 128)
(?, 128)
(?, 16, 128)
(?, 16, 128)
(?, 64, 128)
(?, 64, 128)
(?, 64, 256)


In [29]:
model = Model(x, h)
model.compile(optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'])

In [7]:
with open('../data/norvig/big.txt', 'r') as f:
    big = f.read()

In [8]:
big[:1000]

'The Project Gutenberg EBook of The Adventures of Sherlock Holmes\nby Sir Arthur Conan Doyle\n(#15 in our series by Sir Arthur Conan Doyle)\n\nCopyright laws are changing all over the world. Be sure to check the\ncopyright laws for your country before downloading or redistributing\nthis or any other Project Gutenberg eBook.\n\nThis header should be the first thing seen when viewing this Project\nGutenberg file.  Please do not remove it.  Do not change or edit the\nheader without written permission.\n\nPlease read the "legal small print," and other information about the\neBook and Project Gutenberg at the bottom of this file.  Included is\nimportant information about your specific rights and restrictions in\nhow the file may be used.  You can also find out about how to make a\ndonation to Project Gutenberg, and how to get involved.\n\n\n**Welcome To The World of Free Plain Vanilla Electronic Texts**\n\n**eBooks Readable By Both Humans and By Computers, Since 1971**\n\n*****These eBooks 

In [9]:
len(big)

6488666

In [10]:
big_chunks = [big[i:i+100000] for i in range(0, len(big), 100000)]

In [11]:
len(big_chunks)

65

In [12]:
r = np.random.RandomState(42)

In [13]:
r.shuffle(big_chunks)

In [14]:
big_train = ''.join(big_chunks[:55])

In [15]:
big_test = ''.join(big_chunks[55:])

In [19]:
max_len = 64

In [36]:
def big_data(max_len, n, r):
    def random_slice(data):
        i = r.randint(len(data) - max_len)
        return data[i : i + max_len]
    train = [random_slice(big_train) for _ in range(n)]
    test = [random_slice(big_test) for _ in range(int(0.1 * n))]
    return train, test

In [37]:
class AutoData(object):
    def __init__(self, text, max_len):
        self.text = text
        self.max_len = max_len
        x = bytelevel.encode(text)
        self.x = pad_sequences(x, max_len)
        self.y = bytelevel.onehot(self.x)
        
    @staticmethod
    def Random(max_len, n, r):
        train, test = big_data(max_len, n, r)
        return AutoData(train, max_len), AutoData(test, max_len)

In [38]:
train, test = AutoData.Random(max_len, 10000, r)

In [39]:
history = model.fit(x=train.x, 
             y=train.y,
            epochs=1, batch_size=10,
            validation_data=(test.x, test.y))

Train on 10000 samples, validate on 1000 samples
Epoch 1/1


In [31]:
train.x.shape

(10000, 65)

In [40]:
for i in range(10):
    print(i)
    train, test = AutoData.Random(max_len, 10000, r)
    history = model.fit(x=train.x, 
            y=train.y,
            epochs=10, batch_size=10,
            validation_data=(test.x, test.y))
    print()

0
Train on 10000 samples, validate on 1000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

1
Train on 10000 samples, validate on 1000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

2
Train on 10000 samples, validate on 1000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

3
Train on 10000 samples, validate on 1000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

4
Train on 10000 samples, validate on 1000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

5
Train on 10000 samples, validate on 1000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

6
Train on 10000 sampl

Epoch 9/10
Epoch 10/10



In [42]:
bytelevel.prediction2str(model.predict(test.x[:4]))

['s love had\nended.  he did not need an  fang if shas sand. He aee',
 'a parade dons not betin tilt all the droose ars aleebbled," sho ',
 ' promoter, began to interess him sa salaly that theequustion tth',
 'st to that grandhuv, the\nsispalss pateeven terdeeness. His aft e']

In [43]:
test.text[:4]

['s love had\nended. "He did not need anything of that kind. He nei',
 'a parade does not begin till all the troops are\nassembled," said',
 ' promoter,\nbegan to interest him so keenly that the question of ',
 'st to that grandeur, the\nsimplest paternal tenderness. His eyes ']

In [1]:
from keras.optimizers import Adam

In [4]:
Adam?