In [1]:
import sys
sys.path.append("..")

In [2]:
import bytelevel

In [3]:
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [4]:
from sklearn.datasets import fetch_20newsgroups
from pprint import pprint
import numpy as np

In [5]:
from keras.layers import Embedding, Conv1D, Input, GRU, LSTM, Bidirectional, Dense
from keras.models import Model

In [6]:
r = np.random.RandomState(42)

In [7]:
r = np.random.RandomState(42)

def random_chop(s, r, m):
    n = len(s)
    if n <= m:
        return s
    k = r.randint(n - m)
    s = s[k:]
    return s[:m]

In [8]:
train = fetch_20newsgroups(subset='train')
test = fetch_20newsgroups(subset='test')

In [9]:
max_len = 50

def dataset(x):
    x = [random_chop(s, r, max_len + 1) for s in x]
    x = bytelevel.encode(x)
    x = pad_sequences(x, max_len + 1)
    return x

x_train = dataset(train['data'])
x_test = dataset(test['data'])
x_test = x_test[:2000]

In [33]:
class EmbeddingHyper(object):
    def __init__(self, vocab_size, embedding_dim):
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        
    @staticmethod
    def Random(r):
        embedding_dim = r.choice([2 ** i for i in range(4, 10)])
        return EmbeddingHyper(256, embedding_dim)
        
    def display(self):
        print("embedding")
        print("vocab size=%d" % self.vocab_size)
        print("embedding dimension=%d" % self.embedding_dim)
        
    def make_layer(self, name='embedder'):
        return Embedding(self.vocab_size, 
            self.embedding_dim , name=name)
    
class ConvHyper(object):
    def __init__(self, filters, kernel_size=3, stride=2):
        self.filters = filters
        self.kernel_size = kernel_size
        self.stride = stride
        
    @staticmethod
    def Random(r):
        filters = r.choice([2 ** i for i in range(6, 10)])
        kernel_size = r.randint(4) + 2
        stride = r.randint(4) + 1
        return ConvHyper(filters, kernel_size, stride)
        
    def display(self):
        print("conv 1d")
        print("filters=%d" % self.filters)
        print("kernel size=%d" % self.kernel_size)
        print("stride = %d" % self.stride)
        
    def make_layer(self, name):
        return Conv1D(64, 4, strides=2, activation='relu', name=name)
    
class RnnHyper(object):
    def __init__(self, hidden_dim, is_lstm, is_bidirectional):
        self.hidden_dim = hidden_dim
        self.is_lstm = is_lstm
        self.is_bidirectional = is_bidirectional
        
    @staticmethod
    def Random(r):
        hidden_dim = r.choice([2 ** i for i in range(6, 10)])
        is_lstm = bool(r.randint(2))
        is_bidirectional = bool(r.randint(2))
        return RnnHyper(hidden_dim, is_lstm, is_bidirectional)

    def display(self):
        print("RNN")
        print("hidden dimension=%d" % self.hidden_dim)
        if self.is_bidirectional:
            print("bidirectional")
        if self.is_lstm:
            print("lstm")
        else:
            print("gru")            
        
    def make_layer(self, name, return_sequences):
        if self.is_lstm:
            make_rnn = LSTM
        else:
            make_rnn = GRU
        if self.is_bidirectional:
            return Bidirectional(make_rnn(self.hidden_dim, return_sequences=return_sequences), name=name)
        return make_rnn(self.hidden_dim, return_sequences=return_sequences, name=name)
    

In [16]:
embedd_h = EmbeddingHyper(256, 64)
conv_h = ConvHyper(512)
rnn_h = RnnHyper(512, is_lstm=True, is_bidirectional=True)

embedd_h.display()
print()
conv_h.display()
print()
rnn_h.display()
print()

embed = embedd_h.make_layer()
c1d0 = conv_h.make_layer('conv0')
rnn = rnn_h.make_layer('rnn', return_sequences=False)
pred = Dense(256, activation='softmax')

embedding
vocab size=256
embedding dimension=64

conv 1d
filters=512
kernel size=3
stride = 2

RNN
hidden dimension=512
bidirectional
lstm



In [12]:
x = Input(shape=(None,), name='text_input')
h = embed(x)
print(h.shape)
h = c1d0(h)
print(h.shape)
h = rnn(h)
print(h.shape)
h = pred(h)
print(h.shape)

model = Model(x, h)
model.compile(optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'])


(?, ?, 64)
(?, ?, 64)
(?, 1024)
(?, 256)


In [14]:
model.fit(x=x_train[:, :max_len], 
                 y=x_train[:, max_len],
                epochs=10, batch_size=10,
                validation_data=(x_test[:, :max_len], x_test[:, max_len]))

Train on 11314 samples, validate on 2000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fc09f0abda0>

In [39]:
r = np.random.RandomState(42)

for _ in range(20):
    embedd_h = EmbeddingHyper.Random(r)
    embedd_h.display()
    print()

    conv_h = ConvHyper.Random(r)
    conv_h.display()
    print()

    rnn_h = RnnHyper.Random(r)
    rnn_h.display()
    print()

    embed = embedd_h.make_layer()
    c1d0 = conv_h.make_layer('conv0')
    rnn = rnn_h.make_layer('rnn', return_sequences=False)
    pred = Dense(256, activation='softmax')
    
    x = Input(shape=(None,), name='text_input')
    h = embed(x)
    print(h.shape)
    h = c1d0(h)
    print(h.shape)
    h = rnn(h)
    print(h.shape)
    h = pred(h)
    print(h.shape)

    model = Model(x, h)
    model.compile(optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy'])

    model.fit(x=x_train[:, :max_len], 
                 y=x_train[:, max_len],
                epochs=3, batch_size=10,
                validation_data=(x_test[:, :max_len], x_test[:, max_len]))
    
    print()
    print()
    print()
    

embedding
vocab size=256
embedding dimension=128

conv 1d
filters=64
kernel size=4
stride = 3

RNN
hidden dimension=512
gru

(?, ?, 128)
(?, ?, 64)
(?, 512)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=32

conv 1d
filters=256
kernel size=4
stride = 3

RNN
hidden dimension=256
lstm

(?, ?, 32)
(?, ?, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=128

conv 1d
filters=512
kernel size=5
stride = 3

RNN
hidden dimension=128
bidirectional
gru

(?, ?, 128)
(?, ?, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=128

conv 1d
filters=128
kernel size=3
stride = 2

RNN
hidden dimension=512
lstm

(?, ?, 128)
(?, ?, 64)
(?, 512)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoc




embedding
vocab size=256
embedding dimension=128

conv 1d
filters=64
kernel size=5
stride = 3

RNN
hidden dimension=256
lstm

(?, ?, 128)
(?, ?, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=128

conv 1d
filters=128
kernel size=5
stride = 4

RNN
hidden dimension=128
bidirectional
lstm

(?, ?, 128)
(?, ?, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=512

conv 1d
filters=128
kernel size=5
stride = 2

RNN
hidden dimension=64
bidirectional
gru

(?, ?, 512)
(?, ?, 64)
(?, 128)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=32

conv 1d
filters=512
kernel size=3
stride = 2

RNN
hidden dimension=128
bidirectional
lstm

(?, ?, 32)
(?, ?, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 s

In [43]:
class EmbeddingHyper(object):
    def __init__(self, vocab_size, embedding_dim):
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        
    @staticmethod
    def Random(r):
        embedding_dim = r.choice([2 ** i for i in range(7, 9)])
        return EmbeddingHyper(256, embedding_dim)
        
    def display(self):
        print("embedding")
        print("vocab size=%d" % self.vocab_size)
        print("embedding dimension=%d" % self.embedding_dim)
        
    def make_layer(self, name='embedder'):
        return Embedding(self.vocab_size, 
            self.embedding_dim , name=name)
    
class ConvHyper(object):
    def __init__(self, filters, kernel_size=3, stride=2):
        self.filters = filters
        self.kernel_size = kernel_size
        self.stride = stride
        
    @staticmethod
    def Random(r):
        filters = r.choice([2 ** i for i in range(6, 10)])
        kernel_size = r.randint(4) + 4
        stride = r.randint(4) + 3
        return ConvHyper(filters, kernel_size, stride)
        
    def display(self):
        print("conv 1d")
        print("filters=%d" % self.filters)
        print("kernel size=%d" % self.kernel_size)
        print("stride = %d" % self.stride)
        
    def make_layer(self, name):
        return Conv1D(64, 4, strides=2, activation='relu', name=name)
    
class RnnHyper(object):
    def __init__(self, hidden_dim, is_lstm, is_bidirectional):
        self.hidden_dim = hidden_dim
        self.is_lstm = is_lstm
        self.is_bidirectional = is_bidirectional
        
    @staticmethod
    def Random(r):
        hidden_dim = r.choice([2 ** i for i in range(6, 8)])
        is_lstm = bool(r.randint(2))
        is_bidirectional = bool(r.randint(2))
        return RnnHyper(hidden_dim, is_lstm, is_bidirectional)

    def display(self):
        print("RNN")
        print("hidden dimension=%d" % self.hidden_dim)
        if self.is_bidirectional:
            print("bidirectional")
        if self.is_lstm:
            print("lstm")
        else:
            print("gru")            
        
    def make_layer(self, name, return_sequences):
        if self.is_lstm:
            make_rnn = LSTM
        else:
            make_rnn = GRU
        if self.is_bidirectional:
            return Bidirectional(make_rnn(self.hidden_dim, return_sequences=return_sequences), name=name)
        return make_rnn(self.hidden_dim, return_sequences=return_sequences, name=name)
    


In [44]:
r = np.random.RandomState(43)

for _ in range(20):
    embedd_h = EmbeddingHyper.Random(r)
    embedd_h.display()
    print()

    conv_h = ConvHyper.Random(r)
    conv_h.display()
    print()

    rnn_h = RnnHyper.Random(r)
    rnn_h.display()
    print()

    embed = embedd_h.make_layer()
    c1d0 = conv_h.make_layer('conv0')
    rnn = rnn_h.make_layer('rnn', return_sequences=False)
    pred = Dense(256, activation='softmax')
    
    x = Input(shape=(None,), name='text_input')
    h = embed(x)
    print(h.shape)
    h = c1d0(h)
    print(h.shape)
    h = rnn(h)
    print(h.shape)
    h = pred(h)
    print(h.shape)

    model = Model(x, h)
    model.compile(optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy'])

    model.fit(x=x_train[:, :max_len], 
                 y=x_train[:, max_len],
                epochs=3, batch_size=10,
                validation_data=(x_test[:, :max_len], x_test[:, max_len]))
    
    print()
    print()
    print()
    

embedding
vocab size=256
embedding dimension=64

conv 1d
filters=64
kernel size=7
stride = 4

RNN
hidden dimension=128
gru

(?, ?, 64)
(?, ?, 64)
(?, 128)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=128

conv 1d
filters=512
kernel size=7
stride = 5

RNN
hidden dimension=64
bidirectional
gru

(?, ?, 128)
(?, ?, 64)
(?, 128)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=64

conv 1d
filters=512
kernel size=7
stride = 5

RNN
hidden dimension=128
bidirectional
lstm

(?, ?, 64)
(?, ?, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=128

conv 1d
filters=512
kernel size=6
stride = 6

RNN
hidden dimension=64
bidirectional
lstm

(?, ?, 128)
(?, ?, 64)
(?, 128)
(?, 256)
Train on 11314 samples, validate on 2000 samples





embedding
vocab size=256
embedding dimension=256

conv 1d
filters=256
kernel size=6
stride = 3

RNN
hidden dimension=64
bidirectional
lstm

(?, ?, 256)
(?, ?, 64)
(?, 128)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=256

conv 1d
filters=64
kernel size=7
stride = 5

RNN
hidden dimension=128
bidirectional
gru

(?, ?, 256)
(?, ?, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=64

conv 1d
filters=128
kernel size=6
stride = 6

RNN
hidden dimension=128
lstm

(?, ?, 64)
(?, ?, 64)
(?, 128)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=256

conv 1d
filters=128
kernel size=7
stride = 5

RNN
hidden dimension=128
gru

(?, ?, 256)
(?, ?, 64)
(?, 128)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/

In [45]:
class EmbeddingHyper(object):
    def __init__(self, vocab_size, embedding_dim):
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        
    @staticmethod
    def Random(r):
        embedding_dim = r.choice([2 ** i for i in range(7, 9)])
        return EmbeddingHyper(256, embedding_dim)
        
    def display(self):
        print("embedding")
        print("vocab size=%d" % self.vocab_size)
        print("embedding dimension=%d" % self.embedding_dim)
        
    def make_layer(self, name='embedder'):
        return Embedding(self.vocab_size, 
            self.embedding_dim , name=name)
    
class ConvHyper(object):
    def __init__(self, filters, kernel_size=3, stride=2):
        self.filters = filters
        self.kernel_size = kernel_size
        self.stride = stride
        
    @staticmethod
    def Random(r):
        filters = r.choice([2 ** i for i in range(7, 9)])
        kernel_size = r.randint(4) + 4
        stride = r.randint(4) + 3
        return ConvHyper(filters, kernel_size, stride)
        
    def display(self):
        print("conv 1d")
        print("filters=%d" % self.filters)
        print("kernel size=%d" % self.kernel_size)
        print("stride = %d" % self.stride)
        
    def make_layer(self, name):
        return Conv1D(64, 4, strides=2, activation='relu', name=name)
    
class RnnHyper(object):
    def __init__(self, hidden_dim, is_lstm, is_bidirectional):
        self.hidden_dim = hidden_dim
        self.is_lstm = is_lstm
        self.is_bidirectional = is_bidirectional
        
    @staticmethod
    def Random(r):
        hidden_dim = r.choice([2 ** i for i in range(7, 9)])
        is_lstm = bool(r.randint(2))
        is_bidirectional = bool(r.randint(2))
        return RnnHyper(hidden_dim, is_lstm, is_bidirectional)

    def display(self):
        print("RNN")
        print("hidden dimension=%d" % self.hidden_dim)
        if self.is_bidirectional:
            print("bidirectional")
        if self.is_lstm:
            print("lstm")
        else:
            print("gru")            
        
    def make_layer(self, name, return_sequences):
        if self.is_lstm:
            make_rnn = LSTM
        else:
            make_rnn = GRU
        if self.is_bidirectional:
            return Bidirectional(make_rnn(self.hidden_dim, return_sequences=return_sequences), name=name)
        return make_rnn(self.hidden_dim, return_sequences=return_sequences, name=name)
    


In [46]:
np.mean([len(s) for s in train['data']])

1949.3100583348064

In [47]:
max_len = 256

def dataset(x):
    x = [random_chop(s, r, max_len + 1) for s in x]
    x = bytelevel.encode(x)
    x = pad_sequences(x, max_len + 1)
    return x

x_train = dataset(train['data'])
x_test = dataset(test['data'])
x_test = x_test[:2000]

In [48]:
def hyper_search(seed):
    r = np.random.RandomState(seed)

    for _ in range(20):
        embedd_h = EmbeddingHyper.Random(r)
        embedd_h.display()
        print()

        conv_h = ConvHyper.Random(r)
        conv_h.display()
        print()

        rnn_h = RnnHyper.Random(r)
        rnn_h.display()
        print()

        embed = embedd_h.make_layer()
        c1d0 = conv_h.make_layer('conv0')
        rnn = rnn_h.make_layer('rnn', return_sequences=False)
        pred = Dense(256, activation='softmax')

        x = Input(shape=(None,), name='text_input')
        h = embed(x)
        print(h.shape)
        h = c1d0(h)
        print(h.shape)
        h = rnn(h)
        print(h.shape)
        h = pred(h)
        print(h.shape)

        model = Model(x, h)
        model.compile(optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['sparse_categorical_accuracy'])

        model.fit(x=x_train[:, :max_len], 
                     y=x_train[:, max_len],
                    epochs=3, batch_size=10,
                    validation_data=(x_test[:, :max_len], x_test[:, max_len]))

        print()
        print()
        print()

In [49]:
hyper_search(44)

embedding
vocab size=256
embedding dimension=128

conv 1d
filters=256
kernel size=5
stride = 4

RNN
hidden dimension=256
lstm

(?, ?, 128)
(?, ?, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=128

conv 1d
filters=256
kernel size=7
stride = 5

RNN
hidden dimension=128
bidirectional
lstm

(?, ?, 128)
(?, ?, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=128

conv 1d
filters=256
kernel size=7
stride = 6

RNN
hidden dimension=256
lstm

(?, ?, 128)
(?, ?, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=256

conv 1d
filters=256
kernel size=4
stride = 4

RNN
hidden dimension=256
lstm

(?, ?, 256)
(?, ?, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3




embedding
vocab size=256
embedding dimension=128

conv 1d
filters=256
kernel size=5
stride = 6

RNN
hidden dimension=128
bidirectional
gru

(?, ?, 128)
(?, ?, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=256

conv 1d
filters=256
kernel size=5
stride = 6

RNN
hidden dimension=256
bidirectional
lstm

(?, ?, 256)
(?, ?, 64)
(?, 512)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=128

conv 1d
filters=128
kernel size=5
stride = 4

RNN
hidden dimension=256
gru

(?, ?, 128)
(?, ?, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=128

conv 1d
filters=256
kernel size=7
stride = 5

RNN
hidden dimension=256
bidirectional
lstm

(?, ?, 128)
(?, ?, 64)
(?, 512)
(?, 256)
Train on 11314 samples, validate on 200

In [92]:
maxlen = 72
x = Input(shape=(None,), name='text_input')
embed = Embedding(256, 
            64, name='embedder')
c1d0 = Conv1D(256, 4, strides=2, activation='relu')
c1d1 = Conv1D(256, 4, strides=2, activation='relu')
rnn = Bidirectional(GRU(512, return_sequences=False))
pred = Dense(256, activation='softmax')

h = embed(x)
print(h.shape)
h = c1d0(h)
print(h.shape)
h = c1d1(h)
print(h.shape)
h = rnn(h)
print(h.shape)
h = pred(h)
print(h.shape)

model = Model(x, h)
model.compile(optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'])

model.fit(x=x_train[:, :72], 
                 y=x_train[:, 72],
                epochs=10, batch_size=2,
                validation_data=(x_test[:, :72], x_test[:, 72]))

(?, ?, 64)
(?, ?, 256)
(?, ?, 256)
(?, 1024)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f5c1116ca58>