In [1]:
import sys
sys.path.append("..")

In [2]:
import bytelevel

In [3]:
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [82]:
from sklearn.datasets import fetch_20newsgroups
from pprint import pprint
import numpy as np
import pickle

In [5]:
from keras.layers import Embedding, Conv1D, Input, GRU, LSTM, Bidirectional, Dense
from keras.models import Model

In [6]:
r = np.random.RandomState(42)

In [7]:
r = np.random.RandomState(42)

def random_chop(s, r, m):
    n = len(s)
    if n <= m:
        return s
    k = r.randint(n - m)
    s = s[k:]
    return s[:m]

In [8]:
train = fetch_20newsgroups(subset='train')
test = fetch_20newsgroups(subset='test')

In [9]:
max_len = 50

def dataset(x):
    x = [random_chop(s, r, max_len + 1) for s in x]
    x = bytelevel.encode(x)
    x = pad_sequences(x, max_len + 1)
    return x

x_train = dataset(train['data'])
x_test = dataset(test['data'])
x_test = x_test[:2000]

In [72]:
class EmbeddingHyper(object):
    def __init__(self, vocab_size, embedding_dim):
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        
    @staticmethod
    def Random(r):
        embedding_dim = r.choice([2 ** i for i in range(6, 10)])
        return EmbeddingHyper(256, embedding_dim)
        
    def display(self):
        print("embedding")
        print("vocab size=%d" % self.vocab_size)
        print("embedding dimension=%d" % self.embedding_dim)
        
    def make_layer(self, name='embedder'):
        return Embedding(self.vocab_size, 
            self.embedding_dim , name=name)
    
class ConvHyper(object):
    def __init__(self, filters, kernel_size=3, stride=2):
        self.filters = filters
        self.kernel_size = kernel_size
        self.stride = stride
        
    @staticmethod
    def Random(r):
        filters = r.choice([2 ** i for i in range(6, 10)])
        kernel_size = r.randint(8) + 2
        stride = r.randint(4) + 1
        return ConvHyper(filters, kernel_size, stride)
        
    def display(self):
        print("conv 1d")
        print("filters=%d" % self.filters)
        print("kernel size=%d" % self.kernel_size)
        print("stride = %d" % self.stride)
        
    def make_layer(self, name):
        return Conv1D(64, 4, strides=2, activation='relu', name=name)
    
class RnnHyper(object):
    def __init__(self, hidden_dim, is_lstm, is_bidirectional):
        self.hidden_dim = hidden_dim
        self.is_lstm = is_lstm
        self.is_bidirectional = is_bidirectional
        
    @staticmethod
    def Random(r):
        hidden_dim = r.choice([2 ** i for i in range(6, 10)])
        is_lstm = bool(r.randint(2))
        is_bidirectional = bool(r.randint(2))
        return RnnHyper(hidden_dim, is_lstm, is_bidirectional)

    def display(self):
        print("RNN")
        print("hidden dimension=%d" % self.hidden_dim)
        if self.is_bidirectional:
            print("bidirectional")
        if self.is_lstm:
            print("lstm")
        else:
            print("gru")            
        
    def make_layer(self, name, return_sequences):
        if self.is_lstm:
            make_rnn = LSTM
        else:
            make_rnn = GRU
        if self.is_bidirectional:
            return Bidirectional(make_rnn(self.hidden_dim, return_sequences=return_sequences), name=name)
        return make_rnn(self.hidden_dim, return_sequences=return_sequences, name=name)
    
class RnnCnnHyper(object):
    def __init__(self, embedder, conv, rnn):
        self.embedder = embedder
        self.conv = conv
        self.rnn = rnn
        
    @staticmethod
    def Random(r):
        embedder = EmbeddingHyper.Random(r)
        conv = ConvHyper.Random(r)
        rnn = RnnHyper.Random(r)
        
        return RnnCnnHyper(embedder, conv, rnn)

    def display(self):
        self.embedder.display()
        print()
        self.conv.display()
        print()
        self.rnn.display()
        print()
        
    def make_layers(self, name, return_sequences):
        if name is not None and len(name):
            prefix = '%s_' % name
        else:
            prefix = ''
        embedder = self.embedder.make_layer(name='%sembedder' % prefix)
        conv = self.conv.make_layer(name='%sconv' % prefix)
        rnn = self.rnn.make_layer(name='%srnn' % prefix, return_sequences=return_sequences)
        dense = Dense(self.embedder.vocab_size, activation='softmax', name='%sprobs' % prefix)
        return embedder, conv, rnn, dense
    

In [89]:
def explore(seed, count, max_len):
    r = np.random.RandomState(seed)
    histories = []
    
    def dataset(x):
        x = [random_chop(s, r, max_len + 1) for s in x]
        x = bytelevel.encode(x)
        x = pad_sequences(x, max_len + 1)
        return x

    x_train = dataset(train['data'])
    x_test = dataset(test['data'])
    x_test = x_test[:2000]

    for i in range(count):
        seed = r.randint(200000)
        name = "RnnCnn%d" % seed
        r_i = np.random.RandomState(seed)
        hyper = RnnCnnHyper.Random(r_i)
        hyper.display()

        embed, conv, rnn, pred = hyper.make_layers('', False)

        x = Input(shape=(max_len,), name='text_input')
        h = embed(x)
        print(h.shape)
        h = conv(h)
        print(h.shape)
        h = rnn(h)
        print(h.shape)
        h = pred(h)
        print(h.shape)

        model = Model(x, h)
        model.compile(optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['sparse_categorical_accuracy'])

        history = model.fit(x=x_train[:, :max_len], 
                     y=x_train[:, max_len],
                    epochs=3, batch_size=10,
                    validation_data=(x_test[:, :max_len], x_test[:, max_len]))

        histories.append([seed, history])
        
        with open("../models/%s.pkl" % name, 'wb') as f:
            pickle.dump([seed, hyper, history.history], f)
            
        model.save("../models/%s.h5" % name)
        
        print()
        print()
        print()
    
    return histories

In [102]:
histories = explore(42, 30, 50)

embedding
vocab size=256
embedding dimension=256

conv 1d
filters=256
kernel size=3
stride = 4

RNN
hidden dimension=256
lstm

(?, 50, 256)
(?, 24, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=512

conv 1d
filters=512
kernel size=4
stride = 4

RNN
hidden dimension=512
lstm

(?, 50, 512)
(?, 24, 64)
(?, 512)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=64

conv 1d
filters=256
kernel size=3
stride = 3

RNN
hidden dimension=64
bidirectional
gru

(?, 50, 64)
(?, 24, 64)
(?, 128)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=128

conv 1d
filters=256
kernel size=6
stride = 2

RNN
hidden dimension=64
bidirectional
lstm

(?, 50, 128)
(?, 24, 64)
(?, 128)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Ep




embedding
vocab size=256
embedding dimension=256

conv 1d
filters=128
kernel size=9
stride = 4

RNN
hidden dimension=64
bidirectional
gru

(?, 50, 256)
(?, 24, 64)
(?, 128)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=64

conv 1d
filters=512
kernel size=4
stride = 2

RNN
hidden dimension=128
lstm

(?, 50, 64)
(?, 24, 64)
(?, 128)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=128

conv 1d
filters=512
kernel size=8
stride = 3

RNN
hidden dimension=256
bidirectional
gru

(?, 50, 128)
(?, 24, 64)
(?, 512)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=128

conv 1d
filters=256
kernel size=4
stride = 2

RNN
hidden dimension=64
lstm

(?, 50, 128)
(?, 24, 64)
(?, 64)
(?, 256)
Train on 11314 samples, validate on 2000 samples
E

Epoch 3/3



embedding
vocab size=256
embedding dimension=256

conv 1d
filters=512
kernel size=6
stride = 2

RNN
hidden dimension=512
bidirectional
lstm

(?, 50, 256)
(?, 24, 64)
(?, 1024)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=512

conv 1d
filters=512
kernel size=5
stride = 4

RNN
hidden dimension=256
bidirectional
gru

(?, 50, 512)
(?, 24, 64)
(?, 512)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=64

conv 1d
filters=256
kernel size=2
stride = 2

RNN
hidden dimension=256
lstm

(?, 50, 64)
(?, 24, 64)
(?, 256)
(?, 256)
Train on 11314 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3



embedding
vocab size=256
embedding dimension=256

conv 1d
filters=256
kernel size=2
stride = 4

RNN
hidden dimension=128
lstm

(?, 50, 256)
(?, 24, 64)
(?, 128)
(?, 256)
Train on 11314 samples, validate on

In [103]:
histories[0][1].history

{'val_loss': [2.839404977560043, 2.683288841843605, 2.606572616696358],
 'val_sparse_categorical_accuracy': [0.26500000633299353,
  0.29600000645965335,
  0.3225000076368451],
 'loss': [3.198784228641489, 2.7334388353514, 2.544429989676183],
 'sparse_categorical_accuracy': [0.21327559238628002,
  0.28036062140989076,
  0.31686406924764826]}

In [106]:
histories.sort(key=lambda x: x[1].history['val_sparse_categorical_accuracy'][-1], reverse=True)

In [110]:
val_acc = lambda x: x[1].history['val_sparse_categorical_accuracy'][-1]

In [111]:
[val_acc(x) for x in histories]

[0.34550000783056023,
 0.3450000071153045,
 0.342500007674098,
 0.3415000081434846,
 0.33150000784546135,
 0.3295000068470836,
 0.3255000079423189,
 0.3250000071525574,
 0.3240000067651272,
 0.32350000690668823,
 0.32250000767409803,
 0.3225000076368451,
 0.3225000067800283,
 0.32050000689923763,
 0.3205000068619847,
 0.3195000072568655,
 0.31100000750273465,
 0.31050000626593827,
 0.31000000700354574,
 0.30950000673532485,
 0.30900000665336846,
 0.30800000615417955,
 0.305000007301569,
 0.2980000066384673,
 0.2960000068694353,
 0.2940000068768859,
 0.2915000059828162,
 0.2825000066310167,
 0.28250000648200513,
 0.2810000065341592]

In [109]:
def ReHyper(seed):
    r = np.random.RandomState(seed)
    hyper = RnnCnnHyper.Random(r)
    return hyper, None

In [114]:
hyper, _ =ReHyper(histories[0][0])

In [117]:
for seed, _ in histories[:6]:
    hyper, _ = ReHyper(seed)
    print(seed)
    hyper.display()

176609
embedding
vocab size=256
embedding dimension=256

conv 1d
filters=64
kernel size=8
stride = 3

RNN
hidden dimension=256
gru

70218
embedding
vocab size=256
embedding dimension=256

conv 1d
filters=256
kernel size=7
stride = 4

RNN
hidden dimension=256
gru

159467
embedding
vocab size=256
embedding dimension=512

conv 1d
filters=512
kernel size=4
stride = 4

RNN
hidden dimension=512
lstm

97357
embedding
vocab size=256
embedding dimension=512

conv 1d
filters=512
kernel size=9
stride = 2

RNN
hidden dimension=128
bidirectional
gru

62245
embedding
vocab size=256
embedding dimension=512

conv 1d
filters=128
kernel size=2
stride = 4

RNN
hidden dimension=128
gru

150338
embedding
vocab size=256
embedding dimension=128

conv 1d
filters=128
kernel size=5
stride = 1

RNN
hidden dimension=128
bidirectional
gru

