In [1]:
import os

import numpy as np
from sklearn.cross_validation import train_test_split

import keras
from keras.models import *
from keras.layers import *

import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
# --- run identity / reproducibility ---
RUN = 'I.2'   # run tag; appended to both output directories below
RND = 777     # seed used for the NumPy RNG and for the train/validation split

# --- data split ---
TEST_SIZE = 0.1   # fraction of the samples held out for validation

# --- training schedule ---
N_EPOCHS = 1111
N_BATCH_SAMPLES = 64   # mini-batch size

# --- output locations (one sub-directory per run) ---
TENSORBOARD_DIR = '/tmp-persistent/integer-seq-v2/' + RUN
MODELS_DIR = '/d3/caches/kaggle-integer-seq-v2/models/' + RUN

# True  -> stream batches to the model through fit_generator
# False -> encode the whole training set up front and call fit
TRAIN_WITH_GENERATOR = False

In [3]:
# Make sure both output directories exist before training starts writing
# checkpoints and TensorBoard logs into them.
# `os` is imported explicitly in the import cell rather than relying on
# whatever names the `from keras... import *` star imports happen to expose.
for _out_dir in (MODELS_DIR, TENSORBOARD_DIR):
    if not os.path.isdir(_out_dir):
        os.makedirs(_out_dir)

In [4]:
# Seed NumPy's global RNG so NumPy-driven randomness is repeatable across runs.
# NOTE(review): only NumPy is seeded here; the TensorFlow backend keeps its own
# random state, so training itself may still differ run to run - confirm.
np.random.seed(RND)

In [5]:
# load stuff
# Each .npy file stores one Python object wrapped in a single-element array;
# `.item()` unwraps it. The two train_* objects are used as dicts further down
# (their `.values()` are read), and `len(dictionary)` sets the vocabulary size.
# NOTE(review): object arrays are restored through pickle - only load files
# produced by your own preprocessing.
# presumably train_e_seq holds the encoded input sequences and train_e_last the
# encoded target term for each sequence - verify against the preprocessing step.
dictionary = np.load('out/dictionary@tr=10.npy').item()
train_e_seq = np.load('out/train_e_seq@tr=10.npy').item()
train_e_last = np.load('out/train_e_last@tr=10.npy').item()

In [6]:
# Length of one encoded sequence, read off the first stored entry (the entries
# are stacked into a single rectangular int32 array in the next step).
SEQ_LEN = len(list(train_e_seq.values())[0])
# Vocabulary size plus one extra slot, because index 0 is the padding value.
DICT_SIZE = len(dictionary) + 1 # add 1 for zero-padding

In [7]:
# Pair every input sequence with its target through the shared dict key instead
# of trusting two independent `.values()` calls to come back in the same order.
# Iterating train_e_seq keeps X in the same order as `train_e_seq.values()`.
# Assumes both dicts are keyed by the same ids; an id missing from train_e_last
# raises KeyError instead of silently misaligning inputs and targets.
_sample_ids = list(train_e_seq)
X = np.array([train_e_seq[_sid] for _sid in _sample_ids], dtype=np.int32)
y_u = [train_e_last[_sid] for _sid in _sample_ids] # unencoded

# Hold out TEST_SIZE of the samples for validation; seeded for a repeatable split.
X_train, X_val, y_u_train, y_u_val = \
    train_test_split(X, y_u, test_size=TEST_SIZE, random_state=RND)

In [8]:
# calculate samples per epoch
n_sub_epochs = 1
N_EPOCH_SAMPLES = (len(X_train) / n_sub_epochs / N_BATCH_SAMPLES) * N_BATCH_SAMPLES
print 'Samples per epoch:', N_EPOCH_SAMPLES, 'of', len(X_train)

Samples per epoch: 91648 of 91665


In [9]:
print 'Validation size:', len(y_u_val)
print 'Mem for val data: %.2fG'%(DICT_SIZE * 4. * len(y_u_val) / 1024 / 1024 / 1024)

Validation size: 10186
Mem for val data: 0.52G


In [10]:
# generate data batch
def gen_batch(n_samples, batch_ix, X, y_u, dict_size=None):
    """Return one batch of inputs together with its one-hot encoded targets.

    Parameters
    ----------
    n_samples : int
        Number of samples in the batch.
    batch_ix : int
        Index of the batch; it wraps around modulo the number of complete
        batches that fit into ``X``.
    X : sliceable sequence (e.g. ndarray)
        Input samples.
    y_u : sequence of int
        Unencoded labels, aligned with ``X`` position by position.
    dict_size : int, optional
        Width of each one-hot row. When omitted, the module-level
        ``DICT_SIZE`` is looked up at call time, so re-running the cell that
        defines ``DICT_SIZE`` is picked up without redefining this function.

    Returns
    -------
    (X_batch, y_batch)
        ``X_batch`` is the slice of ``X`` for this batch; ``y_batch`` is a
        float32 array of shape ``(n_samples, dict_size)`` with a single 1.0
        per row at the column given by the matching label.

    Raises
    ------
    ValueError
        If ``n_samples`` is not positive or ``X`` holds fewer than
        ``n_samples`` samples (no complete batch exists).
    IndexError
        If ``y_u`` does not cover the selected batch, or a label falls
        outside ``dict_size``.
    """
    if dict_size is None:
        dict_size = DICT_SIZE

    if n_samples <= 0 or len(X) < n_samples:
        raise ValueError(
            'need 0 < n_samples <= len(X), got n_samples=%r, len(X)=%r'
            % (n_samples, len(X)))

    # Floor division: only complete batches are addressable.
    batches_in_X = len(X) // n_samples
    batch_ix %= batches_in_X

    start = batch_ix * n_samples
    stop = start + n_samples

    X_batch = X[start:stop]

    # Labels must come from the same window as the inputs; reading them from
    # the start of y_u would mislabel every batch except the first.
    labels = np.asarray(y_u[start:stop])
    if len(labels) != n_samples:
        raise IndexError('y_u does not cover samples %d..%d' % (start, stop - 1))

    y_batch = np.zeros([n_samples, dict_size], dtype=np.float32)
    y_batch[np.arange(n_samples), labels] = 1.

    return X_batch, y_batch

In [11]:
# gen validation data
# The whole hold-out split is encoded as one single batch (index 0); y_val
# receives the one-hot rows built from the integer labels in y_u_val.
X_val, y_val = gen_batch(len(y_u_val), 0, X=X_val, y_u=y_u_val)

In [12]:
# Network: embedding -> LSTM -> dropout -> softmax over the dictionary.
embedding_size = 64
lstm_output_dim = 128

model = Sequential()

# Index 0 is the padding value and is masked out for the layers that follow.
model.add(Embedding(DICT_SIZE, embedding_size, mask_zero=True, dropout=0.2))

# No `input_dim` here: the LSTM is not the first layer, so its input width is
# taken from the embedding output (embedding_size). The sequence length that
# used to be passed as `input_dim` is not a feature dimension and had no effect.
model.add(LSTM(output_dim=lstm_output_dim, return_sequences=False, dropout_U=0.2, dropout_W=0.2))

model.add(Dropout(0.5))

# One output unit per dictionary slot, matching the width of the one-hot targets.
model.add(Dense(output_dim=DICT_SIZE, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [13]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_1 (Embedding)          (None, None, 64)      884672      embedding_input_1[0][0]          
____________________________________________________________________________________________________
lstm_1 (LSTM)                    (None, 128)           98816       embedding_1[0][0]                
____________________________________________________________________________________________________
dropout_1 (Dropout)              (None, 128)           0           lstm_1[0][0]                     
____________________________________________________________________________________________________
dense_1 (Dense)                  (None, 13823)         1783167     dropout_1[0][0]                  
Total params: 2766655
_____________________________________________________________________

In [None]:
if TRAIN_WITH_GENERATOR:

    batch_ix = -1

    def gen_sample():

        global batch_ix

        while True:
            batch_ix += 1
            yield gen_batch(
                n_samples=N_BATCH_SAMPLES,
                batch_ix=batch_ix,
                X=X_train,
                y_u=y_u_train
            )

    history = model.fit_generator(
            gen_sample(),
            samples_per_epoch=N_EPOCH_SAMPLES,
            nb_epoch=N_EPOCHS,
            validation_data=(X_val, y_val),
            verbose=True,
            max_q_size=20,
            nb_worker=1,
            pickle_safe=False,
            callbacks = [
                keras.callbacks.TensorBoard(log_dir=TENSORBOARD_DIR, histogram_freq=0),
                keras.callbacks.ModelCheckpoint(
                    MODELS_DIR + \
                    '/e{epoch:02d}-l={loss:.5f}-vl={val_loss:.5f}-a={acc:.5f}-va={val_acc:.5f}.h5', 
                    monitor='val_acc', verbose=0, save_best_only=False, 
                    save_weights_only=False, mode='auto'
                ),
            ]
         )
    
else:

    # gen train data
    _X_train, _y_train = gen_batch(
        n_samples=len(X_train), 
        batch_ix=0,
        X=X_train,
        y_u=y_u_train
    )

    print 'Training size:', len(y_u_train)
    print 'Mem for tr data: %.2fG'%(DICT_SIZE * 4. * len(y_u_train) / 1024 / 1024 / 1024)

    model.fit(
        _X_train,
        _y_train,
        batch_size=N_BATCH_SAMPLES,
        nb_epoch=N_EPOCHS,
        validation_data=(X_val, y_val),
        shuffle=True,
        callbacks = [
            keras.callbacks.TensorBoard(log_dir=TENSORBOARD_DIR, histogram_freq=0),
            keras.callbacks.ModelCheckpoint(
                MODELS_DIR + \
                '/e{epoch:02d}-l={loss:.5f}-vl={val_loss:.5f}-a={acc:.5f}-va={val_acc:.5f}.h5', 
                monitor='val_acc', verbose=0, save_best_only=False, 
                save_weights_only=False, mode='auto'
            )
        ]
    )

Training size: 91665
Mem for tr data: 4.72G


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 91665 samples, validate on 10186 samples
Epoch 1/1111