In [1]:
cd /content/drive/app/task-gen/

[Errno 2] No such file or directory: '/content/drive/app/task-gen/'
/content


# Char-RNN

In [0]:
%matplotlib inline

import os
import sys
import random
import numpy as np

from itertools import chain
from IPython.display import SVG
from hyperdash import Experiment
from contextlib import redirect_stdout, redirect_stderr

In [3]:
from keras.utils.vis_utils import model_to_dot
from keras.models import Sequential, load_model
from keras.callbacks import Callback, ModelCheckpoint
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import text_to_word_sequence, one_hot
from keras.utils import to_categorical, print_summary, plot_model, Sequence
from keras.layers import LSTM, CuDNNLSTM, Dense, TimeDistributed, Activation, GRU

Using TensorFlow backend.


## Args

In [0]:
GPU_ACTIVE = True

## Load

In [5]:
%store -r descs
# descs = edescs
len(descs)

171127

In [6]:
random.sample(descs, 10)

['Tokenizes value into a sequence of Tokens.',
 'Add platform specific attributes.',
 'Returns a list of the default providers from the settings as the DCNL appropriate constants.',
 'Get the main plot properties and create the plot.',
 'Predefined callback. DCNL The widget grabs the focus. DCNL Equivalent to widget.grab_focus()',
 "Returns the XML blob as an atom.ExtensionElement. DCNL Returns: DCNL An atom.ExtensionElement representing the blob\\'s XML, or None if no DCNL blob was set.",
 'Set up the test schema for TestInfluxDBClient object.',
 'Sends this transaction using the provided AS API interface. DCNL Args: DCNL as_api(ApplicationServiceApi): The API to use to send. DCNL Returns: DCNL A Deferred which resolves to True if the transaction was sent.',
 'Converts scope value to a string.',
 'Calculate whether the thumbnail already exists and that the source is DCNL not newer than the thumbnail. DCNL If the source and thumbnail file storages are local, their file DCNL modificatio

In [7]:
MAX_SEQ_LEN = max(len(desc) for desc in descs)
MAX_SEQ_LEN

500

## Preprocessing

In [0]:
def chars_split(descs):
    chars = list(set(chain.from_iterable(desc for desc in descs)))
    char_ix = {char:ix for ix, char in enumerate(chars)}
    ix_char = {ix:char for ix, char in enumerate(chars)}
    descs = [[char_ix[char] for char in desc] for desc in descs]
    return descs, char_ix, ix_char

In [9]:
%time descs, dir_map, rev_map = chars_split(descs)
list(dir_map.items())[:5], list(rev_map.items())[:5]

CPU times: user 1.58 s, sys: 71 ms, total: 1.65 s
Wall time: 1.65 s


([('o', 0), ('W', 1), (' ', 2), ('#', 3), ('"', 4)],
 [(0, 'o'), (1, 'W'), (2, ' '), (3, '#'), (4, '"')])

In [10]:
VOCAB_SIZE = len(dir_map)
VOCAB_SIZE

95

## TT prepare

In [0]:
BATCH_SIZE = 64

In [0]:
class TTSequence(Sequence):
    def __init__(self):
        self.on_epoch_end()
    
    def __len__(self):
        return int(np.floor(len(descs) / BATCH_SIZE))
    
    def __getitem__(self, idx):
        batch_ids = self._ids[idx * BATCH_SIZE: (idx + 1) * BATCH_SIZE]
        seq_len = max(len(descs[di]) for di in batch_ids)
        X = np.zeros((BATCH_SIZE, seq_len, VOCAB_SIZE))
        y = np.zeros_like(X)
        
        for bi, di in enumerate(batch_ids):
            for pi, wi in enumerate(descs[di]):
                X[bi, seq_len - len(descs[di]) + pi, wi] = 1
            
            for pi, wi in enumerate(descs[di][1:]):
                y[bi, seq_len - len(descs[di]) + pi, wi] = 1
        
        return X, y
    
    def on_epoch_end(self):
        """Generate new shuffle in between epochs."""
        self._ids = np.random.permutation(len(descs))

In [13]:
tts = TTSequence()
len(tts), tts[0][0].shape, tts[0][1].shape

(2673, (64, 461, 95), (64, 461, 95))

## Model

In [0]:
HIDDEN_DIM = 100
N_LAYERS = 1
LSTM_CLASS = LSTM if not GPU_ACTIVE else CuDNNLSTM
MODEL_PATH = 'model.hdf5'
RENEW = True

In [15]:
if not RENEW and os.path.exists(MODEL_PATH):
    print("Loading model...")
    model = load_model(MODEL_PATH)
else:
    print("Creating new model...")
    model = Sequential()
    model.add(LSTM_CLASS(HIDDEN_DIM, input_shape=(None, VOCAB_SIZE), 
                         return_sequences=True))
    for i in range(N_LAYERS - 1):
        model.add(LSTM_CLASS(HIDDEN_DIM, return_sequences=True))
    model.add(TimeDistributed(Dense(VOCAB_SIZE, activation='softmax')))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

Creating new model...


In [16]:
print_summary(model)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm_1 (CuDNNLSTM)     (None, None, 100)         78800     
_________________________________________________________________
time_distributed_1 (TimeDist (None, None, 95)          9595      
Total params: 88,395
Trainable params: 88,395
Non-trainable params: 0
_________________________________________________________________


In [0]:
# SVG(model_to_dot(model).create(prog='dot', format='svg'))

## Learn

In [0]:
class HDLoss(Callback):
    def on_train_begin(self, logs={}):
        self.exp = Experiment('[2.1.1] ChaRNN convergence', capture_io=False,
                             api_key_getter=lambda: 'mB3wTK1XXivrCFp4HpnX/KDUFT/0az3+W8BhLSF+Vdg=')
        
        # SUPER-hacky, but it's work (needed to supress hd output)
        self.exp._hd.out_buf.write = lambda _: _
    
    def on_train_end(self, logs={}):
        self.exp.end()

    def on_batch_end(self, n_batch, logs={}):
        self.exp.metric('n_batch', n_batch)
        self.exp.metric('loss', logs.get('loss'))

In [0]:
callbacks = [
    HDLoss(), 
    ModelCheckpoint(MODEL_PATH, 
                    monitor='loss', save_best_only=True)
]
model.fit_generator(TTSequence(), verbose=1, epochs=100,
                    callbacks=callbacks,
                    use_multiprocessing=False);

Epoch 1/100

Epoch 2/100
  34/2673 [..............................] - ETA: 4:28 - loss: 0.4411



Epoch 3/100
 187/2673 [=>............................] - ETA: 4:04 - loss: 0.3755



Epoch 4/100
 228/2673 [=>............................] - ETA: 4:01 - loss: 0.3432



Epoch 5/100
 176/2673 [>.............................] - ETA: 4:11 - loss: 0.3270



Epoch 6/100
 199/2673 [=>............................] - ETA: 4:05 - loss: 0.3241



Epoch 7/100
 204/2673 [=>............................] - ETA: 4:05 - loss: 0.3131



Epoch 8/100
 178/2673 [>.............................] - ETA: 4:12 - loss: 0.3058



Epoch 9/100
 187/2673 [=>............................] - ETA: 4:13 - loss: 0.3022



Epoch 10/100
 187/2673 [=>............................] - ETA: 4:10 - loss: 0.3030



Epoch 11/100
 197/2673 [=>............................] - ETA: 4:02 - loss: 0.2980



Epoch 12/100
 206/2673 [=>............................] - ETA: 4:14 - loss: 0.2913



Epoch 13/100
 175/2673 [>.............................] - ETA: 4:08 - loss: 0.2932



Epoch 14/100
 174/2673 [>.............................] - ETA: 4:02 - loss: 0.3119



Epoch 15/100
 193/2673 [=>............................] - ETA: 4:02 - loss: 0.2926



Epoch 16/100
 184/2673 [=>............................] - ETA: 4:08 - loss: 0.2929



Epoch 17/100
 172/2673 [>.............................] - ETA: 4:15 - loss: 0.2852



Epoch 18/100
 181/2673 [=>............................] - ETA: 4:09 - loss: 0.2881



Epoch 19/100
 257/2673 [=>............................] - ETA: 4:02 - loss: 0.2838



Epoch 20/100
 208/2673 [=>............................] - ETA: 4:04 - loss: 0.2851



Epoch 21/100
 188/2673 [=>............................] - ETA: 4:10 - loss: 0.2826



Epoch 22/100
 224/2673 [=>............................] - ETA: 4:06 - loss: 0.2868



Epoch 23/100
 401/2673 [===>..........................] - ETA: 3:46 - loss: 0.2840



Epoch 24/100
 313/2673 [==>...........................] - ETA: 3:59 - loss: 0.2824



Epoch 25/100
 248/2673 [=>............................] - ETA: 4:07 - loss: 0.2801



## Test

In [0]:
def generate_text(length, start):
    """Generate text with specific length."""
    assert length >= 1
    
    start = ', '.join(sorted(start)) + ' | '
    
    ys = list(start)
    X = np.zeros((1, len(start) + length, VOCAB_SIZE))
    for i in range(len(start)):
        X[0, i, dir_map[ys[i]]] = 1
    
    for i in range(len(start), len(start) + length):
        ix = np.argmax(model.predict(X[:, :i, :])[0], 1)[-1]
        X[0, i, ix] = 1
        ys.append(rev_map[ix])
    
    return ''.join(ys)[len(start):]

In [0]:
generate_text(100, ['user', 'server'])