In [1]:
!pip install keras-TCN

Collecting keras-TCN
  Downloading https://files.pythonhosted.org/packages/f2/bc/dcbdc24d80229022333150f42ff88ddf4c6793568f711a0d6fc1e83b102e/keras_tcn-2.3.5-py2.py3-none-any.whl
Installing collected packages: keras-TCN
Successfully installed keras-TCN-2.3.5


In [2]:
# Mount Google Drive inside the Colab VM; the checkpoint output directory
# used by this notebook lives beneath this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Directory on the mounted Drive that receives the model checkpoints.
# The path is written exactly once and the directory is created from that same
# value, so the created directory and OUTPUTDIR cannot drift apart (previously
# the path was duplicated in a shell-escaped `mkdir -p` command).
import os

OUTPUTDIR = '/content/drive/My Drive/nn_output'
if not os.path.isdir(OUTPUTDIR):
    os.makedirs(OUTPUTDIR)  # creates missing parents too, like `mkdir -p`

In [4]:
from keras.layers import (Bidirectional, Dense, Embedding, Input, Lambda, InputLayer, Reshape
                          , LSTM, RepeatVector, TimeDistributed)
from keras.models import Model, Sequential
from tcn import TCN

Using TensorFlow backend.


In [5]:
# --- Hyper-parameters --------------------------------------------------------
MAX_SEQUENCE_LEN = 500   # fixed length of every input sequence (see pad_sequences)
MAX_NUM_WORDS = 5000     # vocabulary size shared by Tokenizer, Embedding and softmax
EMBEDDING_DIM = 100      # width of the learned word embeddings
DILATIONS = tuple(2**n for n in range(8))  # 1, 2, 4, ..., 128 for both TCN stacks

# --- Sequence autoencoder ----------------------------------------------------
# Encoder: word indices -> embeddings -> TCN that returns a single vector
# (return_sequences=False). That vector is the latent code.
input_layer = Input(shape=(MAX_SEQUENCE_LEN,))
encoder = Embedding(MAX_NUM_WORDS, EMBEDDING_DIM)(input_layer)
encoder = TCN(name='latent', return_sequences=False, nb_stacks=1,
              dilations=list(DILATIONS))(encoder)

# Decoder: repeat the latent vector once per time step, run a second TCN over
# the repeated sequence and predict a word distribution at every position.
decoder = RepeatVector(MAX_SEQUENCE_LEN, name='decoder')(encoder)
decoder = TCN(return_sequences=True, nb_stacks=1,
              dilations=list(DILATIONS))(decoder)
output_layer = TimeDistributed(Dense(MAX_NUM_WORDS, activation='softmax'))(decoder)

model = Model(input_layer, output_layer)
model.compile(optimizer='adam', metrics=['accuracy'], loss='categorical_crossentropy')

# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" to the output.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 500)          0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 500, 100)     500000      input_1[0][0]                    
__________________________________________________________________________________________________
latent_initial_conv (Conv1D)    (None, 500, 64)      6464        embedding_1[0][0]                
__________________________________________________________________________________________________
latent_d_causal_conv_1_tanh_s0  (None, 500, 64)      8256        latent_initial_conv[0][0]        
__________________________________________________________________________________________________
activation

In [6]:
from keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import numpy as np
from nltk.corpus import reuters
from itertools import chain
import nltk
# Fetch the NLTK resources used by the cells below: the Reuters corpus itself
# and the 'punkt' tokenizer data (presumably what reuters.sents() relies on for
# sentence splitting -- TODO confirm).
nltk.download('reuters')
nltk.download('punkt')

[nltk_data] Downloading package reuters to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [0]:
def iter_labels(selection='train'):
    """Yield the category list of the enclosing document once per sentence.

    Walks every Reuters file id that starts with ``selection`` and, for each
    sentence in that file, yields the file's categories. The output therefore
    has one entry per sentence, in the same order in which ``iter_sents``
    produces the sentences themselves.

    Parameters
    ----------
    selection : str
        Prefix that a file id must start with to be included.

    Yields
    ------
    list
        The value of ``reuters.categories(fid)`` for the file the current
        sentence belongs to. The same list object is yielded for every
        sentence of one file, so callers should treat it as read-only.
    """
    for fid in reuters.fileids():
        if not fid.startswith(selection):
            continue
        # The categories depend only on the file, not on the sentence, so look
        # them up once per file instead of once per sentence.
        file_categories = reuters.categories(fid)
        for _ in reuters.sents(fid):
            yield file_categories
# One list of category names per sentence. The lists differ in length, so the
# array must be built with dtype=object: recent NumPy releases refuse to create
# such ragged arrays implicitly, while older ones silently produced the same
# 1-D object array that is requested explicitly here.
labels_train = np.array(list(iter_labels('train')), dtype=object)
labels_test = np.array(list(iter_labels('test')), dtype=object)

In [0]:
from sklearn.preprocessing import MultiLabelBinarizer
# Learn the category vocabulary from the training labels only, then encode the
# per-sentence label lists of both splits with that fitted binarizer.
mlb = MultiLabelBinarizer()
mlb.fit(labels_train)
y_train, y_test = (mlb.transform(split_labels)
                   for split_labels in (labels_train, labels_test))

In [0]:
def iter_sents(selection='train'):
    """Yield every sentence of the selected Reuters files as one string.

    A file takes part when its file id starts with ``selection``. Each
    sentence, as returned by ``reuters.sents``, is joined with single spaces.
    """
    selected = [fid for fid in reuters.fileids() if fid.startswith(selection)]
    for fid in selected:
        for tokens in reuters.sents(fid):
            yield " ".join(tokens)
# Materialise the sentence strings of both splits as NumPy arrays.
data_train, data_test = (
    np.array(list(iter_sents(split))) for split in ('train', 'test')
)

In [0]:
# The word index is fitted on the training sentences only and then applied to
# both splits.
tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
tokenizer.fit_on_texts(data_train)

# Sentences -> integer sequences -> arrays padded to MAX_SEQUENCE_LEN columns.
X_train, X_test = (
    pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=MAX_SEQUENCE_LEN)
    for texts in (data_train, data_test)
)

In [0]:
def data_generator(X_in, batch_size=32, shuffle=True, repeat=True):
    """Yield ``(inputs, one_hot_targets)`` batches for autoencoder training.

    Every batch is a group of rows of ``X_in``; its target is the very same
    batch, one-hot encoded via ``to_categorical(batch, MAX_NUM_WORDS)``.

    Parameters
    ----------
    X_in : numpy.ndarray
        Array of integer word indices with one sample per row (axis 0).
    batch_size : int
        Maximum number of rows per batch. The last batch of a pass holds the
        remaining rows and may be smaller.
    shuffle : bool
        When true, the row order is re-shuffled (``np.random.shuffle``) before
        every pass; when false, rows are served in their original order.
    repeat : bool
        When true, loop over the data forever; when false, stop after a
        single pass.

    Raises
    ------
    ValueError
        On first iteration, if ``batch_size`` is not positive or ``X_in`` has
        no rows (an endless generator could never yield anything).
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    n_samples = X_in.shape[0]
    if n_samples == 0:
        raise ValueError("X_in must contain at least one sample")

    index = np.arange(n_samples)
    while True:
        if shuffle:
            np.random.shuffle(index)
        # Stepping through the index handles inputs smaller than one batch and
        # the trailing partial batch uniformly (np.split required at least one
        # full batch).
        for start in range(0, n_samples, batch_size):
            chunk = X_in[index[start:start + batch_size]]
            yield chunk, to_categorical(chunk, MAX_NUM_WORDS)
        if not repeat:
            break

In [0]:
from keras.callbacks import ModelCheckpoint
import os.path

# Checkpoint file name template; Keras fills in the epoch number and the
# validation loss when it writes a file into OUTPUTDIR.
outfname = os.path.join(OUTPUTDIR, 'seq2seq-TCN-weights-ep{epoch:02d}-vl{val_loss:.2f}.hdf5')

# Save weights only, and only when the monitored validation loss improves.
checkpoint_options = dict(
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)
cp = ModelCheckpoint(outfname, **checkpoint_options)

In [0]:
bs = 64
epochs = 100

# The generators must use the same batch size as the step counts. Previously
# they ran with their default of 32 while the steps were derived from bs=64,
# so each "epoch" only covered about half of the data.
# Step counts are rounded up because the generator also yields the final,
# smaller batch of each pass; with floor division the passes and the epochs
# would drift apart.
train_steps = (X_train.shape[0] + bs - 1) // bs
val_steps = (X_test.shape[0] + bs - 1) // bs

history = model.fit_generator(
    data_generator(X_train, batch_size=bs),
    validation_data=data_generator(X_test, batch_size=bs),
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    epochs=epochs, shuffle=True, callbacks=[cp])

Epoch 1/100
 65/629 [==>...........................] - ETA: 5:07 - loss: 1.9340 - acc: 0.9223

In [0]:
# Run the autoencoder on the first 20 training sequences, take the most
# probable word index at every position and map the indices back to text.
reconstruction_probs = model.predict(X_train[:20], verbose=1)
tokenizer.sequences_to_texts(np.argmax(reconstruction_probs, axis=2))

In [0]:
# The same 20 training sequences decoded back to text, for comparison with the
# model's reconstructions.
reference_sequences = X_train[:20]
tokenizer.sequences_to_texts(reference_sequences)

In [0]:
# Encoder-only model: maps an input sequence to the output of the 'latent' TCN
# layer (the tensor held in `encoder`).
model_enc = Model(input_layer, encoder)

# `X_hat` was never defined in this notebook (NameError). The plot further down
# colours the points with labels_train[:1000], whose rows line up with the
# first 1000 rows of X_train, so those are the sequences to encode.
vecs = model_enc.predict(X_train[:1000], verbose=True)

In [0]:
from sklearn.manifold import TSNE
# Embed the latent vectors with t-SNE (library defaults); columns 0 and 1 of
# the result are what the scatter plot draws.
tsne = TSNE()
vecs_reduced = tsne.fit_transform(vecs)

In [0]:
%matplotlib inline

import matplotlib
import numpy as np
import matplotlib.pyplot as plt

# (category, number of files) for every Reuters category.
categories = [
    (name, len(reuters.fileids(categories=name)))
    for name in reuters.categories()
]

# The ten categories with the most files, largest first.
ranked = sorted(categories, key=lambda pair: -pair[1])
topn = [name for name, _count in ranked[:10]]

# For each of those categories, the positions among the first 1000 training
# sentences whose label list contains it.
labels_head = labels_train[:1000]
indexes = [
    (name, [row for row, row_cats in enumerate(labels_head) if name in row_cats])
    for name in topn
]

# One scatter series per category over the reduced vectors.
for name, rows in indexes:
    plt.scatter(vecs_reduced[rows, 0], vecs_reduced[rows, 1], label=name)
plt.legend(bbox_to_anchor=(1, 1.01))