In [1]:
import os
# Restrict this process to the GPUs with ids 2 and 3. The variable is set before
# TensorFlow is imported — NOTE(review): presumably so that TensorFlow only ever
# sees these two devices; keep this ordering when editing the cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"
import tensorflow as tf
print(tf.__version__)
import keras
print(keras.__version__)
from keras.backend.tensorflow_backend import set_session
# Session configuration shared by everything Keras runs in this notebook.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # grow GPU memory usage on demand instead of reserving it up front
config.log_device_placement = True  # log on which device each operation ran
                                    # (nothing gets printed in Jupyter, only when run standalone)
sess = tf.Session(config=config)
set_session(sess)  # make this TensorFlow session the default session for Keras

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


1.13.2
2.3.1


Using TensorFlow backend.


In [2]:
import numpy as np
from tqdm import tqdm
from keras import layers
from keras import models
from keras import optimizers
from keras.utils import multi_gpu_model
from keras_bert import load_vocabulary, load_trained_model_from_checkpoint, Tokenizer

In [3]:
# --- Tunable hyper-parameters for the fine-tuning run ---
SEQ_LEN = 128    # `seq_len` of the loaded BERT model and `max_len` used when encoding reviews
BATCH_SIZE = 64  # `batch_size` passed to `model.fit`
EPOCHS = 5       # number of passes over the training split
LR = 1e-4        # learning rate handed to the Adam optimizer

In [4]:
# Directory of the pre-trained BERT release and the three files read from it:
# the model configuration, the TensorFlow checkpoint prefix and the vocabulary.
pretrained_path = '../bert/uncased_L-12_H-768_A-12'
config_path, checkpoint_path, vocab_path = (
    os.path.join(pretrained_path, file_name)
    for file_name in ('bert_config.json', 'bert_model.ckpt', 'vocab.txt')
)

In [5]:
# Load the vocabulary file into `token_dict` and build the tokenizer that
# `load_data` uses to encode the reviews.
token_dict = load_vocabulary(vocab_path)
tokenizer = Tokenizer(token_dict)
# Load the pre-trained BERT checkpoint with inputs of length SEQ_LEN.
# NOTE(review): `training=True` presumably keeps the pre-training heads, which
# would be why the `NSP-Dense` layer looked up by the classifier cell exists, and
# `trainable=True` presumably leaves the loaded weights trainable for
# fine-tuning — confirm both against the keras-bert documentation.
model = load_trained_model_from_checkpoint(
    config_path,
    checkpoint_path,
    training=True,
    trainable=True,
    seq_len=SEQ_LEN,
)
# Print the layer table of the loaded network.
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input-Token (InputLayer)        (None, 128)          0                                            
__________________________________________________________________________________________________
Input-Segment (InputLayer)      (None, 128)          0                                            
__________________________________________________________________________________________________
Embedding-Token (TokenEmbedding [(None, 128, 768), ( 23440896    Input-Token[0][0]                
__________________________________________________________________________________________________
Embedding-Segment (Embedding)   (None, 128, 768)     1536        Input-Segment[0][0]              
____________________

In [6]:
def load_data(path):
    """Read one labelled IMDB split from disk and encode it for BERT.

    ``path`` must contain a ``neg`` and a ``pos`` sub-folder with one review
    per file. Every review is encoded with the module-level ``tokenizer``
    using ``max_len=SEQ_LEN``; reviews from ``neg`` get label 0 and reviews
    from ``pos`` get label 1.

    Args:
        path: Directory of the split, e.g. ``.../aclImdb/train``.

    Returns:
        A pair ``([token_ids, segment_ids], labels)``: ``token_ids`` is the
        array of encoded reviews, ``segment_ids`` is an all-zero array of the
        same shape and dtype, and ``labels`` is the array of 0/1 sentiments.
        Reviews and labels are shuffled together with a fixed seed (0).
    """
    indices, sentiments = [], []
    for folder, sentiment in (('neg', 0), ('pos', 1)):
        folder = os.path.join(path, folder)
        # os.listdir returns names in arbitrary order; sorting them makes the
        # seeded shuffle below give the same result on every machine.
        for name in tqdm(sorted(os.listdir(folder))):
            # Decode explicitly instead of relying on the platform's default
            # encoding (the review files are expected to be UTF-8 text).
            with open(os.path.join(folder, name), 'r', encoding='utf-8') as reader:
                text = reader.read()
            # The segment ids returned by the tokenizer are not kept; an
            # all-zero segment array is built for the whole split below.
            ids, _ = tokenizer.encode(text, max_len=SEQ_LEN)
            indices.append(ids)
            sentiments.append(sentiment)
    # Shuffle reviews and labels as pairs so they stay aligned.
    items = list(zip(indices, sentiments))
    rng = np.random.RandomState(0)
    rng.shuffle(items)
    indices, sentiments = zip(*items)
    indices = np.array(indices)
    return [indices, np.zeros_like(indices)], np.array(sentiments)

# Locate the labelled train and test splits of the IMDB dataset.
train_path, test_path = (
    os.path.join("../datasets", 'aclImdb', split_name)
    for split_name in ('train', 'test')
)
# Encode both splits (train first, then test) into model inputs and labels.
(X_train, y_train), (X_test, y_test) = load_data(train_path), load_data(test_path)

100%|██████████| 12500/12500 [00:28<00:00, 443.31it/s]
100%|██████████| 12500/12500 [00:29<00:00, 428.15it/s]
100%|██████████| 12500/12500 [00:27<00:00, 447.21it/s]
100%|██████████| 12500/12500 [00:28<00:00, 443.22it/s]


In [7]:
# Keep only the first two inputs of the loaded network (token ids and segment
# ids, per the model summary); `load_data` produces exactly these two arrays.
inputs = model.inputs[:2]
# Output of the pre-trained `NSP-Dense` layer, used as the feature vector for
# the new classification head.
dense = model.get_layer('NSP-Dense').output
# Two-way softmax head: class 0 = negative, class 1 = positive.
outputs = layers.Dense(2, activation='softmax')(dense)
model = models.Model(inputs, outputs)
# Replicate the model on the two GPUs exposed through CUDA_VISIBLE_DEVICES.
# NOTE(review): this cell rebinds `model`, so re-running it without first
# re-running the cell that loads the checkpoint will presumably fail at the
# `NSP-Dense` lookup — confirm before re-executing out of order.
model = multi_gpu_model(model, gpus=2)
model.compile(
    # `learning_rate` is the current name of the argument in Keras >= 2.3;
    # `lr` is only kept as a legacy alias.
    optimizer=optimizers.Adam(learning_rate=LR),
    # Sparse variant because the labels are integer class ids, not one-hot.
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input-Token (InputLayer)        (None, 128)          0                                            
__________________________________________________________________________________________________
Input-Segment (InputLayer)      (None, 128)          0                                            
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 128)          0           Input-Token[0][0]                
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 128)          0           Input-Segment[0][0]              
____________________________________________________________________________________________

In [8]:
# Fine-tune the classifier on the training split and report the metrics on
# the test split after every epoch. The call is deliberately left as the last
# expression of the cell so the returned History object is still displayed.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_test, y_test),
)

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 25000 samples, validate on 25000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x7efb9a304ef0>

In [9]:
# Predicted class of each test review = index of its largest softmax output.
class_probabilities_to_label = dict(axis=1)
y_pred = model.predict(X_test).argmax(**class_probabilities_to_label)
del class_probabilities_to_label
# Test accuracy: number of matching predictions divided by the number of labels.
print((y_test == y_pred).sum() / len(y_test))

0.86696
