In [1]:
# !pip3 install bert-tensorflow --user
# !wget https://storage.googleapis.com/bert_models/2018_11_23/multi_cased_L-12_H-768_A-12.zip
# !unzip multi_cased_L-12_H-768_A-12.zip

In [2]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization
from bert import modeling
import numpy as np
import tensorflow as tf

In [3]:
from tqdm import tqdm
import json

In [4]:
with open('emotion.json') as fopen:
    x = json.load(fopen)
texts = x['x']
labels = x['y']

In [6]:
MAX_SEQ_LENGTH = 100

In [7]:
BERT_VOCAB = 'multi_cased_L-12_H-768_A-12/vocab.txt'
BERT_INIT_CHKPNT = 'multi_cased_L-12_H-768_A-12/bert_model.ckpt'
BERT_CONFIG = 'multi_cased_L-12_H-768_A-12/bert_config.json'

tokenization.validate_case_matches_checkpoint(False,BERT_INIT_CHKPNT)
tokenizer = tokenization.FullTokenizer(
      vocab_file=BERT_VOCAB, do_lower_case=False)

In [8]:
tokenizer.tokenize(texts[1])

['@',
 'Iz',
 '##zati',
 '##A',
 '##nis',
 '##ss',
 'E',
 '##h',
 '##h',
 'si',
 'bus',
 '##uk',
 'hati',
 'level',
 'maha',
 '##di',
 'ni']

In [9]:
input_ids, input_masks, segment_ids = [], [], []

for text in tqdm(texts):
    tokens_a = tokenizer.tokenize(text)
    if len(tokens_a) > MAX_SEQ_LENGTH - 2:
        tokens_a = tokens_a[:(MAX_SEQ_LENGTH - 2)]
    tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
    segment_id = [0] * len(tokens)
    input_id = tokenizer.convert_tokens_to_ids(tokens)
    input_mask = [1] * len(input_id)
    padding = [0] * (MAX_SEQ_LENGTH - len(input_id))
    input_id += padding
    input_mask += padding
    segment_id += padding
    
    input_ids.append(input_id)
    input_masks.append(input_mask)
    segment_ids.append(segment_id)

100%|██████████| 420516/420516 [02:09<00:00, 3235.30it/s]


In [10]:
bert_config = modeling.BertConfig.from_json_file(BERT_CONFIG)

In [11]:
epoch = 10
batch_size = 60
warmup_proportion = 0.1
num_train_steps = int(len(texts) / batch_size * epoch)
num_warmup_steps = int(num_train_steps * warmup_proportion)

In [12]:
class Model:
    def __init__(
        self,
        dimension_output,
        learning_rate = 2e-5,
    ):
        self.X = tf.placeholder(tf.int32, [None, None])
        self.segment_ids = tf.placeholder(tf.int32, [None, None])
        self.input_masks = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None])
        
        model = modeling.BertModel(
            config=bert_config,
            is_training=True,
            input_ids=self.X,
            input_mask=self.input_masks,
            token_type_ids=self.segment_ids,
            use_one_hot_embeddings=False)
        
        output_layer = model.get_pooled_output()
        self.logits = tf.layers.dense(output_layer, dimension_output)
        self.logits = tf.identity(self.logits, name = 'logits')
        
        self.cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits = self.logits, labels = self.Y
            )
        )
        
        self.optimizer = optimization.create_optimizer(self.cost, learning_rate, 
                                                       num_train_steps, num_warmup_steps, False)
        correct_pred = tf.equal(
            tf.argmax(self.logits, 1, output_type = tf.int32), self.Y
        )
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [13]:
dimension_output = 6
learning_rate = 2e-5

tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model(
    dimension_output,
    learning_rate
)

sess.run(tf.global_variables_initializer())
var_lists = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = 'bert')
saver = tf.train.Saver(var_list = var_lists)
saver.restore(sess, BERT_INIT_CHKPNT)

Instructions for updating:
Colocations handled automatically by placer.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from multi_cased_L-12_H-768_A-12/bert_model.ckpt


In [14]:
from sklearn.model_selection import train_test_split

train_input_ids, test_input_ids, train_input_masks, test_input_masks, train_segment_ids, test_segment_ids, train_Y, test_Y = train_test_split(
    input_ids, input_masks, segment_ids, labels, test_size = 0.2
)

In [15]:
from tqdm import tqdm
import time

EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 3, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0
    pbar = tqdm(
        range(0, len(train_input_ids), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, len(train_input_ids))
        batch_x = train_input_ids[i: index]
        batch_masks = train_input_masks[i: index]
        batch_segment = train_segment_ids[i: index]
        batch_y = train_Y[i: index]
        acc, cost, _ = sess.run(
            [model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x,
                model.segment_ids: batch_segment,
                model.input_masks: batch_masks
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)
        
    pbar = tqdm(range(0, len(test_input_ids), batch_size), desc = 'test minibatch loop')
    for i in pbar:
        index = min(i + batch_size, len(test_input_ids))
        batch_x = test_input_ids[i: index]
        batch_masks = test_input_masks[i: index]
        batch_segment = test_segment_ids[i: index]
        batch_y = test_Y[i: index]
        acc, cost = sess.run(
            [model.accuracy, model.cost],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x,
                model.segment_ids: batch_segment,
                model.input_masks: batch_masks
            },
        )
        test_loss += cost
        test_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    train_loss /= len(train_input_ids) / batch_size
    train_acc /= len(train_input_ids) / batch_size
    test_loss /= len(test_input_ids) / batch_size
    test_acc /= len(test_input_ids) / batch_size

    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1
        
    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'
        % (EPOCH, train_loss, train_acc, test_loss, test_acc)
    )
    EPOCH += 1

train minibatch loop: 100%|██████████| 5607/5607 [38:38<00:00,  2.49it/s, accuracy=0.885, cost=0.195] 
test minibatch loop: 100%|██████████| 1402/1402 [03:14<00:00,  7.74it/s, accuracy=0.864, cost=0.334]
train minibatch loop:   0%|          | 0/5607 [00:00<?, ?it/s]

epoch: 0, pass acc: 0.000000, current acc: 0.859481
time taken: 2513.028660774231
epoch: 0, training loss: 0.517047, training acc: 0.777140, valid loss: 0.279985, valid acc: 0.859481



train minibatch loop: 100%|██████████| 5607/5607 [38:41<00:00,  2.49it/s, accuracy=0.865, cost=0.149] 
test minibatch loop: 100%|██████████| 1402/1402 [03:14<00:00,  7.75it/s, accuracy=0.864, cost=0.232] 
train minibatch loop:   0%|          | 0/5607 [00:00<?, ?it/s]

epoch: 1, pass acc: 0.859481, current acc: 0.866175
time taken: 2516.0214388370514
epoch: 1, training loss: 0.259108, training acc: 0.867389, valid loss: 0.250787, valid acc: 0.866175



train minibatch loop: 100%|██████████| 5607/5607 [38:39<00:00,  2.49it/s, accuracy=0.923, cost=0.12]  
test minibatch loop: 100%|██████████| 1402/1402 [03:14<00:00,  7.76it/s, accuracy=0.864, cost=0.241] 
train minibatch loop:   0%|          | 0/5607 [00:00<?, ?it/s]

epoch: 2, pass acc: 0.866175, current acc: 0.868601
time taken: 2513.8838334083557
epoch: 2, training loss: 0.230914, training acc: 0.874625, valid loss: 0.248610, valid acc: 0.868601



train minibatch loop: 100%|██████████| 5607/5607 [38:39<00:00,  2.48it/s, accuracy=0.981, cost=0.101] 
test minibatch loop: 100%|██████████| 1402/1402 [03:14<00:00,  7.73it/s, accuracy=0.909, cost=0.193] 
train minibatch loop:   0%|          | 0/5607 [00:00<?, ?it/s]

time taken: 2514.679234266281
epoch: 3, training loss: 0.215719, training acc: 0.879932, valid loss: 0.255501, valid acc: 0.868598



train minibatch loop: 100%|██████████| 5607/5607 [38:41<00:00,  2.48it/s, accuracy=0.923, cost=0.18]  
test minibatch loop: 100%|██████████| 1402/1402 [03:14<00:00,  7.75it/s, accuracy=0.909, cost=0.224] 
train minibatch loop:   0%|          | 0/5607 [00:00<?, ?it/s]

epoch: 4, pass acc: 0.868601, current acc: 0.868752
time taken: 2516.8288049697876
epoch: 4, training loss: 0.203823, training acc: 0.883489, valid loss: 0.264114, valid acc: 0.868752



train minibatch loop: 100%|██████████| 5607/5607 [38:41<00:00,  2.49it/s, accuracy=0.962, cost=0.0973]
test minibatch loop: 100%|██████████| 1402/1402 [03:14<00:00,  7.72it/s, accuracy=0.909, cost=0.216] 
train minibatch loop:   0%|          | 0/5607 [00:00<?, ?it/s]

time taken: 2516.834005832672
epoch: 5, training loss: 0.192948, training acc: 0.887435, valid loss: 0.285693, valid acc: 0.868372



train minibatch loop: 100%|██████████| 5607/5607 [38:42<00:00,  2.48it/s, accuracy=0.923, cost=0.101] 
test minibatch loop: 100%|██████████| 1402/1402 [03:14<00:00,  7.71it/s, accuracy=0.932, cost=0.291] 
train minibatch loop:   0%|          | 0/5607 [00:00<?, ?it/s]

time taken: 2516.7549633979797
epoch: 6, training loss: 0.184223, training acc: 0.890249, valid loss: 0.303162, valid acc: 0.867342



train minibatch loop: 100%|██████████| 5607/5607 [38:41<00:00,  2.48it/s, accuracy=0.962, cost=0.101] 
test minibatch loop: 100%|██████████| 1402/1402 [03:15<00:00,  7.71it/s, accuracy=0.932, cost=0.22]  

time taken: 2517.0509905815125
epoch: 7, training loss: 0.176839, training acc: 0.892119, valid loss: 0.313233, valid acc: 0.868257

break epoch:8






In [16]:
real_Y, predict_Y = [], []

pbar = tqdm(
    range(0, len(test_input_ids), batch_size), desc = 'validation minibatch loop'
)
for i in pbar:
    index = min(i + batch_size, len(test_input_ids))
    batch_x = test_input_ids[i: index]
    batch_masks = test_input_masks[i: index]
    batch_segment = test_segment_ids[i: index]
    batch_y = test_Y[i: index]
    predict_Y += np.argmax(sess.run(model.logits,
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x,
                model.segment_ids: batch_segment,
                model.input_masks: batch_masks
            },
    ), 1, ).tolist()
    real_Y += batch_y

validation minibatch loop: 100%|██████████| 1402/1402 [03:12<00:00,  7.80it/s]


In [17]:
from sklearn import metrics

print(
    metrics.classification_report(
        real_Y, predict_Y, target_names = ['anger', 'fear', 'joy', 'love', 'sadness', 'surprise']
    )
)

             precision    recall  f1-score   support

      anger       0.94      0.92      0.93     14790
       fear       0.88      0.87      0.87      7527
        joy       0.92      0.93      0.92     16669
       love       0.96      0.91      0.94     15651
    sadness       0.74      0.95      0.83     20004
   surprise       0.86      0.44      0.58      9463

avg / total       0.88      0.87      0.86     84104



In [18]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'bert-emotion/model.ckpt')

'bert-emotion/model.ckpt'

In [19]:
tf.trainable_variables()

[<tf.Variable 'bert/embeddings/word_embeddings:0' shape=(119547, 768) dtype=float32_ref>,
 <tf.Variable 'bert/embeddings/token_type_embeddings:0' shape=(2, 768) dtype=float32_ref>,
 <tf.Variable 'bert/embeddings/position_embeddings:0' shape=(512, 768) dtype=float32_ref>,
 <tf.Variable 'bert/embeddings/LayerNorm/beta:0' shape=(768,) dtype=float32_ref>,
 <tf.Variable 'bert/embeddings/LayerNorm/gamma:0' shape=(768,) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/query/kernel:0' shape=(768, 768) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/query/bias:0' shape=(768,) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/key/kernel:0' shape=(768, 768) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/key/bias:0' shape=(768,) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/value/kernel:0' shape=(768, 768) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/value/bi

In [20]:
strings = ','.join(
    [
        n.name
        for n in tf.get_default_graph().as_graph_def().node
        if ('Variable' in n.op
        or 'Placeholder' in n.name
        or 'logits' in n.name
        or 'alphas' in n.name)
        and 'adam' not in n.name
        and 'beta' not in n.name
        and 'global_step' not in n.name
    ]
)
strings.split(',')

['Placeholder',
 'Placeholder_1',
 'Placeholder_2',
 'Placeholder_3',
 'bert/embeddings/word_embeddings',
 'bert/embeddings/token_type_embeddings',
 'bert/embeddings/position_embeddings',
 'bert/embeddings/LayerNorm/gamma',
 'bert/encoder/layer_0/attention/self/query/kernel',
 'bert/encoder/layer_0/attention/self/query/bias',
 'bert/encoder/layer_0/attention/self/key/kernel',
 'bert/encoder/layer_0/attention/self/key/bias',
 'bert/encoder/layer_0/attention/self/value/kernel',
 'bert/encoder/layer_0/attention/self/value/bias',
 'bert/encoder/layer_0/attention/output/dense/kernel',
 'bert/encoder/layer_0/attention/output/dense/bias',
 'bert/encoder/layer_0/attention/output/LayerNorm/gamma',
 'bert/encoder/layer_0/intermediate/dense/kernel',
 'bert/encoder/layer_0/intermediate/dense/bias',
 'bert/encoder/layer_0/output/dense/kernel',
 'bert/encoder/layer_0/output/dense/bias',
 'bert/encoder/layer_0/output/LayerNorm/gamma',
 'bert/encoder/layer_1/attention/self/query/kernel',
 'bert/encode

In [21]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    absolute_model_dir = '/'.join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + '/frozen_model.pb'
    clear_devices = True
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = clear_devices
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))

In [22]:
freeze_graph('bert-emotion', strings)

INFO:tensorflow:Restoring parameters from bert-emotion/model.ckpt
Instructions for updating:
Use tf.compat.v1.graph_util.convert_variables_to_constants
Instructions for updating:
Use tf.compat.v1.graph_util.extract_sub_graph
INFO:tensorflow:Froze 201 variables.
INFO:tensorflow:Converted 201 variables to const ops.
2132 ops in the final graph.


In [23]:
def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph

In [24]:
g = load_graph('bert-emotion/frozen_model.pb')
x = g.get_tensor_by_name('import/Placeholder:0')
segment_ids = g.get_tensor_by_name('import/Placeholder_1:0')
input_masks = g.get_tensor_by_name('import/Placeholder_2:0')
logits = g.get_tensor_by_name('import/logits:0')
test_sess = tf.InteractiveSession(graph = g)
result = test_sess.run(logits, feed_dict = {x: batch_x,
                                           segment_ids: batch_segment,
                                           input_masks: batch_masks})
result



array([[-2.8623114 , -2.1982768 , -1.6715943 , -2.245584  ,  6.751199  ,
         6.639578  ],
       [-0.51795965, -0.93047565,  0.9801868 , -2.8759506 ,  9.051945  ,
        -3.1700108 ],
       [ 0.56800187,  2.1384933 , -1.6594173 , -3.7759523 , -2.6354241 ,
         6.686962  ],
       [-1.1127876 , -0.90448266,  0.8982949 , -3.4574199 ,  9.992989  ,
        -1.9199495 ],
       [-2.311633  , -2.4050233 , -0.2732021 , 10.642401  , -1.400313  ,
        -3.2785573 ],
       [-1.5117706 , -1.7092414 , 10.709359  , -2.466627  , -1.9893956 ,
        -1.7655612 ],
       [-1.980026  , -1.9756565 , -0.38158944, -2.5745296 , 11.211397  ,
        -0.8161135 ],
       [-1.6265833 , -1.438981  , -0.54035276, -2.1318078 , 11.078032  ,
        -1.6591955 ],
       [-2.2023234 , -1.547989  , -0.31373847, 10.896988  , -2.6408284 ,
        -3.919     ],
       [-1.6516829 , -1.3925744 , -0.89381707, 10.980319  , -2.2088547 ,
        -4.1854672 ],
       [-1.0981047 , -2.1507485 , -1.2287042 , 10.

In [25]:
(np.argmax(result,axis= 1) == np.array(batch_y)).mean()

0.9090909090909091