# Data and Model Preparation

## Prepare Data

In [0]:
!mkdir data

SPLIT_TYPE = 'rand' # @param ['rand', 'qtoken']
DATA_URL = r'https://s3.amazonaws.com/commensenseqa/'
DATA_TRAIN_URL = DATA_URL+'train_'+SPLIT_TYPE+'_split.jsonl'
DATA_VALID_URL = DATA_URL+'dev_'+SPLIT_TYPE+'_split.jsonl'
DATA_TEST_URL = DATA_URL+'test_'+SPLIT_TYPE+'_split_no_answers.jsonl'

!wget $DATA_TRAIN_URL -O data/train.jsonl
!wget $DATA_VALID_URL -O data/dev.jsonl
!wget $DATA_TEST_URL -O data/test.jsonl

--2019-03-17 15:29:25--  https://s3.amazonaws.com/commensenseqa/train_rand_split.jsonl
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.86.69
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.86.69|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3513912 (3.4M) [binary/octet-stream]
Saving to: ‘data/train.jsonl’


2019-03-17 15:29:26 (15.2 MB/s) - ‘data/train.jsonl’ saved [3513912/3513912]

--2019-03-17 15:29:26--  https://s3.amazonaws.com/commensenseqa/dev_rand_split.jsonl
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.86.69
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.86.69|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 438003 (428K) [binary/octet-stream]
Saving to: ‘data/dev.jsonl’


2019-03-17 15:29:27 (3.63 MB/s) - ‘data/dev.jsonl’ saved [438003/438003]

--2019-03-17 15:29:29--  https://s3.amazonaws.com/commensenseqa/test_rand_split_no_answers.jsonl
Resolving s3.amazonaws.com (s3.amazonaws.com).

## Prepare BERT

In [0]:
import sys

!test -d bert_model || git clone https://github.com/google-research/bert bert_model

if not 'bert_model' in sys.path:
  sys.path += ['bert_model']

Cloning into 'bert_model'...
remote: Enumerating objects: 317, done.[K
remote: Total 317 (delta 0), reused 0 (delta 0), pack-reused 317[K
Receiving objects: 100% (317/317), 254.91 KiB | 3.44 MiB/s, done.
Resolving deltas: 100% (178/178), done.


## Prepare Embedding

In [0]:
!mkdir embed

GLOVE_URL = r'https://s3.eu-west-2.amazonaws.com/csqa-embed/glove_sm.txt'

!wget $GLOVE_URL -O embed/glove_sm.txt

--2019-03-17 15:29:34--  https://s3.eu-west-2.amazonaws.com/csqa-embed/glove_sm.txt
Resolving s3.eu-west-2.amazonaws.com (s3.eu-west-2.amazonaws.com)... 52.95.150.64
Connecting to s3.eu-west-2.amazonaws.com (s3.eu-west-2.amazonaws.com)|52.95.150.64|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 44697629 (43M) [text/plain]
Saving to: ‘embed/glove_sm.txt’


2019-03-17 15:29:37 (20.3 MB/s) - ‘embed/glove_sm.txt’ saved [44697629/44697629]



# Define Model

## Imports

In [0]:
import gc
import os
import csv
import json
import collections

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorboardcolab as tbc

from keras import utils
from keras import layers
from keras import models
from keras import backend as K
from keras import callbacks as cbs
from keras import optimizers as opt
from keras import initializers as ini
from keras import preprocessing as pre

import tokenization as tkz
import run_classifier as rcf

W0317 15:29:38.392063 139920176412544 __init__.py:56] Some hub symbols are not available because TensorFlow version is less than 1.14
Using TensorFlow backend.


## Data Processing

### Data Example Structure

In [0]:
class CsQAExample(object):
  
  def __init__(self, uid, Q, A, label):
    self.uid = uid
    self.Q = Q
    self.A = A
    self.label = label
    assert isinstance(A, (list, tuple)), 'A must be an instance of list or tuple'
    assert len(A) == 5, 'length of A should be 3, but got {}'.format(len(A))
    
  def __str__(self):
    return self.__repr__()
  
  def __repr__(self):
    l = [
        'uid: {}'.format(self.uid),
        'Q: {}'.format(self.Q),
        'A0: {}'.format(self.A[0]),
        'A1: {}'.format(self.A[1]),
        'A2: {}'.format(self.A[2]),
        'A3: {}'.format(self.A[3]),
        'A4: {}'.format(self.A[4]),
        'label: {}'.format(self.label)
    ]
    return '; '.join(l)

### Data Processor

In [0]:
class CsQAProcessor(object):
  
  def get_train_examples(self, data_dir):
    return self._create_examples(self._read_lines(os.path.join(data_dir, 'train.jsonl')))

  def get_dev_examples(self, data_dir):
    return self._create_examples(self._read_lines(os.path.join(data_dir, 'dev.jsonl')))

  def get_test_examples(self, data_dir):
    return self._create_examples(self._read_lines(os.path.join(data_dir, 'test.jsonl')))

  def get_labels(self):
    return [0, 1, 2, 3, 4]
    
  def _read_lines(self, path):
    return open(path).read().split('\n')[:-1]
    
  def _create_examples(self, lines):
    ans_dict = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4}
    examples = []
    for line in lines:
      d = json.loads(line)
      uid = d['id']
      Q = d['question']['stem']
      A = [
          d['question']['choices'][0]['text'],
          d['question']['choices'][1]['text'],
          d['question']['choices'][2]['text'],
          d['question']['choices'][3]['text'],
          d['question']['choices'][4]['text']
      ]
      label = ans_dict.get(d.get('answerKey'), -1)
      examples.append(CsQAExample(uid, Q, A, label))
    return examples

### Define Tokenizer

In [0]:
class SimpleTokenizer(object):
  
  def __init__(self, data_dir, include_set=['train', 'dev', 'test'], do_lower_case=True):
    processor = CsQAProcessor()
    data = []
    if 'train' in include_set:
      data.extend(processor.get_train_examples(data_dir))
    if 'dev' in include_set:
      data.extend(processor.get_dev_examples(data_dir))
    if 'test' in include_set:
      data.extend(processor.get_test_examples(data_dir))
    tokens = set()
    self.bstk = tkz.BasicTokenizer(do_lower_case)
    for example in data:
      Q = example.Q
      tokens.update(self.bstk.tokenize(Q))
      for A in example.A:
        tokens.update(self.bstk.tokenize(A))
    self.vocab = collections.OrderedDict()
    self.vocab['[PAD]'] = 0
    self.vocab['[UNK]'] = 1
    self.vocab['[CLS]'] = 2
    self.vocab['[SEP]'] = 3
    index = 4
    for token in sorted(list(tokens)):
      self.vocab[token] = index
    self.inv_vocab = {v: k for k, v in self.vocab.items()}
  
  def tokenize(self, text):
    return self.bstk.tokenize(text)
  
  def convert_tokens_to_ids(self, tokens):
    return tkz.convert_by_vocab(self.vocab, tokens)
  
  def convert_ids_to_tokens(self, ids):
    return tkz.convert_by_vocab(self.inv_vocab, ids)

## Custom Layers

### SEmb Layer

In [0]:
class SEmbLayer(layers.Layer):
  
  def __init__(self, data_dir, embed_path, trainable=True, **kwargs):
    self.output_dim = 300
    self.data_dir = data_dir
    self.embed_path = embed_path
    self.trainable = trainable
    super(SEmbLayer, self).__init__(**kwargs)
    
  def build(self, input_shape):
    tokenizer = SimpleTokenizer(self.data_dir)
    embed_df = pd.read_table(self.embed_path, sep=' ', header=None, index_col=0, quoting=csv.QUOTE_NONE)
    word_embed = embed_df.loc[tokenizer.vocab.keys(), :].values
    nan_mask = np.isnan(word_embed)
    word_embed[nan_mask] = np.random.normal(scale=0.02, size=np.count_nonzero(nan_mask))
    word_embed[tokenizer.convert_tokens_to_ids(['[PAD]'])] = 0.
    word_embed = utils.normalize(word_embed)
    voc_size, emb_dim = word_embed.shape
    assert emb_dim == self.output_dim
    self.embed = self.add_weight(
        name='embeddings',
        shape=(voc_size, emb_dim),
        initializer=ini.Constant(word_embed),
        trainable=self.trainable
    )
    super(SEmbLayer, self).build(input_shape)
  
  def call(self, x):
    _, seq = K.int_shape(x)
    mask = K.tf.count_nonzero(x, axis=-1, keepdims=True, dtype=K.tf.float32)
    flat_x = K.reshape(x, (-1,))
    flat_x_emb = K.gather(self.embed, flat_x)
    x_seq = K.reshape(flat_x_emb, (-1, seq, self.output_dim))
    feature = K.sum(x_seq, axis=1) / mask
    return feature
  
  def compute_output_shape(self, input_shape):
    return input_shape[0], self.output_dim

### ELMo Layer

In [0]:
class ELMoLayer(layers.Layer):
  
  def __init__(self, trainable=True, **kwargs):
    self.output_dim = 1024
    self.trainable = trainable
    super(ELMoLayer, self).__init__(**kwargs)
  
  def build(self, input_shape):
    self.elmo = hub.Module(
        spec='https://tfhub.dev/google/elmo/2',
        trainable=self.trainable,
        name='elmo_module'
    )
    self.trainable_weights += K.tf.trainable_variables(scope='elmo_module/.*')
    super(ELMoLayer, self).build(input_shape)
  
  def call(self, x):
    return self.elmo(
        inputs=K.squeeze(K.cast(x, tf.string), axis=1),
        signature='default',
        as_dict=True
    )['default']
  
  def compute_output_shape(self, input_shape):
    return input_shape[0], self.output_dim

### BERT Layer

In [0]:
class BERTLayer(layers.Layer):
  
  def __init__(self, trainable=True, **kwargs):
    self.output_dim = 768
    self.trainable = trainable
    super(BERTLayer, self).__init__(**kwargs)
  
  def build(self, input_shape):
    self.bert = hub.Module(
        spec='https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1',
        trainable=self.trainable,
        name='bert_module'
    )
    self.trainable_weights += K.tf.trainable_variables(scope='bert_module/bert/.*')
    self.non_trainable_weights += K.tf.trainable_variables(scope='bert_module/cls/.*')
    super(BERTLayer, self).build(input_shape)
    
  def call(self, x):
    input_ids, input_mask, segment_ids = K.tf.unstack(K.tf.transpose(x, perm=[1, 0, 2]))
    bert_inputs = dict(
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids
    )
    return self.bert(
        inputs=bert_inputs,
        signature='tokens',
        as_dict=True
    )['pooled_output']
  
  def compute_output_shape(self, input_shape):
    return input_shape[0], self.output_dim

## Feature Extraction

### SEmb Feature Extraction Function

( FLAG: Flag ) -> lambda

In [0]:
def get_semb_feature(FLAG):
  semb = SEmbLayer(data_dir=FLAG.data_dir, embed_path=FLAG.embed_path, name='semb_layer')
  return lambda x: semb(x)

### ELMo Feature Extraction Function

( drop_rate: float ) -> lambda

In [0]:
def get_elmo_feature():
  elmo = ELMoLayer(name='elmo_layer')
  out = layers.Dense(256, activation='tanh', name='elmo_dense') # 256
  return lambda x: out(elmo(x))

### BERT Feature Extraction Function

( ) -> lambda

In [0]:
def get_bert_feature():
  bert = BERTLayer(name='bert_layer')
  return lambda x: bert(x)

## Output Function

( drop_rate: float, name: str ) -> lambda

In [0]:
def get_output_layer(drop_rate=0.1, name=None):
  drop = layers.Dropout(drop_rate, name='{}_output_dropout'.format(name))
  out = layers.Dense(1, name='{}_output_dense'.format(name))
  return lambda x: out(drop(x))

## Define Models

( FLAG: Flag ) -> tuple

In [0]:
def build_model(FLAG):
  
  # semb input interface
  semb_inputs = [layers.Input(
      shape=(4*FLAG.max_seq_length,),
      dtype='int32',
      name='semb_input_{}'.format(n)
  ) for n in range(FLAG.n_choices)]
  # elmo input interface
  elmo_inputs = [layers.Input(
      shape=(1,),
      dtype='string',
      name='elmo_input_{}'.format(n)
  ) for n in range(FLAG.n_choices)]
  # bert input interface
  bert_inputs = [layers.Input(
      shape=(3, FLAG.max_seq_length),
      dtype='int32',
      name='bert_input_{}'.format(n)
  ) for n in range(FLAG.n_choices)]
  
  # feature extraction layers
  semb = get_semb_feature(FLAG) # [B, 300]
  elmo = get_elmo_feature() # [B, 256]
  bert = get_bert_feature() # [B, 768]
  
  # semb model
  semb_out = get_output_layer(drop_rate=FLAG.semb_output_dropout, name='semb')
  semb_vec = [semb_out(semb(x)) for x in semb_inputs]
  semb_res = layers.Concatenate(name='semb_choice_concat')(semb_vec)
  semb_pred = layers.Softmax(name='semb_pred')(semb_res)
  semb_model = models.Model(inputs=semb_inputs, outputs=semb_pred, name='semb_model')
  
  # elmo model
  elmo_out = get_output_layer(drop_rate=FLAG.elmo_output_dropout, name='elmo')
  elmo_vec = [elmo_out(elmo(x)) for x in elmo_inputs]
  elmo_res = layers.Concatenate(name='elmo_choice_concat')(elmo_vec)
  elmo_pred = layers.Softmax(name='elmo_pred')(elmo_res)
  elmo_model = models.Model(inputs=elmo_inputs, outputs=elmo_pred, name='elmo_model')
  
  # bert model
  bert_out = get_output_layer(drop_rate=FLAG.bert_output_dropout, name='bert')
  bert_vec = [bert_out(bert(x)) for x in bert_inputs]
  bert_res = layers.Concatenate(name='bert_choice_concat')(bert_vec)
  bert_pred = layers.Softmax(name='bert_pred')(bert_res)
  bert_model = models.Model(inputs=bert_inputs, outputs=bert_pred, name='bert_model')
  
  # hybrid model
  comb_out = get_output_layer(drop_rate=FLAG.comb_output_dropout, name='comb')
  comb_vec = []
  for i in range(FLAG.n_choices):
    comb_prt = []
    if FLAG.exclude != 'SEmb':
      comb_prt.append(semb(semb_inputs[i]))
    if FLAG.exclude != 'ELMo':
      comb_prt.append(elmo(elmo_inputs[i]))
    if FLAG.exclude != 'BERT':
      comb_prt.append(bert(bert_inputs[i]))
    combined = layers.Concatenate(name='comb_feature_concat_{}'.format(i))(comb_prt) # Concatenate
    comb_vec.append(comb_out(combined))
  comb_res = layers.Concatenate(name='comb_choice_concat')(comb_vec)
  comb_pred = layers.Softmax(name='comb_pred')(comb_res)
  comb_model = models.Model(inputs=semb_inputs+elmo_inputs+bert_inputs, outputs=comb_pred, name='comb_model')
  
  return semb_model, elmo_model, bert_model, comb_model

## Data Loader

( FLAG: Flag ) -> dict

In [0]:
def load_data(FLAG):
  # load BERT tokenization info
  bert_module = hub.Module('https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1')
  token_info = bert_module(signature='tokenization_info', as_dict=True)
  with tf.Session():
    vocab_file = token_info['vocab_file'].eval()
    do_lower_case = token_info['do_lower_case'].eval()
  
  ftkz = tkz.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)
  stkz = SimpleTokenizer(FLAG.data_dir, do_lower_case=do_lower_case)
  
  processor = CsQAProcessor()
  
  # target
  # semb (5, N, S), int32
  # elmo (5, N, 1), string
  # bert (5, N, 3, S), int32
  # label (N, 5), float32
  
  def to_features(examples):
    semb_features = [] # (N, 5, S)
    elmo_features = [] # (N, 5)
    bert_features = [] # (N, 5, 3, S)
    labels = [] # (N, 3)
    for example in examples:
      semb_feature = [] # (5, S)
      elmo_feature = [] # (5,)
      bert_feature = [] # (5, 3, S)
      Q = example.Q
      Q_stk = stkz.tokenize(Q)
      _Q_ftk = ftkz.tokenize(Q)
      for A in example.A:
        # semb
        A_stk = stkz.tokenize(A)
        s_tokens = Q_stk + A_stk
        s_tk_ids = stkz.convert_tokens_to_ids(s_tokens)
        zero_pad = [0] * (4*FLAG.max_seq_length - len(s_tk_ids))
        s_tk_ids += zero_pad
        semb_feature.append(s_tk_ids)
        # elmo
        sentence = ' '.join([Q, A])
        elmo_feature.append(sentence)
        # bert
        Q_tk = _Q_ftk[:]
        A_tk = ftkz.tokenize(A)
        rcf._truncate_seq_pair(Q_tk, A_tk, FLAG.max_seq_length-3)
        tokens = ['[CLS]'] + Q_tk + ['[SEP]'] + A_tk + ['[SEP]']
        input_ids = ftkz.convert_tokens_to_ids(tokens)
        input_mask = [1] * len(input_ids)
        segment_ids = [0] * (len(Q_tk)+2) + [1] * (len(A_tk)+1)
        padding = [0] * (FLAG.max_seq_length - len(input_ids))
        input_ids += padding
        input_mask += padding
        segment_ids += padding
        assert len(input_ids) == FLAG.max_seq_length, 'Require {0}, actual {1}'.format(FLAG.max_seq_length, len(input_ids))
        assert len(input_mask) == FLAG.max_seq_length, 'Require {0}, actual {1}'.format(FLAG.max_seq_length, len(input_mask))
        assert len(segment_ids) == FLAG.max_seq_length, 'Require {0}, actual {1}'.format(FLAG.max_seq_length, len(segment_ids))
        bert_feature.append([input_ids, input_mask, segment_ids]) # (3, S)
      semb_features.append(semb_feature)
      elmo_features.append(elmo_feature)
      bert_features.append(bert_feature)
      labels.append(np.eye(FLAG.n_choices)[example.label])
    semb_features = np.transpose(semb_features, (1, 0, 2)).tolist() # (5, N, S)
    elmo_features = np.transpose(elmo_features)[:,:,None].tolist() # (5, N, 1)
    bert_features = np.transpose(bert_features, (1, 0, 2, 3)).tolist() # (5, N, 3, S)
    semb_inputs = []
    elmo_inputs = []
    bert_inputs = []
    for n in range(FLAG.n_choices):
      semb_inputs.append(np.asarray(semb_features[n], dtype=np.int32))
      elmo_inputs.append(np.asarray(elmo_features[n]))
      bert_inputs.append(np.asarray(bert_features[n], dtype=np.int32))
    label_inputs = np.asarray(labels, dtype=np.float32)
    return semb_inputs, elmo_inputs, bert_inputs, label_inputs
  
  data = {'train': {}, 'dev': {}, 'test': {}}
  data['train']['semb'], data['train']['elmo'], data['train']['bert'], data['train']['label'] = to_features(processor.get_train_examples(FLAG.data_dir))
  data['train']['comb'] = data['train']['semb'] + data['train']['elmo'] + data['train']['bert']
  data['dev']['semb'], data['dev']['elmo'], data['dev']['bert'], data['dev']['label'] = to_features(processor.get_dev_examples(FLAG.data_dir))
  data['dev']['comb'] = data['dev']['semb'] + data['dev']['elmo'] + data['dev']['bert']
  data['test']['semb'], data['test']['elmo'], data['test']['bert'], _ = to_features(processor.get_test_examples(FLAG.data_dir))
  data['test']['comb'] = data['test']['semb'] + data['test']['elmo'] + data['test']['bert']
  
  return data

## Model Training and Evaluation

( model: keras.models.Model, data:  dict, FLAG: Flag ) -> None

In [0]:
def train_and_eval(model, data, FLAG):
  
  # obtain model parameters
  model_type = model.name.replace('_model', '')
  learning_rate = getattr(FLAG, '{}_learning_rate'.format(model_type))
  warmup_ratio = getattr(FLAG, '{}_warmup_ratio'.format(model_type))
  batch_size = getattr(FLAG, '{}_batch_size'.format(model_type))
  granulity = getattr(FLAG, '{}_granulity'.format(model_type))
  epochs = getattr(FLAG, '{}_epochs'.format(model_type))
  
  # number of train samples
  n_train = len(data['train']['label'])
  # number of dev samples
  n_valid = len(data['dev']['label'])
  
  # number of steps per epoch
  epoch_steps = int(n_train/batch_size)
  # number of total steps
  total_steps = int(epochs*epoch_steps)
  
  # callbacks
  cb = cbs.CallbackList()
  cb.append(cbs.ModelCheckpoint(
      filepath=os.path.join(FLAG.ckpt_dir, model_type+'.{epoch:02d}-{val_acc:.4f}.h5'),
      monitor='val_acc',
      save_best_only=True,
      save_weights_only=True
  ))
  cb.append(cbs.TensorBoard(
      log_dir=os.path.join(FLAG.log_dir, model_type+'/'),
      update_freq=3*batch_size
  ))
  cb.append(cbs.ProgbarLogger(
      count_mode='steps',
      stateful_metrics=['val_loss', 'val_acc']
  ))
  cb.set_params({
      'verbose': 1,
      'epochs': epochs,
      'samples': n_train,
      'steps': epoch_steps,
      'metrics': ['loss', 'acc', 'val_loss', 'val_acc']
  })
  cb.set_model(model)
  
  # output logs
  log = {'size': batch_size}
  
  # learning rate schedule
  lrs = lambda s: K.set_value(
      x=model.optimizer.lr,
      value=(learning_rate-K.epsilon())*(1-s/total_steps)**0.8+K.epsilon()
  )
  
  # evaluate model
  val = lambda: model.evaluate(
      x=data['dev'][model_type],
      y=data['dev']['label'],
      batch_size=batch_size,
      verbose=0
  )
  
  # warmup steps
  warmup_steps = int(warmup_ratio*total_steps)
  warmup_round = int(warmup_steps/epoch_steps)
  if warmup_steps:
    print('Warming up ...')
    pg = utils.Progbar(target=warmup_steps, stateful_metrics=['val_loss', 'val_acc'])
    for turn in range(warmup_round+1):
      slices = np.random.permutation(n_train)
      for step in range(warmup_steps-turn*epoch_steps):
        K.set_value(model.optimizer.lr, learning_rate*(turn*epoch_steps+step+1)/warmup_steps)
        index = slices[step*batch_size:(step+1)*batch_size]
        batch = [d[index] for d in data['train'][model_type]]
        label = data['train']['label'][index]
        res = model.train_on_batch(x=batch, y=label)
        pg.update(step, [('loss', res[0]), ('acc', res[1])])
      val_loss, val_acc = val()
      pg.update(warmup_steps-turn*epoch_steps, [('val_loss', val_loss), ('val_acc', val_acc)])
  
  # train and evaluation steps
  print('Train on {0} samples, validate on {1} samples'.format(n_train, n_valid))
  print('Total steps: {0}, {1} steps per epoch'.format(total_steps, epoch_steps))
  cb.on_train_begin(logs=log)
  steps = 0
  for itr in range(epochs):
    cb.on_epoch_begin(itr, logs=log)
    slices = np.random.permutation(n_train)
    for step in range(epoch_steps):
      log.update({'batch': step})
      cb.on_batch_begin(step, logs=log)
      steps += 1
      index = slices[step*batch_size:(step+1)*batch_size]
      batch = [d[index] for d in data['train'][model_type]]
      label = data['train']['label'][index]
      res = model.train_on_batch(x=batch, y=label)
      log.update({'loss': res[0], 'acc': res[1]})
      if steps % granulity == 0:
        lrs(steps)
        gc.collect()
        val_loss, val_acc = val()
        log.update({'val_loss': val_loss, 'val_acc': val_acc, 'lr': K.get_value(model.optimizer.lr)})
      cb.on_batch_end(step, logs=log)
    val_loss, val_acc = val()
    log.update({'val_loss': val_loss, 'val_acc': val_acc})
    cb.on_epoch_end(itr, logs=log)
  cb.on_train_end(logs=log)

# Run Classifier

## Main Procedure

( FLAG: Flag ) -> None

In [0]:
def main(FLAG):
  
  tf.logging.set_verbosity(FLAG.logging_level)
  tf.gfile.MakeDirs(FLAG.ckpt_dir)
  tf.gfile.MakeDirs(FLAG.log_dir)
  
  print('Loading data...')
  data = load_data(FLAG)
  gc.collect()
  print('Building models...')
  semb_model, elmo_model, bert_model, comb_model = build_model(FLAG)
  gc.collect()
  
  if FLAG.semb: # SEmb fine-tuning
    print('Running SEmb...')
    if FLAG.semb_checkpoint_path:
      semb_model.load_weights(FLAG.semb_checkpoint_path)
      print('SEmb weights loaded.')
    semb_model.compile(
        loss='categorical_crossentropy',
        optimizer=opt.Adam(lr=FLAG.semb_learning_rate),
        metrics=['accuracy']
    )
    semb_model.summary()
    train_and_eval(semb_model, data, FLAG)
  
  if FLAG.elmo: # ELMo fine-tuning
    print('Running ELMo...')
    if FLAG.elmo_checkpoint_path:
      elmo_model.load_weights(FLAG.elmo_checkpoint_path)
      print('ELMo weights loaded.')
    elmo_model.compile(
        loss='categorical_crossentropy',
        optimizer=opt.Adam(lr=FLAG.elmo_learning_rate),
        metrics=['accuracy']
    )
    elmo_model.summary()
    train_and_eval(elmo_model, data, FLAG)
  
  if FLAG.bert: # BERT fine-tuning
    print('Running BERT...')
    if FLAG.bert_checkpoint_path:
      bert_model.load_weights(FLAG.bert_checkpoint_path)
      print('BERT weights loaded.')
    bert_model.compile(
        loss='categorical_crossentropy',
        optimizer=opt.Adam(lr=FLAG.bert_learning_rate, clipnorm=1.),
        metrics=['accuracy']
    )
    bert_model.summary()
    train_and_eval(bert_model, data, FLAG)
  
  if FLAG.comb: # Hybrid model fine-tuning
    print('Running hybrid model...')
    if FLAG.semb_checkpoint_path:
      semb_model.load_weights(FLAG.semb_checkpoint_path)
      print('SEmb weights loaded.')
    if FLAG.elmo_checkpoint_path:
      elmo_model.load_weights(FLAG.elmo_checkpoint_path)
      print('ELMo weights loaded.')
    if FLAG.bert_checkpoint_path:
      bert_model.load_weights(FLAG.bert_checkpoint_path)
      print('BERT weights loaded.')
    if FLAG.comb_checkpoint_path:
      print('Hybrid model weights loaded.')
      comb_model.load_weights(FLAG.comb_checkpoint_path)
    if FLAG.freeze_semb:
      for layer in semb_model.layers:
        layer.trainable = False
    if FLAG.freeze_elmo:
      for layer in elmo_model.layers:
        layer.trainable = False
    if FLAG.freeze_bert:
      for layer in bert_model.layers:
        layer.trainable = False
    comb_model.compile(
        loss='categorical_crossentropy',
        optimizer=opt.Adam(lr=FLAG.comb_learning_rate),
        metrics=['accuracy']
    )
    comb_model.summary()
    train_and_eval(comb_model, data, FLAG)

## Flag Class

In [0]:
class Flag(object):
  
  def __init__(self):
    self.logging_level = None
    self.embed_path = None
    self.data_dir = None
    self.ckpt_dir = None
    self.log_dir = None
    self.n_choices = 3
    self.max_seq_length = 32
    self.semb_checkpoint_path = None
    self.elmo_checkpoint_path = None
    self.bert_checkpoint_path = None
    self.comb_checkpoint_path = None
    self.semb_output_dropout = 0.1
    self.elmo_output_dropout = 0.1
    self.bert_output_dropout = 0.1
    self.comb_output_dropout = 0.1
    self.semb_learning_rate = 1e-3
    self.elmo_learning_rate = 1e-3
    self.bert_learning_rate = 1e-5
    self.comb_learning_rate = 1e-5
    self.semb_warmup_ratio = 0.01
    self.elmo_warmup_ratio = 0.01
    self.bert_warmup_ratio = 0.01
    self.comb_warmup_ratio = 0.01
    self.semb_batch_size = 32
    self.elmo_batch_size = 32
    self.bert_batch_size = 32
    self.comb_batch_size = 32
    self.semb_granulity = 77
    self.elmo_granulity = 77
    self.bert_granulity = 77
    self.comb_granulity = 10
    self.semb_epochs = 9
    self.elmo_epochs = 9
    self.bert_epochs = 3
    self.comb_epochs = 3
    self.exclude = None
    self.freeze_semb = False
    self.freeze_elmo = False
    self.freeze_bert = False
    self.semb = False
    self.elmo = False
    self.bert = False
    self.comb = False
    
FLAG = Flag()

## Define Parameters

In [0]:
# @title Model Parameters { run: 'auto' }
# @markdown #### General Settings
FLAG.logging_level = "ERROR" # @param ['DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL']
FLAG.embed_path = "embed/glove_sm.txt" # @param {type: 'string'}
FLAG.data_dir = "data/" # @param {type:'string'}
FLAG.ckpt_dir = "ckpt/" # @param {type:'string'}
FLAG.log_dir = "log/" # @param {type:'string'}
FLAG.n_choices = 5 # @param {type:"integer"}
FLAG.max_seq_length = 32 # @param {type:"integer"}
# @markdown ---
# @markdown #### SEmb Model Parameters
FLAG.semb_checkpoint_path = "ckpt/semb.01-0.1788.h5" # @param {type:'string'}
FLAG.semb_output_dropout = 0.05 # @param {type:"number"}
FLAG.semb_learning_rate = 1e-5 # @param {type:"number"}
FLAG.semb_warmup_ratio = 0.01 # @param {type:"number"}
FLAG.semb_batch_size = 64 # @param {type:"integer"}
FLAG.semb_granulity = 77 # @param {type:"integer"}
FLAG.semb_epochs = 1 # @param {type:"integer"}
# @markdown ---
# @markdown #### ELMo Model Parameters
FLAG.elmo_checkpoint_path = "ckpt/elmo.08-0.3333.h5" # @param {type:'string'}
FLAG.elmo_output_dropout = 0.05 # @param {type:"number"}
FLAG.elmo_learning_rate = 1e-3 # @param {type:"number"}
FLAG.elmo_warmup_ratio = 0.01 # @param {type:"number"}
FLAG.elmo_batch_size = 64 # @param {type:"integer"}
FLAG.elmo_granulity = 77 # @param {type:"integer"}
FLAG.elmo_epochs = 7 # @param {type:"integer"}
# @markdown ---
# @markdown #### BERT Model Parameters
FLAG.bert_checkpoint_path = "ckpt/bert.01-0.5340.h5" # @param {type:'string'}
FLAG.bert_output_dropout = 0.05 # @param {type:"number"}
FLAG.bert_learning_rate = 1e-5 # @param {type:"number"}
FLAG.bert_warmup_ratio = 0.05 # @param {type:"number"}
FLAG.bert_batch_size = 32 # @param {type:"integer"}
FLAG.bert_granulity = 77 # @param {type:"integer"}
FLAG.bert_epochs = 1 # @param {type:"integer"}
# @markdown ---
# @markdown #### Hybrid Model Parameters
FLAG.comb_checkpoint_path = "" # @param {type:'string'}
FLAG.comb_output_dropout = 0.1 # @param {type:"number"}
FLAG.comb_learning_rate = 1e-5 # @param {type:"number"}
FLAG.comb_warmup_ratio = 0.01 # @param {type:"number"}
FLAG.comb_batch_size = 32 # @param {type:"integer"}
FLAG.comb_granulity = 20 # @param {type:"integer"}
FLAG.comb_epochs = 1 # @param {type:"integer"}
FLAG.exclude = "None" # @param ['None', 'SEmb', 'ELMo', 'BERT']
FLAG.freeze_semb = False # @param {type:"boolean"}
FLAG.freeze_elmo = False # @param {type:"boolean"}
FLAG.freeze_bert = False # @param {type:"boolean"}

## Define Mode

In [0]:
# @title Model to Run { run: 'auto' }
FLAG.semb = False # @param {type:"boolean"}
FLAG.elmo = False # @param {type:"boolean"}
FLAG.bert = False # @param {type:"boolean"}
FLAG.comb = True # @param {type:"boolean"}

## Run Tensorboard

In [0]:
tbc.TensorBoardColab(graph_path=FLAG.log_dir);

Wait for 8 seconds...
TensorBoard link:
https://b90483f7.ngrok.io


## Run Classifier

In [0]:
main(FLAG)

Loading data...
Building models...


Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  del sys.path[0]
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


Running hybrid model...
SEmb weights loaded.
ELMo weights loaded.
BERT weights loaded.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
elmo_input_0 (InputLayer)       (None, 1)            0                                            
__________________________________________________________________________________________________
elmo_input_1 (InputLayer)       (None, 1)            0                                            
__________________________________________________________________________________________________
elmo_input_2 (InputLayer)       (None, 1)            0                                            
__________________________________________________________________________________________________
elmo_input_3 (InputLayer)       (None, 1)            0                                            
______________________

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 9860 samples, validate on 1236 samples
Total steps: 308, 308 steps per epoch
Epoch 1/1
 59/308 [====>.........................] - ETA: 27:33 - loss: 1.1917 - acc: 0.5392 - val_loss: 1.2471 - val_acc: 0.4676

KeyboardInterrupt: ignored

# Store Checkpoint

In [0]:
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
!cp -r ckpt/ gdrive/My\ Drive/ckpt_elmo_bert

In [0]:
!cp -r gdrive/My\ Drive/ckpt_3models/ ckpt