In [1]:
# One-time setup (uncomment to run): fetch and unpack the pretrained checkpoint
# archive, then fetch the two BASE config JSON files used by this notebook.
# !wget https://huseinhouse-storage.s3-ap-southeast-1.amazonaws.com/bert-bahasa/electra-bahasa-base-27-04-2020.tar.gz
# !tar -zxf electra-bahasa-base-27-04-2020.tar.gz
# !wget https://raw.githubusercontent.com/huseinzol05/Malaya/master/pretrained-model/electra/BASE-config.json
# !wget https://raw.githubusercontent.com/huseinzol05/Malaya/master/pretrained-model/electra/BASE-config-generator.json

In [2]:
import os

# An empty CUDA_VISIBLE_DEVICES exposes no CUDA devices to this process.
# It is set here, before the TensorFlow import in the following cell.
os.environ.update({'CUDA_VISIBLE_DEVICES': ''})

In [3]:
import tensorflow as tf
from model import modeling
from model import optimization
from configure_pretraining import PretrainingConfig

In [4]:
# Load the generator hyper-parameters; the Model class below reads this
# module-level `bert_config`.
bert_config = modeling.BertConfig.from_json_file('BASE-config-generator.json')
# Show the parsed fields as a plain dict for a quick sanity check.
vars(bert_config)

{'vocab_size': 32000,
 'hidden_size': 256,
 'num_hidden_layers': 12,
 'num_attention_heads': 4,
 'hidden_act': 'gelu',
 'intermediate_size': 1024,
 'hidden_dropout_prob': 0.1,
 'attention_probs_dropout_prob': 0.1,
 'max_position_embeddings': 512,
 'type_vocab_size': 2,
 'initializer_range': 0.02,
 'embedding_size': 768,
 'layer_norm_eps': 1e-12}

In [5]:
from model import tokenization

# Build the tokenizer from the vocabulary file that is later shipped with the
# exported model (copied into electra-base/ in the packaging cell).
# NOTE(review): `tokenizer` is not referenced again in the visible cells.
tokenizer = tokenization.FullTokenizer(vocab_file='out/bahasa.wordpiece')

In [6]:
def gather_positions(sequence, positions):
    """Pick, for every batch row, the entries of `sequence` at `positions`.

    Args:
        sequence: tensor of rank 2 (`[B, L]`) or rank 3 (`[B, L, D]`).
        positions: integer indices along the `L` axis. They are added to a
            `[B, 1]` per-row offset, so a `[B, N]` layout is assumed
            (TODO confirm against callers).

    Returns:
        The gathered entries reshaped to `[B, -1, D]` for a rank-3 input, or
        `[B, -1]` for a rank-2 input.
    """
    dims = modeling.get_shape_list(sequence, expected_rank = [2, 3])
    has_depth = len(dims) == 3
    if has_depth:
        batch, length, depth = dims
    else:
        batch, length = dims
        depth = 1
        # Give the rank-2 input a trailing unit axis so both cases share
        # the same flatten-and-gather path below.
        sequence = tf.expand_dims(sequence, -1)
    # Row b of the flattened [B * L, D] matrix starts at offset b * L.
    row_offsets = tf.expand_dims(length * tf.range(batch), -1)
    flat_index = tf.reshape(positions + row_offsets, [-1])
    flat_rows = tf.reshape(sequence, [batch * length, depth])
    picked = tf.gather(flat_rows, flat_index)
    out_shape = [batch, -1, depth] if has_depth else [batch, -1]
    return tf.reshape(picked, out_shape)

class Model:
    """Inference graph for the generator encoder plus its vocabulary head.

    Builds `modeling.BertModel` under the 'generator' scope with
    `is_training=False`, using the module-level `bert_config`, then adds a
    prediction head under the 'generator_predictions' scope.

    Attributes:
        X: int32 placeholder `[None, None]`, fed as `input_ids`.
        segment_ids: int32 placeholder `[None, None]`, fed as `token_type_ids`.
        input_masks: int32 placeholder `[None, None]`, fed as `input_mask`.
        Y: int32 placeholder `[None]`; not consumed by this graph, kept so the
            public attributes stay unchanged.
        logits: the encoder's sequence output (hidden states, not vocabulary
            scores; the attribute name is kept for compatibility).
        _logits: vocabulary scores for every input position.
    """

    def __init__(
        self,
    ):
        self.X = tf.placeholder(tf.int32, [None, None])
        self.segment_ids = tf.placeholder(tf.int32, [None, None])
        self.input_masks = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None])

        model = modeling.BertModel(
            bert_config=bert_config,
            is_training=False,
            input_ids=self.X,
            input_mask=self.input_masks,
            token_type_ids=self.segment_ids,
            use_one_hot_embeddings=False,
            scope='generator',
            embedding_size = bert_config.embedding_size)

        self.logits = model.get_sequence_output()

        with tf.variable_scope("generator_predictions"):
            # Score every position of the sequence output directly.
            # The earlier code gathered hidden states with `self.input_masks`
            # as the position indices; the mask holds 0/1 flags, so every row
            # was read from token 0 or token 1 instead of its own position.
            # Only ops change here: the variables created below keep the same
            # names and shapes, so checkpoints restore exactly as before.
            relevant_hidden = self.logits
            hidden = tf.layers.dense(
                relevant_hidden,
                units=modeling.get_shape_list(model.get_embedding_table())[-1],
                activation=modeling.get_activation(bert_config.hidden_act),
                kernel_initializer=modeling.create_initializer(
                    bert_config.initializer_range))
            hidden = modeling.layer_norm(hidden)
            output_bias = tf.get_variable(
                "output_bias",
                shape=[bert_config.vocab_size],
                initializer=tf.zeros_initializer())
            # Output projection reuses the input embedding table (transposed).
            logits = tf.matmul(hidden, model.get_embedding_table(),
                               transpose_b=True)
            self._logits = tf.nn.bias_add(logits, output_bias)

In [7]:
# Start from an empty default graph so re-running this cell does not add a
# second copy of the model's ops and variables to the previous graph.
tf.reset_default_graph()
# The session is created before the model; both use the fresh default graph.
sess = tf.InteractiveSession()
model = Model()

Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.


In [9]:
# Initialise every variable first; the ones listed in the saver are then
# overwritten with the checkpoint values by `restore`.
sess.run(tf.global_variables_initializer())
# `scope` filters by name prefix, so 'generator' also selects the variables
# created under 'generator_predictions'.
var_lists = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
# NOTE(review): presumably the embedding variables live under 'electra' --
# confirm against modeling.BertModel.
electra = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='electra')
saver = tf.train.Saver(var_list=[*var_lists, *electra])
saver.restore(sess, 'out/model.ckpt-728800')

INFO:tensorflow:Restoring parameters from out/model.ckpt-728800


In [16]:
# Collect, in graph order, the name of every node whose name contains
# 'Softmax' (the per-layer self-attention softmax ops listed in the next cell).
attentions = [
    node_name
    for node_name in (
        node.name for node in tf.get_default_graph().as_graph_def().node
    )
    if 'Softmax' in node_name
]

In [17]:
# Display the collected node names (one Softmax per encoder layer is expected).
attentions

['generator/encoder/layer_0/attention/self/Softmax',
 'generator/encoder/layer_1/attention/self/Softmax',
 'generator/encoder/layer_2/attention/self/Softmax',
 'generator/encoder/layer_3/attention/self/Softmax',
 'generator/encoder/layer_4/attention/self/Softmax',
 'generator/encoder/layer_5/attention/self/Softmax',
 'generator/encoder/layer_6/attention/self/Softmax',
 'generator/encoder/layer_7/attention/self/Softmax',
 'generator/encoder/layer_8/attention/self/Softmax',
 'generator/encoder/layer_9/attention/self/Softmax',
 'generator/encoder/layer_10/attention/self/Softmax',
 'generator/encoder/layer_11/attention/self/Softmax']

In [10]:
# Re-save only the trainable variables of the current graph under a new
# checkpoint prefix; this rebinds `saver`, replacing the one used for restore.
saver = tf.train.Saver(var_list=tf.trainable_variables())
saver.save(sess, 'electra-base/model.ckpt')

'electra-base/model.ckpt'

In [12]:
# Place the generator config and the vocabulary file next to the checkpoint
# saved under electra-base/, then pack the whole directory into one archive.
!cp BASE-config-generator.json electra-base/config.json
!cp out/bahasa.wordpiece electra-base/bahasa.wordpiece
!tar cvzf electra-base.tar.gz electra-base

electra-base/
electra-base/model.ckpt.index
electra-base/config.json
electra-base/model.ckpt.data-00000-of-00001
electra-base/bahasa.wordpiece
electra-base/checkpoint
electra-base/model.ckpt.meta


In [13]:
import boto3

# Naming caveat: `Key` is the LOCAL archive path, while `outPutname` is the
# destination object key inside the bucket.
bucketName = 'huseinhouse-storage'
Key = 'electra-base.tar.gz'
outPutname = "v34/pretrained-model/electra-base.tar.gz"

s3 = boto3.client('s3')
# Keyword arguments make the local-file / bucket / object-key roles explicit.
s3.upload_file(Filename=Key, Bucket=bucketName, Key=outPutname)