In [1]:
import os

# Environment for this kernel: the service-account key file used by the
# Google Cloud client, and the CUDA device index made visible to the process.
# Both are set before the client is created / TensorFlow is imported below.
os.environ.update({
    'GOOGLE_APPLICATION_CREDENTIALS': '../t5/prepare/mesolitica-tpu.json',
    'CUDA_VISIBLE_DEVICES': '1',
})

from google.cloud import storage

# Handle to the bucket that the checkpoint files are downloaded from.
client = storage.Client()
bucket = client.bucket('mesolitica-tpu-general')

In [2]:
# Create the local checkpoint directory. Unlike shelling out to `mkdir`, this
# is silent when the directory already exists, so the cell can be re-run.
os.makedirs('out-large', exist_ok=True)

# The checkpoint is stored as three files sharing one prefix; download each of
# them from the bucket's `albert-large/` folder into `out-large/`.
for suffix in ('data-00000-of-00001', 'index', 'meta'):
    filename = f'model.ckpt-475000.{suffix}'
    blob = bucket.blob(f'albert-large/{filename}')
    blob.download_to_filename(f'out-large/{filename}')

In [3]:
from albert import modeling
from albert import optimization
from albert import tokenization
import tensorflow as tf
import numpy as np




In [4]:
# ALBERT tokenizer driven by the SentencePiece model file; casing is kept
# because do_lower_case is False.
tokenizer = tokenization.FullTokenizer(
    vocab_file='sp10m.cased.v10.vocab',
    do_lower_case=False,
    spm_model_file='sp10m.cased.v10.model',
)


INFO:tensorflow:loading sentence piece model


In [5]:
# Sanity check: tokenize a short sample phrase and display the pieces.
tokenizer.tokenize('Husein comel')

['▁Hu', 'se', 'in', '▁comel']

In [6]:
# Read the model hyper-parameters from the JSON config file; `Model` below
# uses this module-level object when building the graph.
albert_config = modeling.AlbertConfig.from_json_file('LARGE_config.json')
# Bare expression so the notebook displays the loaded object.
albert_config




<albert.modeling.AlbertConfig at 0x7f9209fddc18>

In [7]:
def gather_indexes(sequence_tensor, positions):
    """Pick the vectors at the given positions for every example in a batch.

    The rank-3 input is flattened to two dimensions and ``positions`` are
    shifted by each example's starting row, so a single ``tf.gather`` can
    fetch all requested vectors.

    Args:
        sequence_tensor: rank-3 tensor (the rank is checked by
            ``modeling.get_shape_list``), read as
            [batch_size, seq_length, width].
        positions: integer tensor of positions inside each sequence; it must
            broadcast against a [batch_size, 1] offset column.

    Returns:
        The gathered rows of the flattened tensor, one row per position.
    """
    batch_size, seq_length, width = modeling.get_shape_list(
        sequence_tensor, expected_rank=3)

    # First flattened row index of each example, as a column for broadcasting.
    row_starts = tf.range(0, batch_size, dtype=tf.int32) * seq_length
    flat_offsets = tf.reshape(row_starts, [-1, 1])
    flat_positions = tf.reshape(positions + flat_offsets, [-1])

    flattened = tf.reshape(sequence_tensor, [batch_size * seq_length, width])
    return tf.gather(flattened, flat_positions)

class Model:
    """ALBERT encoder with a masked-LM prediction head, built for inference.

    Instantiating the class adds the placeholders, the encoder and the
    ``cls/predictions`` head to the current default graph. Hyper-parameters
    are read from the module-level ``albert_config``.

    Attributes:
        X: int32 placeholder of shape [None, None], fed as ``input_ids``.
        segment_ids: int32 placeholder of shape [None, None], fed as
            ``token_type_ids``.
        input_masks: int32 placeholder of shape [None, None], fed as
            ``input_mask``.
        logits: the transformed sequence output multiplied by the transposed
            embedding table, plus ``output_bias``.
        log_probs: log-softmax of ``logits`` over the last axis.
    """

    def __init__(
        self,
    ):
        self.X = tf.placeholder(tf.int32, [None, None])
        self.segment_ids = tf.placeholder(tf.int32, [None, None])
        self.input_masks = tf.placeholder(tf.int32, [None, None])

        model = modeling.AlbertModel(
            config=albert_config,
            is_training=False,
            input_ids=self.X,
            input_mask=self.input_masks,
            token_type_ids=self.segment_ids,
            use_one_hot_embeddings=False)

        input_tensor = model.get_sequence_output()
        # The output projection reuses the input embedding table.
        output_weights = model.get_embedding_table()

        # NOTE: scope names and creation order must stay as they are. The
        # restore cell selects variables by the 'bert' and 'cls' scopes and
        # the Saver matches them against the checkpoint by name.
        with tf.variable_scope("cls/predictions"):
            with tf.variable_scope("transform"):
                # Project the sequence output to the embedding size so it can
                # be multiplied with the embedding table.
                input_tensor = tf.layers.dense(
                              input_tensor,
                              units=albert_config.embedding_size,
                              activation=modeling.get_activation(albert_config.hidden_act),
                              kernel_initializer=modeling.create_initializer(
                                  albert_config.initializer_range))
                input_tensor = modeling.layer_norm(input_tensor)

            output_bias = tf.get_variable(
                "output_bias",
                shape=[albert_config.vocab_size],
                initializer=tf.zeros_initializer())
            logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
            logits = tf.nn.bias_add(logits, output_bias)
            log_probs = tf.nn.log_softmax(logits, axis=-1)

        # Keep handles to the head outputs so callers can evaluate predictions
        # with `sess.run`; as plain locals they would be unreachable.
        self.logits = logits
        self.log_probs = log_probs

In [8]:
# Start from an empty default graph before building the model.
tf.reset_default_graph()
sess = tf.InteractiveSession()
# Constructing Model() adds its placeholders and variables to the graph.
model = Model()

# Give every variable an initial value; the next cell restores the checkpoint
# weights for the 'bert' and 'cls' scopes.
sess.run(tf.global_variables_initializer())





Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.


In [9]:
# Restore only the encoder ('bert') and prediction-head ('cls') variables
# from the downloaded checkpoint.
trainable_key = tf.GraphKeys.TRAINABLE_VARIABLES
var_lists = tf.get_collection(trainable_key, scope='bert')
cls = tf.get_collection(trainable_key, scope='cls')

saver = tf.train.Saver(var_list=var_lists + cls)
saver.restore(sess, 'out-large/model.ckpt-475000')

INFO:tensorflow:Restoring parameters from out-large/model.ckpt-475000


In [10]:
# Re-save only the trainable variables under a new prefix; this checkpoint is
# the one handed to the PyTorch converter later on.
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'albert-large/model.ckpt')

'albert-large/model.ckpt'

In [11]:
import os

# Export directory for the Hugging Face files; created if it does not exist.
out = 'albert-large-bahasa-standard-cased'
os.makedirs(out, exist_ok=True)

In [12]:
from transformers import AlbertTokenizer, AlbertModel, AlbertConfig, AutoTokenizer, AutoModelWithLMHead, pipeline

In [13]:
# Build a Hugging Face tokenizer from the SentencePiece model (casing kept)
# and write its files into the export directory.
# Note: this rebinds `tokenizer`, replacing the ALBERT FullTokenizer from above.
tokenizer = AlbertTokenizer('sp10m.cased.v10.model', do_lower_case = False)
tokenizer.save_pretrained(out)

('albert-large-bahasa-standard-cased/spiece.model',
 'albert-large-bahasa-standard-cased/special_tokens_map.json',
 'albert-large-bahasa-standard-cased/added_tokens.json')

In [14]:
import torch
import logging
from transformers import AlbertConfig, AlbertForMaskedLM, load_tf_weights_in_albert


# Show INFO-level log records; the conversion below reports each loaded
# TF weight at that level.
logging.basicConfig(level=logging.INFO)


def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, albert_config_file, pytorch_dump_path):
    """Convert a TensorFlow ALBERT checkpoint into a PyTorch state-dict file.

    Args:
        tf_checkpoint_path: path (prefix) of the TF checkpoint to read.
        albert_config_file: path to the JSON model configuration.
        pytorch_dump_path: file the model's ``state_dict`` is written to.

    Returns:
        None. The result is the file written at ``pytorch_dump_path``.
    """
    cfg = AlbertConfig.from_json_file(albert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(cfg)))

    # Build a masked-LM model from the config, then fill it from the TF
    # checkpoint. The loader's return value is ignored, so it is relied on to
    # populate `pt_model` in place.
    pt_model = AlbertForMaskedLM(cfg)
    load_tf_weights_in_albert(pt_model, cfg, tf_checkpoint_path)

    print("Save PyTorch model to {}".format(pytorch_dump_path))
    weights = pt_model.state_dict()
    torch.save(weights, pytorch_dump_path)

In [15]:
# Convert the re-saved TF checkpoint and write the PyTorch weights into the
# export directory.
convert_tf_checkpoint_to_pytorch('albert-large/model.ckpt', 
                                 'LARGE_config.json', 
                                 f'{out}/pytorch_model.bin')

Building PyTorch model from configuration: AlbertConfig {
  "attention_probs_dropout_prob": 0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 16,
  "num_hidden_groups": 1,
  "num_hidden_layers": 24,
  "num_memory_blocks": 0,
  "pad_token_id": 0,
  "type_vocab_size": 2,
  "vocab_size": 32000
}



INFO:transformers.modeling_albert:Converting TensorFlow checkpoint from /home/husein/albert/albert-large/model.ckpt
INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/LayerNorm/beta with shape [128]
INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/LayerNorm/gamma with shape [128]
INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/position_embeddings with shape [512, 128]
INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/token_type_embeddings with shape [2, 128]
INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/word_embeddings with shape [32000, 128]
INFO:transformers.modeling_albert:Loading TF weight bert/encoder/embedding_hidden_mapping_in/bias with shape [1024]
INFO:transformers.modeling_albert:Loading TF weight bert/encoder/embedding_hidden_mapping_in/kernel with shape [128, 1024]
INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta wi

bert/embeddings/LayerNorm/beta
bert/embeddings/LayerNorm/gamma
bert/embeddings/position_embeddings
bert/embeddings/token_type_embeddings
bert/embeddings/word_embeddings
bert/encoder/embedding_hidden_mapping_in/bias
bert/encoder/embedding_hidden_mapping_in/kernel
bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta
bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma
bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta
bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma
bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias
bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel
bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias
bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel
bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias
bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel
bert/encoder/transformer/gr

In [16]:
# Delete the intermediate re-saved TF checkpoint directory; it has already
# been converted to PyTorch above.
!rm -rf albert-large

In [17]:
# Bundle the downloaded TF checkpoint with its SentencePiece files and the
# model config (renamed to config.json) into a single gzipped tarball.
!cp sp10m.cased.v10.* out-large
!cp LARGE_config.json out-large/config.json
!tar cvzf albert-large-475k-19-10-2020.tar.gz out-large

out-large/
out-large/sp10m.cased.v10.model
out-large/sp10m.cased.v10.vocab
out-large/model.ckpt-475000.index
out-large/model.ckpt-475000.meta
out-large/config.json
out-large/model.ckpt-475000.data-00000-of-00001


In [18]:
# Reload the tokenizer from the files saved in the export directory.
tokenizer = AlbertTokenizer.from_pretrained(f'./{out}', do_lower_case = False)

INFO:transformers.tokenization_utils_base:Model name './albert-large-bahasa-standard-cased' not found in model shortcut name list (albert-base-v1, albert-large-v1, albert-xlarge-v1, albert-xxlarge-v1, albert-base-v2, albert-large-v2, albert-xlarge-v2, albert-xxlarge-v2). Assuming './albert-large-bahasa-standard-cased' is a path, a model identifier, or url to a directory containing tokenizer files.
INFO:transformers.tokenization_utils_base:Didn't find file ./albert-large-bahasa-standard-cased/added_tokens.json. We won't load it.
INFO:transformers.tokenization_utils_base:Didn't find file ./albert-large-bahasa-standard-cased/tokenizer.json. We won't load it.
INFO:transformers.tokenization_utils_base:loading file ./albert-large-bahasa-standard-cased/spiece.model
INFO:transformers.tokenization_utils_base:loading file None
INFO:transformers.tokenization_utils_base:loading file ./albert-large-bahasa-standard-cased/special_tokens_map.json
INFO:transformers.tokenization_utils_base:loading file 

In [19]:
# Build the Hugging Face config from the same JSON file that was used for the
# conversion. Calling `AlbertConfig('LARGE_config.json')` would NOT read the
# file: the path is taken as the first positional argument (`vocab_size`) and
# every other field silently falls back to the class defaults, which then have
# to be patched by hand. `from_json_file` loads all fields from the file
# (vocab_size, hidden_size, intermediate_size, num_attention_heads,
# num_hidden_groups, num_hidden_layers, ...), so the config saved next to the
# weights matches the converted checkpoint.
config = AlbertConfig.from_json_file('LARGE_config.json')

In [20]:
# Load the converted weights straight from the .bin file, using the explicit
# config built above. Note: this rebinds `model`, replacing the TF `Model`.
model = AutoModelWithLMHead.from_pretrained(f'./{out}/pytorch_model.bin', config = config)

INFO:transformers.modeling_utils:loading weights file ./albert-large-bahasa-standard-cased/pytorch_model.bin
INFO:transformers.modeling_utils:All model checkpoint weights were used when initializing AlbertForMaskedLM.

INFO:transformers.modeling_utils:All the weights of AlbertForMaskedLM were initialized from the model checkpoint at ./albert-large-bahasa-standard-cased/pytorch_model.bin.
If your task is similar to the task the model of the ckeckpoint was trained on, you can already use AlbertForMaskedLM for predictions without further training.


In [21]:
# Fill-mask pipeline built from the converted model and the reloaded tokenizer.
fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)

In [22]:
# Smoke test: ask the converted model to fill the [MASK] token in a sample sentence.
fill_mask('tolonglah gov buat something, kami dah [MASK]')

[{'sequence': '[CLS] tolonglah gov buat something, kami dah join[SEP]',
  'score': 0.05382946506142616,
  'token': 1801,
  'token_str': '▁join'},
 {'sequence': '[CLS] tolonglah gov buat something, kami dah.[SEP]',
  'score': 0.019622132182121277,
  'token': 5,
  'token_str': '.'},
 {'sequence': '[CLS] tolonglah gov buat something, kami dah tahu[SEP]',
  'score': 0.017230793833732605,
  'token': 178,
  'token_str': '▁tahu'},
 {'sequence': '[CLS] tolonglah gov buat something, kami dah tau[SEP]',
  'score': 0.016450127586722374,
  'token': 157,
  'token_str': '▁tau'},
 {'sequence': '[CLS] tolonglah gov buat something, kami dah,[SEP]',
  'score': 0.013463296927511692,
  'token': 19,
  'token_str': ','}]

In [23]:
# Write the model into the export directory; per the log below this saves
# config.json and pytorch_model.bin there.
model.save_pretrained(out)

INFO:transformers.configuration_utils:Configuration saved in albert-large-bahasa-standard-cased/config.json
INFO:transformers.modeling_utils:Model weights saved in albert-large-bahasa-standard-cased/pytorch_model.bin


In [37]:
# !transformers-cli upload ./albert-large-bahasa-standard-cased