In [1]:
!pip install mxnet-cu100
!pip install gluonnlp pandas tqdm



In [0]:
import pandas as pd
import numpy as np
from mxnet.gluon import nn, rnn
from mxnet import gluon, autograd
import gluonnlp as nlp
from mxnet import nd 
import mxnet as mx
import time
import itertools
import random

### 버트 로딩 

In [0]:
ctx = mx.gpu()

In [5]:
bert_base, vocabulary = nlp.model.get_model('bert_12_768_12',
                                             dataset_name='wiki_multilingual_cased',
                                             pretrained=True, ctx=ctx, use_pooler=True,
                                             use_decoder=False, use_classifier=False)
#print(bert_base)

Vocab file is not found. Downloading.
Downloading /root/.mxnet/models/1565856577.1304765wiki_multilingual_cased-0247cb44.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/vocab/wiki_multilingual_cased-0247cb44.zip...
Downloading /root/.mxnet/models/bert_12_768_12_wiki_multilingual_cased-b0f57a20.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/bert_12_768_12_wiki_multilingual_cased-b0f57a20.zip...


In [6]:
ds = gluon.data.SimpleDataset([['나 보기가 역겨워', '김소월']])

tok = nlp.data.BERTTokenizer(vocab=vocabulary, lower=False)

trans = nlp.data.BERTSentenceTransform(tok, max_seq_length=10)

list(ds.transform(trans))

[(array([    2,  8982,  9356, 47869,  9566,     3,  8935, 22333, 38851,
             3], dtype=int32),
  array(10, dtype=int32),
  array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1], dtype=int32))]

In [8]:
!wget https://www.dropbox.com/s/374ftkec978br3d/ratings_train.txt?dl=1
!wget https://www.dropbox.com/s/977gbwh542gdy94/ratings_test.txt?dl=1

--2019-08-15 08:13:44--  https://www.dropbox.com/s/374ftkec978br3d/ratings_train.txt?dl=1
Resolving www.dropbox.com (www.dropbox.com)... 162.125.8.1, 2620:100:601b:1::a27d:801
Connecting to www.dropbox.com (www.dropbox.com)|162.125.8.1|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /s/dl/374ftkec978br3d/ratings_train.txt [following]
--2019-08-15 08:13:44--  https://www.dropbox.com/s/dl/374ftkec978br3d/ratings_train.txt
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://ucef6f352d774589b42c518ef3f0.dl.dropboxusercontent.com/cd/0/get/Amq3zXJvW28FXK4jT8dPnncZI9ibrr6FYx7ZR_SOt-Z5Jt2lsXU9Y7bLGmO0LkPMnZ2eufdoF14xTEuRd-jV11A02AOXHYKmXj_MJPGzEGROAYWCe02sMg0a5Dnj0MkvXMo/file?dl=1# [following]
--2019-08-15 08:13:45--  https://ucef6f352d774589b42c518ef3f0.dl.dropboxusercontent.com/cd/0/get/Amq3zXJvW28FXK4jT8dPnncZI9ibrr6FYx7ZR_SOt-Z5Jt2lsXU9Y7bLGmO0LkPMnZ2eufdoF14xTEuRd-jV11A02AOXHYKmX

In [0]:
dataset_train = nlp.data.TSVDataset("ratings_train.txt?dl=1", field_indices=[1,2], num_discard_samples=1)
dataset_test = nlp.data.TSVDataset("ratings_test.txt?dl=1", field_indices=[1,2], num_discard_samples=1)

In [0]:
class BERTDataset(mx.gluon.data.Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, max_len,
                 pad, pair):
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, pad=pad, pair=pair)
        sent_dataset = gluon.data.SimpleDataset([[
            i[sent_idx],
        ] for i in dataset])
        self.sentences = sent_dataset.transform(transform)
        self.labels = gluon.data.SimpleDataset(
            [np.array(np.int32(i[label_idx])) for i in dataset])

    def __getitem__(self, i):
        return (self.sentences[i] + (self.labels[i], ))

    def __len__(self):
        return (len(self.labels))


In [0]:
bert_tokenizer = nlp.data.BERTTokenizer(vocabulary, lower=False)
max_len = 64

In [0]:
data_train = BERTDataset(dataset_train, 0, 1, bert_tokenizer, max_len, True, False)
data_test = BERTDataset(dataset_test, 0, 1, bert_tokenizer, max_len, True, False)

In [0]:
class BERTClassifier(nn.Block):
    def __init__(self,
                 bert,
                 num_classes=2,
                 dropout=None,
                 prefix=None,
                 params=None):
        super(BERTClassifier, self).__init__(prefix=prefix, params=params)
        self.bert = bert
        with self.name_scope():
            self.classifier = nn.HybridSequential(prefix=prefix)
            if dropout:
                self.classifier.add(nn.Dropout(rate=dropout))
            self.classifier.add(nn.Dense(units=num_classes))

    def forward(self, inputs, token_types, valid_length=None):
        _, pooler = self.bert(inputs, token_types, valid_length)
        return self.classifier(pooler)
                                           

In [0]:
model = BERTClassifier(bert_base, num_classes=2, dropout=0.3)
# 분류 레이어만 초기화 한다. 
model.classifier.initialize(ctx=ctx)
model.hybridize()

# softmax cross entropy loss for classification
loss_function = gluon.loss.SoftmaxCELoss()

metric = mx.metric.Accuracy()

In [0]:
batch_size = 16
lr = 5e-5

train_dataloader = mx.gluon.data.DataLoader(data_train, batch_size=batch_size, num_workers=5)
test_dataloader = mx.gluon.data.DataLoader(data_test, batch_size=int(batch_size/2), num_workers=5)

In [0]:
trainer = gluon.Trainer(model.collect_params(), 'bertadam',
                        {'learning_rate': lr, 'epsilon': 1e-9, 'wd':0.01})

log_interval = 4
num_epochs = 4

In [0]:
# LayerNorm과 Bias에는 Weight Decay를 적용하지 않는다. 
for _, v in model.collect_params('.*beta|.*gamma|.*bias').items():
    v.wd_mult = 0.0
params = [
    p for p in model.collect_params().values() if p.grad_req != 'null'
]


In [0]:
def evaluate_accuracy(model, data_iter, ctx=ctx):
    acc = mx.metric.Accuracy()
    i = 0
    for i, (t,v,s, label) in enumerate(data_iter):
        token_ids = t.as_in_context(ctx)
        valid_length = v.as_in_context(ctx)
        segment_ids = s.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = model(token_ids, segment_ids, valid_length.astype('float32'))
        acc.update(preds=output, labels=label)
        if i > 1000:
            break
        i += 1
    return(acc.get()[1])

In [0]:
#learning rate warmup을 위한 준비 
step_size = batch_size 
num_train_examples = len(data_train)
num_train_steps = int(num_train_examples / step_size * num_epochs)
warmup_ratio = 0.1
num_warmup_steps = int(num_train_steps * warmup_ratio)
step_num = 0

In [0]:
for epoch_id in range(num_epochs):
    metric.reset()
    step_loss = 0
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(train_dataloader):
        step_num += 1
        if step_num < num_warmup_steps:
            new_lr = lr * step_num / num_warmup_steps
        else:
            offset = (step_num - num_warmup_steps) * lr / (
                num_train_steps - num_warmup_steps)
            new_lr = lr - offset
        trainer.set_learning_rate(new_lr)
        with mx.autograd.record():
            # load data to GPU
            token_ids = token_ids.as_in_context(ctx)
            valid_length = valid_length.as_in_context(ctx)
            segment_ids = segment_ids.as_in_context(ctx)
            label = label.as_in_context(ctx)

            # forward computation
            out = model(token_ids, segment_ids, valid_length.astype('float32'))
            ls = loss_function(out, label).mean()

        # backward computation
        ls.backward()
        trainer.allreduce_grads()
        nlp.utils.clip_grad_global_norm(params, 1)
        trainer.update(token_ids.shape[0])

        step_loss += ls.asscalar()
        metric.update([label], [out])
        if (batch_id + 1) % (50) == 0:
            print('[Epoch {} Batch {}/{}] loss={:.4f}, lr={:.10f}, acc={:.3f}'
                         .format(epoch_id + 1, batch_id + 1, len(train_dataloader),
                                 step_loss / log_interval,
                                 trainer.learning_rate, metric.get()[1]))
            step_loss = 0
    test_acc = evaluate_accuracy(model, test_dataloader, ctx)
    print('Test Acc : {}'.format(test_acc))

[Epoch 1 Batch 50/9375] loss=8.7709, lr=0.0000006667, acc=0.505
[Epoch 1 Batch 100/9375] loss=8.5914, lr=0.0000013333, acc=0.526
[Epoch 1 Batch 150/9375] loss=8.3027, lr=0.0000020000, acc=0.555
[Epoch 1 Batch 200/9375] loss=7.6643, lr=0.0000026667, acc=0.579
[Epoch 1 Batch 250/9375] loss=7.4951, lr=0.0000033333, acc=0.603
[Epoch 1 Batch 300/9375] loss=7.2966, lr=0.0000040000, acc=0.620
[Epoch 1 Batch 350/9375] loss=7.2736, lr=0.0000046667, acc=0.632
[Epoch 1 Batch 400/9375] loss=7.0332, lr=0.0000053333, acc=0.641
[Epoch 1 Batch 450/9375] loss=7.3415, lr=0.0000060000, acc=0.647
