In this tutorial, we are going to fine-tune a pre-trained BERT model for a sentiment classification test. For fine-tuning, we use the KNBC corpus. 





In [1]:
from google.colab import drive
drive.mount('/content/drive/sentiment_bert')
%cd /content/drive/My Drive/

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive/
/content/drive/My Drive


#### Install necessary libraries

In [2]:
!pip install transformers
!pip install mecab-python3


Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/ee/fc/bd726a15ab2c66dc09306689d04da07a3770dad724f0883f0a4bfb745087/transformers-2.4.1-py3-none-any.whl (475kB)
[K     |████████████████████████████████| 481kB 3.4MB/s 
[?25hCollecting tokenizers==0.0.11
[?25l  Downloading https://files.pythonhosted.org/packages/5e/36/7af38d572c935f8e0462ec7b4f7a46d73a2b3b1a938f50a5e8132d5b2dc5/tokenizers-0.0.11-cp36-cp36m-manylinux1_x86_64.whl (3.1MB)
[K     |████████████████████████████████| 3.1MB 56.7MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/a6/b4/7a41d630547a4afd58143597d5a49e07bfd4c42914d8335b2a5657efc14b/sacremoses-0.0.38.tar.gz (860kB)
[K     |████████████████████████████████| 870kB 47.3MB/s 
[?25hCollecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/74/f4/2d5214cbf13d06e7cb2c20d84115ca25b53ea76fa1f0ade0e3c9749de214/sentencepiece-0.1.85-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)
[K  

In [3]:
import pandas as pd
import os
import glob
import transformers
import torch
import random
import numpy as np

In [0]:
# Set the seed value all over the place to make this reproducible.
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True


 ### Load dataset


In [5]:
import pandas as pd
# Load the dataset into a pandas dataframe.
df = pd.read_csv("sentiment_data/all.tsv", delimiter='\t', header=None, names=['domain', 'sentence', 'label'])

# Report the number of sentences.
print('Total number of sentences: {:,}\n'.format(df.shape[0]))

Total number of sentences: 680



In [6]:
# Display 10 random rows from the data.
df.sample(10).style.hide_index()

domain,sentence,label
gourmet,中華そばより高いが、アサリが入ったスープはボンゴレ風で、これが今までにない新しい感覚でうまい。,1
sports,しかし、３位のチームにまで出られるというのはいかがなものか？,0
kyoto,これ以上素敵な都市って、日本にもそうないんじゃないかしらん？？,1
kyoto,水面は朝日で輝いている。,1
kyoto,胎内巡りってゆうのがあって、それはお寺の地下を真っ暗な中を綱を頼りに出口を目指すものですが、本当の本当に真っ暗闇で、神聖なお寺の中やのに、大変スリリングな体験でした☆,1
gourmet,ほんとに京都は学生にやさしい町だなぁと思います！！,1
sports,これによって優勝決定戦により多くの注目が集まるからだ。,1
keitai,携帯は確かに便利なんですが、時々携帯圏外のところに行くと若干の不安を覚えると同時に、うれしくもなります。,1
kyoto,文学作品の舞台としてお馴染みなのも関係あるかもしれない。,1
keitai,大体『携帯を振って遊べるんです！！』と声高に言われてもあんなので遊んでる人いるんですかねぇ。,0


### Split dataset into train, val and test parts

In [7]:
from sklearn.model_selection import train_test_split
#get the sentences and their labels only
sentences = df.sentence.values
labels = df.label.values

# Use 70% for training, 15% for validation and 15% for test.
train_sents, validation_sents, train_labels, validation_labels = train_test_split(sentences, labels, 
                                                            random_state=2018, test_size=0.3)

test_sents, validation_sents, test_labels, validation_labels = train_test_split(validation_sents, validation_labels, 
                                                            random_state=2018, test_size=0.5)

print("Number of train sentences: ", len(train_sents))
print("Number of validation sentences: ", len(validation_sents))
print("Number of test sentences: ", len(test_sents))

Number of train sentences:  476
Number of validation sentences:  102
Number of test sentences:  102


### Convert dataset into BERT input format

In [8]:
from bert_data_processor_ja import BERTInputConverter
train_set = BERTInputConverter(train_sents, train_labels)
validation_set = BERTInputConverter(validation_sents, validation_labels)
test_set = BERTInputConverter(test_sents, test_labels)

print("Dataset converted to BERT input format!")

HBox(children=(IntProgress(value=0, description='Downloading', max=257706, style=ProgressStyle(description_wid…


Dataset converted to BERT input format!


### Convert dataset into pytorch format

In [9]:
from torch.utils.data import DataLoader
batch_size = 16

train_dataloader = DataLoader(train_set, batch_size=batch_size)
validation_dataloader = DataLoader(validation_set, batch_size=batch_size)
test_dataloader = DataLoader(test_set, batch_size=batch_size)

print("Dataset converted to pytorch format!")

Dataset converted to pytorch format!


### Build BERT classifer

In [10]:
from transformers import BertForSequenceClassification, AdamW, BertConfig
#check if gpu is available
if torch.cuda.is_available:
  device = torch.device("cuda")

  #print number and type of gpu available
  print("Number of GPUs available: %d" % torch.cuda.device_count())
  print("GPU type:", torch.cuda.get_device_name(0))
  print("")

else:
  device = torch.device("cpu")

# Load BertForSequenceClassification, the pretrained BERT model with a single linear classification layer on top.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-japanese-whole-word-masking", #we use Japanese BERT model
    num_labels = 2, #number of labels
    output_attentions = False, # Whether the model returns attentions weights
    output_hidden_states = False, # Whether the model returns all hidden-states
)

# run the model on the GPU, if available, or CPU, if not.
model.to(device)

Number of GPUs available: 1
GPU type: Tesla T4



HBox(children=(IntProgress(value=0, description='Downloading', max=383, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='Downloading', max=445021143, style=ProgressStyle(description_…




BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(32000, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

#### Set up optimizer and learning rate scheduler

In [0]:
# set up optimizer
learning_rate = 2e-5
adam_eps = 1e-8
optimizer = AdamW(model.parameters(),
                  lr = learning_rate, 
                  eps = adam_eps 
                )

In [0]:
from transformers import get_linear_schedule_with_warmup

# Number of training epochs
epochs = 4

# Total number of training steps is number of batches * number of epochs.
total_steps = len(train_dataloader) * epochs

# compute warmup step
warmup_steps = 0

# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = warmup_steps, 
                                            num_training_steps = total_steps)

## Fine-Tuning BERT


In [13]:
import eval_utils

best_acc = 0
best_model = None

# For each epoch...
for epoch_i in range(0, epochs):
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Reset the total loss
    total_loss = 0

    # Put the model into training mode
    model.train()

    # For each batch of training data...
    for step, batch in enumerate(train_dataloader):

        # Unpack training batch    
        b_input_ids = batch[0].to(device)  #[0]: input ids 
        b_input_mask = batch[1].to(device) #[1]: attention masks
        b_labels = batch[2].to(device)     #[2]: labels 

        # clear gradients
        model.zero_grad()        

        # evaluate the model on this training batch
        outputs = model(b_input_ids, 
                    token_type_ids=None, 
                    attention_mask=b_input_mask, 
                    labels=b_labels)
        
        # loss value
        loss = outputs[0]

        # Accumulate the training loss over all of the batches
        total_loss += loss.item()

        # backward pass 
        loss.backward()

        # Clip the norm  
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Update parameters 
        optimizer.step()

        # Update the learning rate.
        scheduler.step()

    # Calculate the average loss over the training data
    avg_train_loss = total_loss / len(train_dataloader)            

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    
    #computes model accuracy on the validation set (for current epoch)
    print("")
    print("Running Validation...")
    val_acc = eval_utils.evaluate(model, validation_dataloader)
    
    #report accuracy
    print("  Accuracy: {0:.2f}".format(val_acc))

    #saves best model so far
    if val_acc > best_acc:
        best_acc = val_acc
        best_model = model

print("")
print("Training complete!")
print("Best accuracy on validation set: {0:.2f}".format(best_acc))


Training...

  Average training loss: 0.55

Running Validation...
  Accuracy: 0.80

Training...

  Average training loss: 0.27

Running Validation...
  Accuracy: 0.85

Training...

  Average training loss: 0.16

Running Validation...
  Accuracy: 0.85

Training...

  Average training loss: 0.10

Running Validation...
  Accuracy: 0.85

Training complete!
Best accuracy on validation set: 0.85


## Evaluation on test set

In [14]:
print("")
print("Running evaluation on test set...")
test_acc = eval_utils.evaluate(best_model, test_dataloader)
#report accuracy
print("  Accuracy: {0:.2f}".format(test_acc))



Running evaluation on test set...
  Accuracy: 0.83


####Test on a single sentence

In [15]:
sentence = "中華そばより高いが、アサリが入ったスープはボンゴレ風で、これが今までにない新しい感覚でうまい。"
predicted_label, probability = eval_utils.evaluate_single_sentence(best_model, sentence)

print("Predicted label:", predicted_label)
print("Probability: {0:.4f}".format(probability))

Predicted label: Positive
Probability: 0.9893
