## GPU prepare
1. 使用可能GPUの確認
2. GPUの指定
3. PyTorchで利用できるGPU数の確認

In [4]:
# Check which GPUs are available (shell command run through IPython)
!nvidia-smi

Mon Aug  1 06:28:02 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA RTX A6000    On   | 00000000:01:00.0 Off |                  Off |
| 33%   61C    P2   158W / 300W |   1445MiB / 48685MiB |    100%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA RTX A6000    On   | 00000000:25:00.0 Off |                  Off |
| 30%   27C    P8    14W / 300W |      5MiB / 48685MiB |      0%      Default |
|       

In [5]:
# Select the GPU to use
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1' # expose only GPU index 1 (idle in the nvidia-smi listing; GPU 0 shows 100% utilisation)

In [6]:
# Verify the selection
import torch
print(torch.cuda.device_count()) # number of GPUs PyTorch can use

1


## prepare
1. ~~語彙数の取得~~
2. ~~学習データの用意(ラベル)~~
3. ~~学習データの用意(特徴量)~~
4. 乱数の種を固定
5. 学習データの用意(テキスト)
6. 学習データの用意(ラベル)

In [7]:
# 訓練・検証・評価データの用意
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

# 乱数シードの固定
import random

def fix_seed(seed):
    '''
    Seed every random number generator this notebook relies on.

    Seeds Python's `random`, NumPy's global RNG and PyTorch (the CPU
    generator and all CUDA devices), then configures cuDNN for
    repeatable behaviour.

    input :seed (int)
    output:None
    '''
    # Python standard library
    random.seed(seed)
    # NumPy
    np.random.seed(seed)
    # PyTorch: CPU generator, then every CUDA device
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # cuDNN: request deterministic kernels and switch off the autotuner,
    # which could otherwise pick different algorithms from run to run
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [9]:
# Load the text column of a data file
def read_text(fname):
    '''
    Read a UTF-8 text file and return each line without its first two characters.

    Nothing else is stripped, so line endings stay in the returned strings.

    input :fname (path of the file to read)
    output:list(text), one entry per line of the file
    '''
    # presumably the two leading characters are a one-letter label plus a separator -- TODO confirm against the ch06 data
    with open(fname, encoding='utf-8') as fin:
        return [row[2:] for row in fin]

# Load the texts of the train / validation / test splits
X_train_text = read_text('../data/ch06/train.txt')
X_valid_text = read_text('../data/ch06/valid.txt')
X_test_text = read_text('../data/ch06/test.txt')

In [10]:
# Labels: reuse the files written in ch08
Y_train = np.loadtxt('../data/ch08/Y_train.txt')
Y_valid = np.loadtxt('../data/ch08/Y_valid.txt')
Y_test = np.loadtxt('../data/ch08/Y_test.txt')

# Convert to int64 tensors for PyTorch
Y_train_long = torch.tensor(Y_train, dtype=torch.int64)
Y_valid_long = torch.tensor(Y_valid, dtype=torch.int64)
Y_test_long = torch.tensor(Y_test, dtype=torch.int64)

## 89. 事前学習済み言語モデルからの転移学習
1. BERTの前処理/データ準備
2. 学習済みモデルのロード
3. train, validの定義
4. fine-tuning

In [21]:
# BERT Tokenizerを用いた単語分割・ID変換
from transformers import BertTokenizer
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

# Load the pre-trained tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Find the largest token count over all three splits
X_all_text = X_train_text + X_valid_text + X_test_text
max_lens = []  # despite the name, holds the token count of every text
for X_text in X_all_text:
    token_words = tokenizer.tokenize(X_text)
    max_lens.append(len(token_words))
maximam_len = max(max_lens) + 2  # +2: presumably room for the [CLS]/[SEP] special tokens -- TODO confirm

# Tokenise, convert to IDs, add special tokens, fix the sequence length and build attention masks
def make_dataset(X, Y):
    '''
    Encode texts with the module-level `tokenizer` and bundle them with their labels.

    Every text is padded or truncated to the module-level `maximam_len`.

    input :X (sequence of str), Y (tensor of labels, one per text)
    output:TensorDataset(input_ids, attention_masks, Y)
    '''
    # Calling the tokenizer directly replaces the deprecated `encode_plus`.
    # Given a list it encodes the whole batch in one call and, with
    # return_tensors='pt', returns tensors with one row per text, so no
    # per-text loop or torch.cat is needed.
    encoded = tokenizer(
            list(X),
            add_special_tokens = True,      # add the special tokens
            max_length = maximam_len,       # fixed sequence length
            padding = 'max_length',         # pad up to max_length
            truncation = True,              # cut anything longer than max_length
            return_attention_mask = True,   # also build the attention mask
            return_tensors = 'pt'           # return PyTorch tensors
        )

    return TensorDataset(encoded['input_ids'], encoded['attention_mask'], Y)

# Build one dataset per split
train_dataset = make_dataset(X_train_text, Y_train_long)
valid_dataset = make_dataset(X_valid_text, Y_valid_long)
test_dataset = make_dataset(X_test_text, Y_test_long)

In [22]:
# 学習済みモデルのロード
from transformers import BertModel

class BERTClass(torch.nn.Module):
    '''
    Pre-trained BERT encoder followed by one linear classification layer.

    input :num_labels (number of output classes, default 4),
           model_name (checkpoint passed to BertModel.from_pretrained,
           default 'bert-base-uncased')
    '''
    def __init__(self, num_labels=4, model_name='bert-base-uncased'):
        super().__init__()
        self.bert = BertModel.from_pretrained(model_name)
        # Take the encoder width from the loaded config instead of hard-coding
        # 768, so other checkpoint sizes work as well
        self.fc = torch.nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, ids, mask):
        '''
        input :ids (token-ID tensor), mask (attention-mask tensor)
        output:output of the linear layer (one score per class)
        '''
        out = self.bert(ids, attention_mask=mask)
        # Element 1 of the encoder output feeds the classifier
        # (presumably BertModel's pooled output -- see the BertModel docs)
        out = self.fc(out[1])
        return out

In [23]:
import time

# Compute loss and accuracy
def calculate_loss_accuracy(model, criterion, loader, device, batch_size=None):
    '''
    Evaluate `model` on every batch of `loader` with gradients disabled.

    The model is switched to eval mode and left in that mode.

    input :model, criterion, loader (iterable of (ids, mask, labels) batches
           that supports len()), device,
           batch_size (unused; accepted so existing callers keep working)
    output:(mean of the per-batch losses, fraction of correctly predicted labels)
    '''
    model.eval()
    loss = 0
    correct_num = 0
    total_num = 0
    with torch.no_grad():
        for data in loader:
            ids = data[0].to(device)
            mask = data[1].to(device)
            labels = data[2].to(device)

            # Forward pass
            outputs = model(ids, mask)
            # Accumulate the batch loss
            loss += criterion(outputs, labels).item()

            # Count hits over the batch actually received rather than the
            # nominal batch_size, so a short final batch no longer raises
            # IndexError
            predict_labels = torch.max(outputs, 1)[1]
            correct_num += (predict_labels == labels).sum().item()
            total_num += len(labels)

    return loss / len(loader), correct_num / total_num

# Training loop
def train(model, train_dataset, valid_dataset, batch_size, criterion, optimizer, num_epochs, device):
    '''
    Train `model` for `num_epochs` epochs and record metrics after every epoch.

    After each epoch the loss and accuracy on the training and validation
    data are computed with `calculate_loss_accuracy` and printed together
    with the elapsed time.

    input :model, train_dataset, valid_dataset (datasets of (ids, mask, labels)),
           batch_size, criterion, optimizer, num_epochs, device
    output:dict with keys 'train_loss', 'train_acc', 'valid_loss', 'valid_acc',
           each a list holding one value per epoch
    '''
    # Training batches are reshuffled each epoch; drop_last discards a short
    # final batch, so every batch holds exactly batch_size samples
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    # The validation set is scored as a single batch, so shuffling it would
    # change nothing except the global random state
    valid_dataloader = DataLoader(valid_dataset, batch_size=len(valid_dataset), shuffle=False)

    train_accs = []
    valid_accs = []
    train_losses = []
    valid_losses = []
    for epoch in range(num_epochs):
        # Record the start time
        start_time = time.time()

        # Run in training mode (the metric computation below switches to eval mode)
        model.train()
        for data in train_dataloader:
            ids = data[0].to(device)
            mask = data[1].to(device)
            labels = data[2].to(device)
            optimizer.zero_grad()
            # forward
            outputs = model(ids, mask)
            loss = criterion(outputs, labels)
            # backward
            loss.backward()
            # Gradient clipping (max norm 1.0)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

        # Loss and accuracy on both splits
        train_loss, train_acc = calculate_loss_accuracy(model, criterion, train_dataloader, device, batch_size)
        valid_loss, valid_acc = calculate_loss_accuracy(model, criterion, valid_dataloader, device, len(valid_dataset))
        train_accs.append(train_acc)
        valid_accs.append(valid_acc)
        train_losses.append(train_loss)
        valid_losses.append(valid_loss)

        # Record the end time
        end_time = time.time()

        # Every metric uses the same fixed-point format (.4f); train_loss
        # previously used '.4', i.e. four significant digits
        print(f'epoch: {epoch}, train_loss: {train_loss:.4f}, train_accuracy: {train_acc:.4f}, valid_loss: {valid_loss:.4f}, valid_accuracy: {valid_acc:.4f}, {(end_time-start_time):.4f}sec')

    return {'train_loss': train_losses, 'train_acc': train_accs, 'valid_loss': valid_losses, 'valid_acc': valid_accs}

In [24]:
# fine-tuning
# Hyperparameters
batch_size = 32
num_epochs = 4
lr = 2e-5

# Build the model (seed first, before the model is constructed)
fix_seed(42)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = BERTClass().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=lr)

# Train the model; `log` holds the per-epoch metrics returned by train()
log = train(model, train_dataset, valid_dataset, batch_size, criterion, optimizer, num_epochs, device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


epoch: 0, train_loss: 0.1385, train_accuracy: 0.9604, valid_loss: 0.2236, valid_accuracy: 0.9259, 22.0368sec
epoch: 1, train_loss: 0.06551, train_accuracy: 0.9805, valid_loss: 0.2031, valid_accuracy: 0.9364, 21.2869sec
epoch: 2, train_loss: 0.04653, train_accuracy: 0.9866, valid_loss: 0.2741, valid_accuracy: 0.9274, 21.2034sec
epoch: 3, train_loss: 0.02636, train_accuracy: 0.9932, valid_loss: 0.2733, valid_accuracy: 0.9394, 21.1590sec
