In [1]:
import torch
from torch import nn
import numpy as np
from torch.utils.data import Dataset,DataLoader
import os
import sys

In [2]:
# Put the parent of the current working directory on the import path so that
# modules living one level above this notebook can be imported.
notebook_path = os.getcwd()
parent_dir = os.path.split(notebook_path)[0]
sys.path += [parent_dir]

In [3]:
import importnb

# Imports executed inside importnb's Notebook context manager.
# NOTE(review): presumably utils/TransformerDataset and model are .ipynb
# files that importnb makes importable — confirm against the repository.
with importnb.Notebook():
    from utils.TransformerDataset import MyDataset
    from model import Transformer

## 各パラメータの指定

In [4]:
import json

# Settings stored alongside the data are read from the JSON config file.
with open('../data/config.json', 'r') as file:
    config = json.load(file)

max_len, src_vocab_size, tgt_vocab_size = (
    config[key] for key in ("max_len", "src_vocab_size", "tgt_vocab_size")
)

# Hyperparameters fixed in the notebook.
batch_size = 16
num_head, d_model, d_ff, N = 8, 512, 2048, 6
pad_idx = 0  # index 0 is '<pad>' in both vocabularies printed further below
dropout_rate = 0.1
layer_norm_eps = 1e-5

# Number of passes over the training data.
epoch = 2

## Datasetの読み込み、DataLoaderへの変形

In [5]:
# Load the serialized train and test datasets from disk.
# NOTE(review): presumably these are pickled MyDataset objects, which is why
# MyDataset is imported earlier although never referenced by name — confirm.
train_data, test_data = (
    torch.load(path)
    for path in ('../data/train_data.pth', '../data/test_data.pth')
)

In [6]:
# Use the batch_size defined in the configuration cell instead of repeating the
# literal 16, so changing the setting there actually changes the loaders.
# Training batches are shuffled and the last partial batch is dropped.
train_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)
# Evaluation keeps a fixed order and every sample: shuffling is pointless for a
# loss average and drop_last would silently discard part of the test set.
test_loader = DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
)

## 辞書の読み込み

In [7]:
import torchtext

# Load the serialized English and Japanese vocabularies from disk.
# NOTE(review): torchtext is not referenced by name; presumably it must be
# importable for the vocab objects to be deserialized — confirm.
vocab_en, vocab_ja = (
    torch.load(path)
    for path in ('../data/vocab_en.pth', '../data/vocab_ja.pth')
)

### 一応データセット、辞書の確認

In [8]:
# Take a single batch from the training loader and print basic facts about it
# (batch size, tensor shape, first 30 ids of the first sentence, first 30
# vocabulary entries, sequence length) for the English and Japanese sides.
# Both names stay 0 if the loader yields no batch at all.
ja_text, en_text = 0, 0
for ja_text, en_text in train_loader:
    sides = (
        ("en_textのshapeは{}", en_text, vocab_en),
        ("ja_textのshapeは{}", ja_text, vocab_ja),
    )
    for shape_template, ids, vocab in sides:
        print("文章の量は{}".format(len(ids)))
        print(shape_template.format(ids.shape))
        print("最初のencoding文は{}".format(ids[0][0:30]))
        print("辞書の最初の30文字は{}".format(vocab.lookup_tokens(range(30))))
        print("文の最大長さは{}".format(len(ids[0])))
        print("------------------------------------------------")
    # Only the first batch is inspected.
    break

文章の量は16
en_textのshapeはtorch.Size([16, 159])
最初のencoding文はtensor([   3,  935, 4190,    4,  842,  351,   33,  178,  410,  141,  271,  855,
           4,  163,  180,    5,  935,    8,   58,   25, 1495,  280,    4,  881,
           8,    4, 1489,  501, 1763,    7])
辞書の最初の30文字は['<pad>', '<unk>', '<eos>', '<bos>', 'the', ',', 'of', '.', 'and', 'in', '(', ')', 'to', 'was', 'a', '"', 'is', 'as', "'s", 'that', 'by', 'kyoto', 'for', 'it', 'his', 'university', 'with', 'he', 'emperor', '-']
文の最大長さは159
------------------------------------------------
文章の量は16
ja_textのshapeはtorch.Size([16, 159])
最初のencoding文はtensor([1330,   31,   28,  275,  506,    6,  305,  660,  951,    9, 1875,   11,
        3619,   13,  691,   20,   31,  305,  638,  106,   62, 1645,  207, 1324,
           9,  837,    7,    0,    0,    0])
辞書の最初の30文字は['<pad>', '<unk>', '<eos>', '<bos>', 'の', '、', 'に', '。', 'は', 'を', 'る', 'た', 'て', 'と', 'し', '（', '）', 'が', 'い', '年', 'で', 'な', 'あ', 'っ', 'れ', '・', 'さ', 'り', '-', '京都']
文の最大長さは159
----

In [9]:
# Turn the first sentence of the inspected batch back into text for each
# language, skipping '<pad>' tokens, and print the two results.
en_tokens = (vocab_en.lookup_token(index) for index in en_text[0].tolist())
words = [token for token in en_tokens if token != '<pad>']
sentence = ' '.join(words)
print(sentence)
print('-------------------------------------------')
ja_tokens = (vocab_ja.lookup_token(index) for index in ja_text[0].tolist())
words = [token for token in ja_tokens if token != '<pad>']
sentence = ' '.join(words)
print(sentence)

<bos> harvard university-as the 350th anniversary from their foundation came around almost the same year , harvard and ryukoku university jointly held the commemoration and the japan-u.s. international symposium . <eos>
-------------------------------------------
ハーバード 大学 - 同 時期 に 創立 350 周年 を 迎え た 本学 と 共同 で 大学 創立 記念 会 や 日米 国際 シンポジウム を 開催 。


In [10]:
# Default to the CPU and switch to CUDA only when torch reports it available.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(device)

cpu


## モデルの読み込み

In [11]:
# Collect the constructor arguments first, then build the model and move it to
# the selected device.
# NOTE(review): d_model and d_ff are defined in the configuration cell but are
# not passed here — confirm whether Transformer is meant to receive them.
model_kwargs = dict(
    src_vocab_size=src_vocab_size,
    tgt_vocab_size=tgt_vocab_size,
    max_len=max_len,
    num_head=num_head,
    N=N,
    pad_idx=pad_idx,
    dropout_rate=dropout_rate,
    layer_norm_eps=layer_norm_eps,
    device=device,
)
model = Transformer(**model_kwargs).to(device)

## 損失関数と最適化関数の定義

In [12]:
from torch import optim

# Targets are padded with pad_idx up to a fixed length, so padded positions
# must be excluded from the loss; otherwise the model is rewarded for
# predicting '<pad>' and the reported loss is dominated by padding.
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

# Adam over all model parameters. The learning rate given here is the base
# value that the scheduler defined below scales at every step.
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.98),
    eps=1e-9,
)

In [13]:
class WarmupLinearSchedule(torch.optim.lr_scheduler._LRScheduler):
    """Learning-rate schedule with linear warm-up followed by linear decay.

    The factor applied to each base learning rate rises linearly from 0 to 1
    over the first ``warmup_steps`` calls to ``step()`` and then falls
    linearly, reaching 0 at ``total_steps`` and staying at 0 afterwards.

    Args:
        optimizer: Optimizer whose learning rates are scheduled.
        warmup_steps: Number of steps over which the rate ramps up.
        total_steps: Step at which the rate has decayed to 0.
        last_epoch: Index of the last step, forwarded to the base class
            (-1 starts a fresh schedule).
    """

    def __init__(
        self,
        optimizer,
        warmup_steps,
        total_steps,
        last_epoch=-1
    )->None:
        # These attributes must exist before the base-class constructor runs,
        # because that constructor already performs an initial step that
        # calls get_lr().
        self.warmup_steps = warmup_steps
        self.total_steps = total_steps
        super(WarmupLinearSchedule, self).__init__(optimizer, last_epoch)

    def get_lr(
        self
    )->list:
        """Return the learning rate of every parameter group for the current step."""
        step = self.last_epoch
        if step < self.warmup_steps:
            return [base_lr * float(step) / self.warmup_steps for base_lr in self.base_lrs]
        # Clamp the decay span to at least 1: with total_steps == warmup_steps
        # the plain difference is a division by zero, and with
        # total_steps < warmup_steps it is negative, which made the rate grow
        # without bound instead of decaying.
        decay_span = max(1, self.total_steps - self.warmup_steps)
        return [
            base_lr * max(0.0, float(self.total_steps - step) / decay_span)
            for base_lr in self.base_lrs
        ]

In [14]:
# The scheduler is stepped once per training batch, so the total number of
# steps is the number of batches the loader actually yields per epoch
# (len(train_loader) already accounts for drop_last) times the epoch count.
total_steps = epoch * len(train_loader)
# Cap the warm-up at 10% of training. A fixed 4000-step warm-up is longer than
# the whole run on this dataset, which kept the learning rate at a small
# fraction of its base value and never reached the decay phase.
warmup_steps = min(4000, max(1, total_steps // 10))
scheduler = WarmupLinearSchedule(optimizer, warmup_steps, total_steps)

In [None]:
from tqdm import tqdm

# Per-step losses over the whole run; the plotting cell draws these lists.
train_loss_list = []
test_loss_list = []

for i in range(epoch):
    print('---------------------')
    print("Epoch:{}/{}".format(i+1,epoch))
    # Running sums used for the per-epoch averages reported below.
    train_loss = 0.0
    test_loss = 0.0

    # ---- training pass ----
    model.train()
    for ja_text,en_text in tqdm(train_loader):
        ja_text = ja_text.to(device)
        en_text = en_text.to(device)
        optimizer.zero_grad()

        # NOTE(review): the decoder input and the labels are the same,
        # unshifted en_text. Unless Transformer shifts the target internally,
        # teacher forcing would need tgt=en_text[:, :-1] with labels
        # en_text[:, 1:] — confirm against the model implementation.
        y_pred_prob = model(
            src=ja_text,
            tgt=en_text
        ).view(-1,tgt_vocab_size)
        loss=criterion(
            y_pred_prob,
            en_text.view(-1)
        )
        loss.backward()

        optimizer.step()
        # The learning-rate schedule advances once per batch.
        scheduler.step()

        step_loss = loss.item()
        train_loss += step_loss
        train_loss_list.append(step_loss)
    # max(1, ...) avoids a division by zero when the loader yields no batch.
    epoch_train_loss = train_loss/max(1,len(train_loader))

    # ---- evaluation pass (no gradients, dropout disabled) ----
    model.eval()
    with torch.no_grad():
        for ja_text,en_text in tqdm(test_loader):
            ja_text = ja_text.to(device)
            en_text = en_text.to(device)
            y_pred_prob = model(
                src=ja_text,
                tgt=en_text
            ).view(-1,tgt_vocab_size)
            loss = criterion(y_pred_prob,
                             en_text.view(-1)
                            )

            step_loss = loss.item()
            test_loss += step_loss
            test_loss_list.append(step_loss)
    epoch_test_loss = test_loss/max(1,len(test_loader))

    print("train_loss:{:.4f} test_loss:{:.4f}".format(epoch_train_loss,epoch_test_loss))

---------------------
Epoch:1/2


  3%|▎         | 2/58 [00:32<15:11, 16.28s/it]

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Apply the seaborn style before the figure is created: axes pick up the style
# that is active at creation time, so setting it afterwards has no effect on them.
sns.set(style="darkgrid")
palette = sns.color_palette("muted")
fig, axes = plt.subplots(1, 2, figsize=(20, 6), sharey=True)

# One panel per loss list: per-step training loss on the left, per-step test
# loss on the right.
panels = (
    (train_loss_list, "Train_loss/steps"),
    (test_loss_list, "Test_loss/steps"),
)
for ax, color, (losses, title) in zip(axes, palette, panels):
    # `palette=` only takes effect together with `hue`; a single line is
    # coloured through `color=` instead.
    sns.lineplot(ax=ax, x=np.arange(len(losses)), y=losses, color=color, marker='o', linewidth=2.5)
    ax.set_title(title, fontsize=16, fontweight='bold')
    ax.set_xlabel("Step", fontsize=14)
    ax.set_ylabel("Loss", fontsize=14)

# Show the figure.
plt.show()