In [1]:
import pandas as pd
train_df = pd.read_csv('train.txt', delimiter='\t')
valid_df = pd.read_csv('valid.txt', delimiter='\t')
test_df = pd.read_csv('test.txt', delimiter='\t')

In [2]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [3]:
import re
import time
from collections import  Counter
from pathlib import Path

import numpy as np
import gensim
import pickle
import scipy

from tqdm.auto import tqdm

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import TensorDataset, DataLoader, Dataset

from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt

In [4]:
# 第6章と同様に前処理
# ulrの削除
def remove_url(text):
    url_pat = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    text = re.sub(url_pat, ' ', text)
    return text

# 小文字化
def lower_text(text):
    return text.lower()

# トークン化
def tokenize(text):
    tokens = ' '.join([str(token) for token in nlp.make_doc(text) if str(token) != ' '])
    return tokens

def preporcessing(text):
    text = lower_text(text)
    text = remove_url(text)
    tokens = tokenize(text)
    return tokens

In [5]:
train_title_tokenized = train_df['title'].map(preporcessing)
valid_title_tokenized = valid_df['title'].map(preporcessing)
test_title_tokenized = test_df['title'].map(preporcessing)

In [6]:
category_map = {'b': 0, 't': 1, 'e':2, 'm': 3}
train_df['category'] = train_df['category'].map(category_map)
valid_df['category'] = valid_df['category'].map(category_map)
test_df['category'] = test_df['category'].map(category_map)

# ch 80

In [7]:
# train内で2回以上出現した単語を選択
counter = Counter()
for text in train_title_tokenized:
    counter.update(text.split())

vocab = {'<unk>': 0, '<pad>': 1}
vocab_id = 2
for word, freq in counter.items():
    if freq >= 2:
        vocab[word] = vocab_id
        vocab_id += 1

In [8]:
len(vocab)

6496

In [9]:
it2token = {0:'<unk>', 1: '<pad>'}
it2token.update({v:k for k, v in vocab.items()})

In [10]:
def encode(text):
    return [vocab[token] if vocab.get(token) else 0 for token in text.split()]

In [151]:
x_train = list(map(encode, train_title_tokenized))
y_train = train_df['category'].values

x_valid = list(map(encode, valid_title_tokenized))
y_valid = valid_df['category'].values

x_test = list(map(encode, test_title_tokenized))
y_test = test_df['category'].values

In [12]:
print(x_train[10])
print(train_title_tokenized[10])
print(' '.join([it2token[token] for token in x_train[10]]))

[93, 37, 94, 7, 95, 0, 96, 45, 97, 98]
southern and turner to buy macho springs from first solar
southern and turner to buy <unk> springs from first solar


# 81 - 85
RNNによる予測
- RNNの出力を確認
- SGDによる学習
- ミニバッチ化, GPU上での学習
- 単語ベクトルの導入
- 多層化


In [13]:
class TextDataset(Dataset):
    def __init__(self, xs, ys=None):
        self.xs = xs
        self.ys = ys

    def __len__(self):
        return len(self.xs)

    def __getitem__(self, index):
        if self.ys is None:
            return self.xs[index]
        return self.xs[index], self.ys[index]

In [14]:
def collate_fn(batch):
    def _pad_sequences(seqs):
        lens = [len(seq) for seq in seqs]
        max_len = max(lens)

        # 最初にPADDINGの配列用意
        # 1がPAD TOKEN
        padded_seqs = torch.ones(len(seqs), max_len).long()
        for i, seq in enumerate(seqs):
            start = max_len - lens[i]
            padded_seqs[i, :lens[i]] = torch.LongTensor(seq)
        return padded_seqs
    
    if isinstance(batch[0], tuple):
        transposed = list(zip(*batch))
        data = transposed[0]
        data = _pad_sequences(data)
        return data, torch.LongTensor(transposed[1])

    else:
        data = _pad_sequences(batch)
        return data

In [15]:
trn_ds = TextDataset(x_train, y_train)
trn_dl = DataLoader(trn_ds, batch_size=128, collate_fn=collate_fn)

In [16]:
xs, ys = next(iter(trn_dl))

In [17]:
xs

tensor([[  2,   3,   4,  ...,   1,   1,   1],
        [ 14,   7,  15,  ...,   1,   1,   1],
        [ 25,  26,  27,  ...,   1,   1,   1],
        ...,
        [803, 351, 255,  ...,   1,   1,   1],
        [ 15, 806, 807,  ...,   1,   1,   1],
        [813, 501, 390,  ...,   1,   1,   1]])

In [18]:
ys

tensor([2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 1, 1, 0, 2, 2, 1, 2, 2,
        2, 2, 0, 3, 2, 3, 3, 2, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 3, 2, 2, 3, 0,
        1, 2, 2, 1, 3, 0, 0, 2, 3, 2, 0, 1, 2, 2, 0, 0, 1, 0, 3, 3, 2, 0, 3, 2,
        2, 2, 2, 2, 2, 0, 2, 0, 1, 2, 0, 1, 2, 0, 2, 2, 2, 2, 1, 1, 2, 3, 0, 2,
        0, 0, 2, 0, 2, 1, 2, 2, 1, 3, 2, 2, 0, 0, 0, 0, 2, 1, 2, 2, 2, 2, 2, 1,
        2, 0, 2, 2, 3, 1, 2, 0])

In [19]:
y_train[:128]

array([2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 1, 1, 0, 2, 2, 1,
       2, 2, 2, 2, 0, 3, 2, 3, 3, 2, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 3,
       2, 2, 3, 0, 1, 2, 2, 1, 3, 0, 0, 2, 3, 2, 0, 1, 2, 2, 0, 0, 1, 0,
       3, 3, 2, 0, 3, 2, 2, 2, 2, 2, 2, 0, 2, 0, 1, 2, 0, 1, 2, 0, 2, 2,
       2, 2, 1, 1, 2, 3, 0, 2, 0, 0, 2, 0, 2, 1, 2, 2, 1, 3, 2, 2, 0, 0,
       0, 0, 2, 1, 2, 2, 2, 2, 2, 1, 2, 0, 2, 2, 3, 1, 2, 0])

In [20]:
class RnnNet(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, embed=None, arch='rnn', num_layers=1, bi_dir=False):
        super().__init__()

        self.emb = nn.Embedding(vocab_size, embed_dim)

        if embed is None:
            self.emb.weight.data.uniform_(-0.1, 0.1)
        else:
            self.emb.weight = nn.Parameter(torch.tensor(embed, dtype=torch.float32))
            self.emb.weight.requires_grad = False

        if arch == 'rnn':
            self.rnn = nn.RNN(embed_dim, hidden_dim, batch_first=True, num_layers=num_layers, bidirectional=bi_dir)
        elif arch == 'lstm':
            self.rnn = nn.LSTM(embed_dim, hidden_dim, batch_first=True, num_layers=num_layers, bidirectional=bi_dir)

        self.fc = nn.Linear(hidden_dim if not bi_dir else hidden_dim * 2, 4)

    def forward(self, x):
        out = self.emb(x)
        out, _  = self.rnn(out)
        out = self.fc(out[:, -1, :])
        return out

In [21]:
model = RnnNet(vocab_size=len(vocab), embed_dim=300, hidden_dim=50)

In [22]:
with torch.no_grad():
    out = model(xs)

In [23]:
out[:5]

tensor([[ 0.1273, -0.1423, -0.1674, -0.1708],
        [ 0.1246, -0.1398, -0.1633, -0.1696],
        [ 0.1278, -0.1415, -0.1681, -0.1708],
        [ 0.1311, -0.1386, -0.1639, -0.1726],
        [ 0.1236, -0.1395, -0.1711, -0.1683]])

In [24]:
def to_cpu(x):
    return x.contiguous().detach().cpu()

def to_numpy(x):
    return to_cpu(x).numpy()

In [25]:
class Trainer:
    def __init__(self, model, optimizer, trn_dl, val_dl, test_dl, num_epoch, device):
        self.trn_dl = trn_dl
        self.val_dl = val_dl
        self.test_dl = test_dl

        self.model = model
        self.num_epoch = num_epoch

        self.device = device

        self.optimizer = optimizer
        self.criterion = nn.CrossEntropyLoss()

    def batch_step(self):
        for xs, ys in self.trn_dl:
            xs, ys = xs.to(self.device), ys.to(self.device)
            
            y_hat = self.model(xs)
            loss = self.criterion(y_hat, ys)

            loss.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()

    def evaluation(self, dl):
        acc_scores = []
        losses = []
        self.model.eval()
        with torch.no_grad():
            for xs, ys in dl:
                xs, ys = xs.to(self.device), ys.to(self.device)
                y_hat = self.model(xs)

                loss = self.criterion(y_hat, ys).item()

                ys = to_numpy(ys)
                y_hat = np.argmax(to_numpy(y_hat), axis=-1)

                acc = accuracy_score(ys, y_hat)

                acc_scores.append(acc)
                losses.append(loss)

        return np.mean(acc_scores), np.mean(losses)

    def train(self):
        self.model.zero_grad()

        acc_epcohs = []
        losses_epochs = []
        epoch_time = []

        with tqdm(range(self.num_epoch)) as pbar:
            for epoch in pbar:
                self.model.train()
                self.batch_step()
                # 評価
                acc_score = {}
                losses = {}

                acc_epoch, loss_epoch = self.evaluation(self.trn_dl)
                acc_score['train_acc'] = acc_epoch
                losses['train_loss'] = loss_epoch

                acc_epoch, loss_epoch = self.evaluation(self.val_dl)
                acc_score['valid_acc'] = acc_epoch
                losses['valid_loss'] = loss_epoch

                acc_epoch, loss_epoch = self.evaluation(self.test_dl)
                acc_score['test_acc'] = acc_epoch
                losses['test_loss'] = loss_epoch


                acc_epcohs.append(acc_score)
                losses_epochs.append(losses)
                pbar.set_postfix(acc_score)

        return acc_epcohs, losses_epochs

In [26]:
#　確率的勾配
trn_ds = TextDataset(x_train, y_train)
trn_dl = DataLoader(trn_ds, batch_size=1, collate_fn=collate_fn, shuffle=True)

val_ds = TextDataset(x_valid, y_valid)
val_dl = DataLoader(val_ds, batch_size=128, collate_fn=collate_fn, shuffle=False)

test_ds = TextDataset(x_test, y_test)
test_dl = DataLoader(test_ds, batch_size=128, collate_fn=collate_fn, shuffle=False)

model = RnnNet(vocab_size=len(vocab), embed_dim=300, hidden_dim=50)
optimizer = torch.optim.SGD(model.parameters(), lr=3e-1)

In [27]:
trainer = Trainer(model, optimizer, trn_dl, val_dl, test_dl, 5, device='cpu')

In [28]:
acc_epcohs, losses_epochs = trainer.train()

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [29]:
#　ミニバッチ＋GPU
trn_ds = TextDataset(x_train, y_train)
trn_dl = DataLoader(trn_ds, batch_size=64, collate_fn=collate_fn, shuffle=True)

val_ds = TextDataset(x_valid, y_valid)
val_dl = DataLoader(val_ds, batch_size=128, collate_fn=collate_fn, shuffle=False)


test_ds = TextDataset(x_test, y_test)
test_dl = DataLoader(test_ds, batch_size=128, collate_fn=collate_fn, shuffle=False)

model = RnnNet(vocab_size=len(vocab), embed_dim=300, hidden_dim=50)
model.to('cuda')

# SGDだと学習が進まないので以降Adamに変更する
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [30]:
trainer = Trainer(model, optimizer, trn_dl, val_dl, test_dl, 15, device='cuda')

In [31]:
acc_epcohs, losses_epochs = trainer.train()

HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




単語ベクトルの導入

In [32]:
# donwload word2vec
import gdown
url = 'https://drive.google.com/u/0/uc?export=download&confirm=xJKk&id=0B7XkCwpI5KDYNlNUTTlSS21pQmM'
output = 'GoogleNews-vectors-negative300.bin.gz'
gdown.download(url, output, quiet=False)
!gzip -d '/content/GoogleNews-vectors-negative300.bin.gz'

Downloading...
From: https://drive.google.com/u/0/uc?export=download&confirm=xJKk&id=0B7XkCwpI5KDYNlNUTTlSS21pQmM
To: /content/GoogleNews-vectors-negative300.bin.gz
1.65GB [00:23, 70.7MB/s]


In [33]:
model = gensim.models.KeyedVectors.load_word2vec_format('/content/GoogleNews-vectors-negative300.bin', binary=True)

  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


In [34]:
# 平均と分散を算出して初期化する
embedding_matrix = np.random.normal(np.mean(model.vectors), np.std(model.vectors), (len(vocab), 300))

In [35]:
for word, index in vocab.items():
    if word in model.vocab:
        embedding_matrix[index] = model.get_vector(word)

In [36]:
trn_ds = TextDataset(x_train, y_train)
trn_dl = DataLoader(trn_ds, batch_size=64, collate_fn=collate_fn, shuffle=True)

val_ds = TextDataset(x_valid, y_valid)
val_dl = DataLoader(val_ds, batch_size=128, collate_fn=collate_fn, shuffle=False)


test_ds = TextDataset(x_test, y_test)
test_dl = DataLoader(test_ds, batch_size=128, collate_fn=collate_fn, shuffle=False)

model = RnnNet(vocab_size=len(vocab), embed_dim=300, hidden_dim=50, embed=embedding_matrix)
model.to('cuda')

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [37]:
trainer = Trainer(model, optimizer, trn_dl, val_dl, test_dl, 15, device='cuda')

In [38]:
acc_epcohs, losses_epochs = trainer.train()

HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




In [39]:
# 多層化
#　ミニバッチ＋GPU+bi_dir+multilayer
trn_ds = TextDataset(x_train, y_train)
trn_dl = DataLoader(trn_ds, batch_size=128, collate_fn=collate_fn, shuffle=True)

val_ds = TextDataset(x_valid, y_valid)
val_dl = DataLoader(val_ds, batch_size=128, collate_fn=collate_fn, shuffle=False)


test_ds = TextDataset(x_test, y_test)
test_dl = DataLoader(test_ds, batch_size=128, collate_fn=collate_fn, shuffle=False)

model = RnnNet(vocab_size=len(vocab), embed_dim=300, hidden_dim=50, embed=embedding_matrix, num_layers=2, bi_dir=True)
model.to('cuda')

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [40]:
trainer = Trainer(model, optimizer, trn_dl, val_dl, test_dl, 15, device='cuda')

In [41]:
acc_epcohs, losses_epochs = trainer.train()

HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




# 86-87
- CNNによる分類

In [42]:
class CnnNet(nn.Module):
    def __init__(self, embed, hidden_dim=50):
        super().__init__()

        self.emb = nn.Embedding(*embed.shape)
        self.emb.weight = nn.Parameter(torch.tensor(embed, dtype=torch.float32))
        self.emb.weight.requires_grad = False

        self.conv = nn.Conv2d(
            in_channels = 1,
            out_channels = hidden_dim,
            kernel_size = (3, embed.shape[1]),
            padding = (1, 0)
        )

        self.fc = nn.Linear(hidden_dim, 4)

    def forward(self, x):
        out = self.emb(x)
        out = out.unsqueeze(1)
        out = self.conv(out)
        
        #[batch_size, hidden_dim, seq_len]
        out = out.squeeze()

        max_pool, _ = torch.max(out, -1)
        out = self.fc(max_pool)
        return out

In [43]:
model = CnnNet(embedding_matrix)

In [44]:
with torch.no_grad():
    out = model(xs)

In [45]:
out[:3, :]

tensor([[ 0.0577,  0.0576,  0.1822, -0.2564],
        [-0.0293,  0.0647,  0.1331, -0.1519],
        [-0.0022,  0.0747,  0.0779, -0.2070]])

学習

In [157]:
trn_ds = TextDataset(x_train, y_train)
trn_dl = DataLoader(trn_ds, batch_size=128, collate_fn=collate_fn, shuffle=True)

val_ds = TextDataset(x_valid, y_valid)
val_dl = DataLoader(val_ds, batch_size=128, collate_fn=collate_fn, shuffle=False)


test_ds = TextDataset(x_test, y_test)
test_dl = DataLoader(test_ds, batch_size=128, collate_fn=collate_fn, shuffle=False)

model = CnnNet(embedding_matrix)
model.to('cuda')

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [158]:
trainer = Trainer(model, optimizer, trn_dl, val_dl, test_dl, 10, device='cuda')

In [159]:
acc_epcohs, losses_epochs = trainer.train()

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




In [160]:
pd.DataFrame(acc_epcohs)

Unnamed: 0,train_acc,valid_acc,test_acc
0,0.778156,0.76735,0.775211
1,0.872589,0.875644,0.852639
2,0.895236,0.906084,0.868264
3,0.911778,0.910696,0.873147
4,0.927949,0.913626,0.874123
5,0.93696,0.916826,0.877053
6,0.94476,0.918509,0.885842
7,0.956561,0.919486,0.887795
8,0.966097,0.919486,0.884865
9,0.973246,0.917532,0.881936


In [161]:
pd.DataFrame(losses_epochs)

Unnamed: 0,train_loss,valid_loss,test_loss
0,0.605197,0.621732,0.629327
1,0.407857,0.42472,0.461269
2,0.321714,0.349428,0.389409
3,0.272928,0.315754,0.36747
4,0.23569,0.303662,0.353428
5,0.205368,0.294526,0.34019
6,0.177283,0.278743,0.329126
7,0.154625,0.275986,0.326148
8,0.135545,0.275547,0.324773
9,0.117621,0.271838,0.326719


# 88 ハイパーパラメーターのチューニング
- アーキテクチャの変更

In [152]:
#　ミニバッチ＋GPU+LSTM
trn_ds = TextDataset(x_train, y_train)
trn_dl = DataLoader(trn_ds, batch_size=128, collate_fn=collate_fn, shuffle=True)

val_ds = TextDataset(x_valid, y_valid)
val_dl = DataLoader(val_ds, batch_size=128, collate_fn=collate_fn, shuffle=False)

test_ds = TextDataset(x_test, y_test)
test_dl = DataLoader(test_ds, batch_size=128, collate_fn=collate_fn, shuffle=False)

model = RnnNet(vocab_size=len(vocab), embed_dim=300, hidden_dim=50, embed=embedding_matrix, num_layers=2, bi_dir=True, arch='lstm')
model.to('cuda')

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [153]:
trainer = Trainer(model, optimizer, trn_dl, val_dl, test_dl, 15, device='cuda')

In [154]:
acc_epcohs, losses_epochs = trainer.train()

HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




In [155]:
pd.DataFrame(acc_epcohs)

Unnamed: 0,train_acc,valid_acc,test_acc
0,0.703575,0.720475,0.713847
1,0.761739,0.757148,0.781728
2,0.771388,0.771962,0.759427
3,0.783972,0.777115,0.78683
4,0.828487,0.818837,0.809072
5,0.851134,0.838805,0.834084
6,0.871133,0.862242,0.847975
7,0.88211,0.864466,0.857362
8,0.876291,0.847324,0.857143
9,0.898402,0.890729,0.868483


In [156]:
pd.DataFrame(losses_epochs)

Unnamed: 0,train_loss,valid_loss,test_loss
0,0.905948,0.886408,0.890796
1,0.605668,0.610758,0.595089
2,0.624611,0.591338,0.634463
3,0.543932,0.568563,0.56285
4,0.480004,0.500629,0.519602
5,0.442483,0.47239,0.495153
6,0.389233,0.414854,0.456212
7,0.358939,0.409241,0.43776
8,0.359755,0.420029,0.441109
9,0.317784,0.373283,0.439657


#89 BERTによる分類

In [52]:
!pip install -q transformers

[K     |████████████████████████████████| 1.3MB 2.8MB/s 
[K     |████████████████████████████████| 2.9MB 18.7MB/s 
[K     |████████████████████████████████| 1.1MB 39.9MB/s 
[K     |████████████████████████████████| 890kB 41.8MB/s 
[?25h  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone


In [53]:
from transformers import AutoConfig, AutoModel, AutoTokenizer

In [63]:
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', use_fast=True)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…




In [120]:
def encode_text(tokenizer, texts):
    encoded = tokenizer.batch_encode_plus(
        texts,
        return_token_type_ids=False,
        max_length=120,
        padding=True,
        truncation=True,
    )

    # {'input_ids': [[A], [B], [C]],}
    # {'attention_mask': [[1], [2], [3]]}
    # list(zip(input_ids, attention_mask')) => [(A, 1), (B, 2), (C, 3)]
    # np.array => (train_size, type, seq_len)

    return np.array(
        list(zip(encoded.input_ids, encoded.attention_mask))
    )

In [121]:
class BertTextDataset(Dataset):
    def __init__(self, xy, ys, tokenizer):
        self.xs = xs
        self.ys = ys
    
    def __len__(self):
        return len(self.xs)

    def __getitem__(self, index):
        return xs[index], ys[index]

In [126]:
x_train = encode_text(tokenizer, train_df['title'].values.tolist())
x_valid = encode_text(tokenizer, valid_df['title'].values.tolist())
x_test = encode_text(tokenizer, test_df['title'].values.tolist())

In [140]:
class BertModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.transformer = AutoModel.from_pretrained('bert-base-uncased')
        self.fc = nn.Linear(768, 4)

    def forward(self, x):
        input_ids = x[:, 0, :]
        attention_mask = x[:, 1, :]

        out, _ = self.transformer(input_ids, attention_mask)
        out = self.fc(out[:, 0, :])
        return out

In [145]:
trn_ds = TextDataset(x_train, y_train)
trn_dl = DataLoader(trn_ds, batch_size=32, shuffle=True)

val_ds = TextDataset(x_valid, y_valid)
val_dl = DataLoader(val_ds, batch_size=32, shuffle=False)

test_ds = TextDataset(x_test, y_test)
test_dl = DataLoader(test_ds, batch_size=32, shuffle=False)

model = BertModel()
model.to('cuda')

optimizer = torch.optim.Adam(model.parameters(), lr=3e-5)

In [146]:
trainer = Trainer(model, optimizer, trn_dl, val_dl, test_dl, 3, device='cuda')

In [147]:
acc_epcohs, losses_epochs = trainer.train()

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




In [148]:
pd.DataFrame(acc_epcohs)

Unnamed: 0,train_acc,valid_acc,test_acc
0,0.95723,0.929028,0.898162
1,0.985152,0.949861,0.917953
2,0.99285,0.943611,0.924081


In [149]:
pd.DataFrame(losses_epochs)

Unnamed: 0,train_loss,valid_loss,test_loss
0,0.144934,0.221538,0.276624
1,0.053327,0.183858,0.234453
2,0.025281,0.200559,0.241853
