# 第8章: ニューラルネット

[https://nlp100.github.io/ja/ch08.html](https://nlp100.github.io/ja/ch08.html)

第6章で取り組んだニュース記事のカテゴリ分類を題材として，ニューラルネットワークでカテゴリ分類モデルを実装する．なお，この章ではPyTorch, TensorFlow, Chainerなどの機械学習プラットフォームを活用せよ．

## 70. 単語ベクトルの和による特徴量

In [1]:
import gensim
import numpy as np
import pandas as pd
import spacy
import torch
import tqdm

# global variables
dataset_types = ['train', 'valid', 'test']
label2int = {
    "b": 0,
    "t": 1,
    "e": 2,
    "m": 3
}
Xs, ys = {}, {}
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Using {} device'.format(device.type))


def makeDatasetFiles():
    nlp = spacy.load("en_core_web_sm")

    # Word2Vec
    w2v = gensim.models.KeyedVectors.load_word2vec_format(
        '../Chapter7/GoogleNews-vectors-negative300.bin', 
        binary=True)

    for dataset in dataset_types:
        tmp_x, tmp_y = [], []
        tmp_df = pd.read_table('../Chapter6/{:}.txt'.format(dataset))

        for each in tmp_df.itertuples():

            # make X
            tokens = [token for token in nlp(each.TITLE)]
            num_tokens = len(tokens)

            x_i = np.zeros(300)
            for token in tokens:
                try:
                    token_embedding = w2v[str(token)]
                    x_i = np.add(x_i, token_embedding)

                except KeyError:
                    num_tokens -= 1
                    continue

            x_i = np.divide(x_i, num_tokens)
            tmp_x.append(x_i)

            # make y
            tmp_y.append(label2int[each.CATEGORY])
        
        # convert to torch.Tensor
        Xs[dataset] = torch.Tensor([tmp_x]).float()
        ys[dataset] = torch.Tensor([tmp_y]).long()

        # pickle
        torch.save(Xs[dataset], 'X_{:}.pickle'.format(dataset))
        torch.save(ys[dataset], 'y_{:}.pickle'.format(dataset))
    
    return Xs, ys

Using cpu device


## 71. 単層ニューラルネットワークによる予測

In [2]:
# Data load
try:
    for dataset in dataset_types:
        Xs[dataset] = torch.load(
            'X_{:}.pickle'.format(dataset),
            map_location=device)
        ys[dataset] = torch.load(
            'y_{:}.pickle'.format(dataset),
            map_location=device)
except FileNotFoundError:
    Xs, ys = makeDatasetFiles()
    assert Xs != {} and ys != {}

In [3]:
import torch.nn as nn
import torch.nn.functional as F

# Define network
torch.manual_seed(46)
single_layer_network = nn.Sequential(
    nn.Linear(300, 4)
)

hat_Y, x_i, logits, probs = [], [], [], []
for i in range(4):
    x_i.append(torch.index_select(Xs['train'], dim=1, index=torch.tensor([i])))
    logits.append(single_layer_network(x_i[-1]))
    probs.append(F.softmax(logits[-1], dim=2))
    hat_Y.append(probs[-1].detach())

print('hat_y_1 = {:}'.format(hat_Y[0]))
print('hat_Y = {:}'.format(hat_Y))

hat_y_1 = tensor([[[0.2525, 0.2487, 0.2648, 0.2341]]])
hat_Y = [tensor([[[0.2525, 0.2487, 0.2648, 0.2341]]]), tensor([[[0.2490, 0.2528, 0.2500, 0.2482]]]), tensor([[[0.2359, 0.2576, 0.2604, 0.2460]]]), tensor([[[0.2497, 0.2551, 0.2544, 0.2408]]])]


## 72. 損失と勾配の計算

In [4]:
targets, losses = [], []
for i in range(4):
    targets.append(int(torch.index_select(ys['train'], dim=1, index=torch.tensor([i]))))
    losses.append(-torch.log(torch.index_select(probs[i], dim=2, index=torch.tensor([targets[-1]]))))

print('=== Losses ===========')
print('x_1 loss = {:}'.format(losses[0]))
print('average loss = {:}'.format(torch.mean(torch.Tensor(losses))))

print('=== Grads ===========')
for loss_ in losses:
    loss_.backward()
    print(single_layer_network[0].weight.grad)

x_1 loss = tensor([[[1.3289]]], grad_fn=<NegBackward>)
average loss = 1.3831130266189575
tensor([[ 0.0302,  0.0053, -0.0058,  ..., -0.0229, -0.0055, -0.0250],
        [ 0.0298,  0.0053, -0.0057,  ..., -0.0225, -0.0055, -0.0246],
        [-0.0881, -0.0156,  0.0168,  ...,  0.0666,  0.0162,  0.0727],
        [ 0.0280,  0.0050, -0.0054,  ..., -0.0212, -0.0051, -0.0231]])
tensor([[-0.0532,  0.0070,  0.0091,  ..., -0.0758, -0.0147,  0.0010],
        [ 0.0579,  0.0047, -0.0107,  ..., -0.0047, -0.0024, -0.0333],
        [-0.0603, -0.0161,  0.0119,  ...,  0.0842,  0.0192,  0.0640],
        [ 0.0556,  0.0044, -0.0103,  ..., -0.0037, -0.0021, -0.0317]])
tensor([[-0.0126, -0.0036,  0.0129,  ..., -0.1097, -0.0855, -0.0159],
        [ 0.0442,  0.0083, -0.0120,  ...,  0.0067,  0.0215, -0.0276],
        [-0.0741, -0.0125,  0.0106,  ...,  0.0957,  0.0433,  0.0698],
        [ 0.0425,  0.0078, -0.0115,  ...,  0.0072,  0.0207, -0.0263]])
tensor([[-0.0080, -0.0007,  0.0022,  ..., -0.1139, -0.0879, -0.0106]

## 73. 確率的勾配降下法による学習

In [5]:
from torch.optim import SGD

cross_entropy_loss = nn.CrossEntropyLoss()
optimizer = SGD(single_layer_network.parameters(), lr=0.1)
X, y = Xs['train'], ys['train']

for epoch in tqdm.tqdm(range(100)):
    optimizer.zero_grad()
    logits = single_layer_network(X)
    loss = cross_entropy_loss(logits.squeeze(), y.squeeze())
    loss.backward()
    optimizer.step()

100%|██████████| 100/100 [00:00<00:00, 336.01it/s]
