# 第9章: RNN, CNN

## 80. ID番号への変換
問題51で構築した学習データ中の単語にユニークなID番号を付与したい．  
学習データ中で最も頻出する単語に1，2番目に頻出する単語に2，……といった方法で，学習データ中で2回以上出現する単語にID番号を付与せよ．  
そして，与えられた単語列に対して，ID番号の列を返す関数を実装せよ．  
ただし，出現頻度が2回未満の単語のID番号はすべて0とせよ．

In [34]:
import pandas as pd
import os
import joblib
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

In [35]:
data_dir = '../ch06'

In [36]:
X_train = pd.read_table(os.path.join(data_dir,'train.txt'), header=None)
X_test = pd.read_table(os.path.join(data_dir,'test.txt'), header=None)
X_valid = pd.read_table(os.path.join(data_dir,'valid.txt'), header=None)

cols = ['TITLE', 'CATEGORY']
for data in [X_train, X_test, X_valid]:
    data.columns = cols

X_train['LABEL'] = 'train'
X_test['LABEL'] = 'test'
X_valid['LABEL'] = 'valid'

concat_df = pd.concat([X_train, X_test, X_valid]).reset_index(drop=True)
print(concat_df.shape)
CV = CountVectorizer(stop_words="english")
feature = CV.fit_transform(concat_df['TITLE'])

(13340, 3)


In [38]:
frequency = np.asarray(feature.toarray().sum(axis=0))

In [56]:
vocabulary_freq = {w:f for w,f in zip(CV.vocabulary_, frequency)}
vocabulary_freq = sorted(vocabulary_freq.items(), key=lambda x: -x[-1])
word2id = {vf[0]:i if vf[1] > 2 else 0  for i,vf in enumerate(vocabulary_freq)}

In [58]:
word2id

{'cubist': 0,
 'shape': 1,
 'ipcc': 2,
 'dolorosa': 3,
 'chases': 4,
 'peacekeeping': 5,
 'nuri': 6,
 'wither': 7,
 'buyouts': 8,
 '16k': 9,
 'korean': 10,
 'starbucks': 11,
 'prompt': 12,
 'spot': 13,
 'added': 14,
 'clinton': 15,
 'raunchy': 16,
 'relative': 17,
 '14th': 18,
 'seasons': 19,
 'subsidiary': 20,
 'preacher': 21,
 'xl': 22,
 'cipriani': 23,
 'sow': 24,
 'shire': 25,
 'comeback': 26,
 'detective': 27,
 'kreme': 28,
 'scheming': 29,
 'thirds': 30,
 'cfdas': 31,
 'doctrine': 32,
 'strategies': 33,
 'partnering': 34,
 'pilgrim': 35,
 'virgin': 36,
 'abating': 37,
 'exemption': 38,
 'join': 39,
 'depresses': 40,
 'barring': 41,
 'vacation': 42,
 'floppy': 43,
 'arnett': 44,
 'canon': 45,
 'channel': 46,
 'applauds': 47,
 'true': 48,
 'pot': 49,
 'biodelivery': 50,
 'engineering': 51,
 'flooding': 52,
 'joaquin': 53,
 'mix': 54,
 'corset': 55,
 'exploring': 56,
 '325': 57,
 'alps': 58,
 'swarovski': 59,
 'eurobond': 60,
 'illustrates': 61,
 'encouraging': 62,
 'turnaround': 63

## 81. RNNによる予測

In [62]:
%%bash
pip install torchtext

Collecting torchtext
  Downloading https://files.pythonhosted.org/packages/f2/17/e7c588245aece7aa93f360894179374830daf60d7ed0bbb59332de3b3b61/torchtext-0.6.0-py3-none-any.whl (64kB)
Collecting tqdm (from torchtext)
  Downloading https://files.pythonhosted.org/packages/4a/1c/6359be64e8301b84160f6f6f7936bbfaaa5e9a4eab6cbc681db07600b949/tqdm-4.45.0-py2.py3-none-any.whl (60kB)
Collecting sentencepiece (from torchtext)
  Downloading https://files.pythonhosted.org/packages/70/59/19d287e3ddb00fa494422acb0ddab9964733e8c1b74fa20e7632a7825510/sentencepiece-0.1.86-cp36-cp36m-macosx_10_6_x86_64.whl (1.1MB)
Installing collected packages: tqdm, sentencepiece, torchtext
Successfully installed sentencepiece-0.1.86 torchtext-0.6.0 tqdm-4.45.0


You are using pip version 18.1, however version 20.1b1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.


In [63]:
from torchtext import data
import torch
from torch import nn

In [66]:
class RNN(nn.Module):
    def __init__(
        self,
        num_embeddings,
        embedding_dim=50,
        hidden_size=50,
        output_size=1,
        num_layers=1,
        dropout=0.2
                ):
        super().__init__()
        
        self.emb = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.linear = nn.Linear(hidden_size, output_size)
        
    def forward(self, x, h0=None):
        x = self.emb(x)
        x, h = self.lstm(x, h0)
        x = x[:, -1, :]
        x = self.linear(x)
        return x

In [79]:
TEXT = data.Field(sequential=True, lower=True, batch_first=True)
LABEL = data.Field(sequential=True, lower=True, batch_first=True)

In [80]:
train, val, test = data.TabularDataset.splits(
    path='../ch06', train='train.txt', validation='valid.txt', test='test.txt',format='tsv',
    fields=[('TEXT', TEXT), ('LABEL', LABEL)]
)

In [81]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu:0')
train_iter, val_iter, test_iter = data.Iterator.splits(
    (train, val, test), batch_sizes=(64, 64, 64), device=device, repeat=False, sort=False
)

In [84]:
TEXT.build_vocab(train, min_freq=2)
LABEL.build_vocab(train)
model = RNN(len(TEXT.vocab.stoi)+1, num_layers=2, output_size=4)

In [85]:
for epoch in range(1):
    model.train()
    for batch in train_iter:
        x, y = batch.TEXT, batch.LABEL
        y_pred = model(x)
        print(y_pred)
        print(y_pred.shape)

tensor([[0.2020, 0.1594, 0.0248, 0.0930],
        [0.2182, 0.1560, 0.0042, 0.0897],
        [0.1925, 0.1512, 0.0328, 0.1192],
        [0.2145, 0.1528, 0.0370, 0.0929],
        [0.2251, 0.1659, 0.0031, 0.0978],
        [0.2052, 0.1390, 0.0452, 0.0965],
        [0.1920, 0.1557, 0.0349, 0.0976],
        [0.2037, 0.1460, 0.0189, 0.0955],
        [0.1967, 0.1464, 0.0343, 0.1066],
        [0.1974, 0.1504, 0.0169, 0.1006],
        [0.1815, 0.1646, 0.0387, 0.1036],
        [0.1752, 0.1345, 0.0375, 0.0918],
        [0.2207, 0.1404, 0.0569, 0.1046],
        [0.1786, 0.1263, 0.0417, 0.1097],
        [0.1989, 0.1586, 0.0343, 0.0943],
        [0.1894, 0.1487, 0.0419, 0.0734],
        [0.1860, 0.1570, 0.0155, 0.0931],
        [0.2203, 0.1440, 0.0197, 0.0969],
        [0.2068, 0.1540, 0.0235, 0.0931],
        [0.1945, 0.1479, 0.0537, 0.0986],
        [0.2101, 0.1387, 0.0538, 0.1071],
        [0.2135, 0.1465, 0.0245, 0.1061],
        [0.1083, 0.1590, 0.0510, 0.0794],
        [0.2184, 0.1515, 0.0294, 0

tensor([[ 0.2026,  0.1462,  0.0028,  0.1104],
        [ 0.2227,  0.1380,  0.0488,  0.1015],
        [ 0.1821,  0.1694,  0.0158,  0.0604],
        [ 0.2058,  0.1564,  0.0274,  0.0998],
        [ 0.1949,  0.1491,  0.0352,  0.0980],
        [ 0.1890,  0.1622,  0.0293,  0.0835],
        [ 0.2106,  0.1504,  0.0349,  0.0948],
        [ 0.2103,  0.1634,  0.0314,  0.1008],
        [ 0.1090,  0.1563,  0.0085,  0.0550],
        [ 0.2155,  0.1689,  0.0142,  0.0928],
        [ 0.1958,  0.1504,  0.0380,  0.0926],
        [ 0.1962,  0.1432,  0.0338,  0.0558],
        [ 0.2140,  0.1448, -0.0015,  0.0798],
        [ 0.1903,  0.1468,  0.0313,  0.1041],
        [ 0.2071,  0.1488,  0.0336,  0.0930],
        [ 0.2063,  0.1477,  0.0117,  0.0958],
        [ 0.1384,  0.1286, -0.0087,  0.0755],
        [ 0.1645,  0.1411,  0.0139,  0.0744],
        [ 0.2075,  0.1568,  0.0393,  0.0935],
        [ 0.2178,  0.1444,  0.0024,  0.1107],
        [ 0.2235,  0.1400,  0.0344,  0.0837],
        [ 0.2011,  0.1549,  0.0113

tensor([[ 0.2181,  0.1313,  0.0525,  0.1081],
        [ 0.1769,  0.1541,  0.0179,  0.0650],
        [ 0.1807,  0.1371,  0.0428,  0.0966],
        [ 0.2043,  0.1636,  0.0358,  0.1061],
        [ 0.2189,  0.1491,  0.0153,  0.0984],
        [ 0.2006,  0.1541,  0.0088,  0.0679],
        [ 0.2011,  0.1420,  0.0512,  0.1014],
        [ 0.2003,  0.1664,  0.0058,  0.0895],
        [ 0.2337,  0.1511,  0.0334,  0.0982],
        [ 0.2131,  0.1723,  0.0363,  0.0874],
        [ 0.1966,  0.1533,  0.0179,  0.0917],
        [ 0.1295,  0.1493,  0.0107,  0.0617],
        [ 0.2101,  0.1431,  0.0445,  0.1217],
        [ 0.1911,  0.1420,  0.0392,  0.0979],
        [ 0.2037,  0.1446,  0.0295,  0.1031],
        [ 0.2274,  0.1382,  0.0401,  0.1056],
        [ 0.1763,  0.1580,  0.0159,  0.0710],
        [ 0.2105,  0.1596, -0.0075,  0.0955],
        [ 0.1477,  0.1504,  0.0041,  0.0978],
        [ 0.1811,  0.1452,  0.0571,  0.1036],
        [ 0.1635,  0.1529,  0.0041,  0.0582],
        [ 0.2221,  0.1397,  0.0354

tensor([[ 0.2176,  0.1502,  0.0332,  0.1026],
        [ 0.1992,  0.1463,  0.0210,  0.1004],
        [ 0.2037,  0.1320, -0.0180,  0.0797],
        [ 0.1823,  0.1431,  0.0438,  0.0849],
        [ 0.2001,  0.1611,  0.0306,  0.0831],
        [ 0.1796,  0.1383,  0.0201,  0.0869],
        [ 0.1979,  0.1481,  0.0183,  0.1210],
        [ 0.1829,  0.1432,  0.0104,  0.0939],
        [ 0.1940,  0.1456,  0.0391,  0.1006],
        [ 0.1540,  0.1394,  0.0085,  0.0727],
        [ 0.2161,  0.1628,  0.0223,  0.1142],
        [ 0.2092,  0.1406,  0.0246,  0.0985],
        [ 0.2134,  0.1514,  0.0147,  0.1180],
        [ 0.1969,  0.1656,  0.0313,  0.0915],
        [ 0.2138,  0.1483,  0.0195,  0.0889],
        [ 0.2045,  0.1736,  0.0096,  0.0913],
        [ 0.1849,  0.1260,  0.0342,  0.0862],
        [ 0.1961,  0.1413,  0.0070,  0.0970],
        [ 0.1964,  0.1329,  0.0243,  0.1090],
        [ 0.2291,  0.1436,  0.0119,  0.1033],
        [ 0.1984,  0.1342,  0.0101,  0.0996],
        [ 0.2089,  0.1439,  0.0388

tensor([[ 2.0783e-01,  1.5669e-01,  1.6049e-02,  9.1164e-02],
        [ 1.9680e-01,  1.5745e-01,  1.7646e-02,  8.6417e-02],
        [ 2.1088e-01,  1.4721e-01,  3.3451e-02,  8.4623e-02],
        [ 2.0646e-01,  1.6154e-01,  2.6803e-02,  1.0465e-01],
        [ 2.0603e-01,  1.5443e-01,  2.5008e-02,  1.0554e-01],
        [ 2.1276e-01,  1.6165e-01,  3.5328e-02,  1.0572e-01],
        [ 1.8135e-01,  1.4925e-01,  4.5117e-02,  1.0211e-01],
        [ 1.9401e-01,  1.5113e-01,  1.3574e-02,  9.3106e-02],
        [ 1.9847e-01,  1.5642e-01,  1.3122e-02,  7.9074e-02],
        [ 2.1496e-01,  1.4416e-01,  1.6513e-02,  9.5520e-02],
        [ 1.7801e-01,  1.4433e-01,  1.2311e-02,  1.0948e-01],
        [ 1.9059e-01,  1.5244e-01,  4.7388e-02,  1.0562e-01],
        [ 2.1606e-01,  1.5852e-01,  2.2042e-02,  7.8082e-02],
        [ 1.6607e-01,  1.3521e-01,  3.1028e-04,  6.5960e-02],
        [ 1.9936e-01,  1.3768e-01,  2.6832e-02,  1.0218e-01],
        [ 2.0294e-01,  1.4487e-01,  2.6685e-02,  1.0655e-01],
        

tensor([[ 0.2170,  0.1456,  0.0307,  0.0936],
        [ 0.1943,  0.1572,  0.0271,  0.1011],
        [ 0.1981,  0.1445,  0.0197,  0.0917],
        [ 0.2129,  0.1490,  0.0204,  0.1056],
        [ 0.1917,  0.1593,  0.0314,  0.0944],
        [ 0.2195,  0.1495,  0.0305,  0.1056],
        [ 0.2132,  0.1390,  0.0295,  0.0987],
        [ 0.2126,  0.1647,  0.0428,  0.1017],
        [ 0.2033,  0.1463,  0.0170,  0.0926],
        [ 0.1752,  0.1498,  0.0382,  0.0822],
        [ 0.1600,  0.1477,  0.0236,  0.0718],
        [ 0.2121,  0.1424,  0.0263,  0.1056],
        [ 0.2035,  0.1469,  0.0186,  0.0980],
        [ 0.1763,  0.1365,  0.0275,  0.1062],
        [ 0.2080,  0.1604,  0.0366,  0.0937],
        [ 0.1807,  0.1633,  0.0136,  0.1086],
        [ 0.2154,  0.1476,  0.0138,  0.1005],
        [ 0.1775,  0.1561,  0.0203,  0.0832],
        [ 0.1476,  0.1397,  0.0238,  0.0932],
        [ 0.2091,  0.1516,  0.0270,  0.1092],
        [ 0.2020,  0.1509,  0.0160,  0.1126],
        [ 0.1974,  0.1511,  0.0277

tensor([[ 0.2083,  0.1373,  0.0392,  0.1057],
        [ 0.2023,  0.1583,  0.0658,  0.1052],
        [ 0.2007,  0.1511,  0.0372,  0.0961],
        [ 0.2236,  0.1416,  0.0469,  0.1069],
        [ 0.2095,  0.1480,  0.0407,  0.0902],
        [ 0.2013,  0.1480,  0.0377,  0.1070],
        [ 0.1989,  0.1524,  0.0330,  0.0913],
        [ 0.1494,  0.1435, -0.0026,  0.0776],
        [ 0.2062,  0.1557,  0.0472,  0.0963],
        [ 0.2071,  0.1488,  0.0280,  0.1003],
        [ 0.2009,  0.1539,  0.0079,  0.0988],
        [ 0.2078,  0.1477,  0.0418,  0.0877],
        [ 0.2018,  0.1615, -0.0019,  0.0801],
        [ 0.1590,  0.1719,  0.0206,  0.0805],
        [ 0.2313,  0.1399,  0.0244,  0.0945],
        [ 0.2144,  0.1621,  0.0435,  0.0977],
        [ 0.1728,  0.1549,  0.0006,  0.0691],
        [ 0.2022,  0.1662,  0.0428,  0.0979],
        [ 0.2038,  0.1321,  0.0561,  0.1003],
        [ 0.2115,  0.1525,  0.0309,  0.0969],
        [ 0.1966,  0.1595,  0.0073,  0.0921],
        [ 0.1959,  0.1425, -0.0519

tensor([[ 0.1767,  0.1382,  0.0151,  0.0850],
        [ 0.1994,  0.1268,  0.0276,  0.0958],
        [ 0.2138,  0.1477,  0.0341,  0.1140],
        [ 0.1810,  0.1460,  0.0465,  0.1039],
        [ 0.1872,  0.1549, -0.0125,  0.0914],
        [ 0.1885,  0.1381,  0.0408,  0.1112],
        [ 0.2165,  0.1380,  0.0407,  0.0976],
        [ 0.1341,  0.1418,  0.0274,  0.0729],
        [ 0.2165,  0.1523,  0.0039,  0.0940],
        [ 0.2069,  0.1659,  0.0450,  0.1001],
        [ 0.1816,  0.1392,  0.0095,  0.0835],
        [ 0.1888,  0.1466,  0.0355,  0.0946],
        [ 0.2010,  0.1600,  0.0122,  0.1032],
        [ 0.2133,  0.1448,  0.0219,  0.0848],
        [ 0.1502,  0.1443,  0.0083,  0.0698],
        [ 0.1920,  0.1639,  0.0400,  0.1038],
        [ 0.1899,  0.1417,  0.0268,  0.1025],
        [ 0.1958,  0.1526,  0.0395,  0.1052],
        [ 0.1530,  0.1547,  0.0186,  0.0705],
        [ 0.2133,  0.1345,  0.0267,  0.1020],
        [ 0.2035,  0.1553,  0.0025,  0.0850],
        [ 0.2089,  0.1288,  0.0153

tensor([[ 0.1035,  0.1312,  0.0096,  0.0667],
        [ 0.1602,  0.1385,  0.0111,  0.0753],
        [ 0.1947,  0.1512,  0.0352,  0.0794],
        [ 0.2080,  0.1587,  0.0098,  0.1064],
        [ 0.2018,  0.1555,  0.0262,  0.0925],
        [ 0.2000,  0.1463,  0.0414,  0.1036],
        [ 0.2112,  0.1624,  0.0358,  0.0915],
        [ 0.1849,  0.1529,  0.0225,  0.1082],
        [ 0.1883,  0.1509,  0.0249,  0.0924],
        [ 0.1900,  0.1649,  0.0149,  0.0941],
        [ 0.2022,  0.1543,  0.0220,  0.1037],
        [ 0.1990,  0.1426,  0.0254,  0.0967],
        [ 0.2037,  0.1527,  0.0193,  0.1004],
        [ 0.1974,  0.1475,  0.0232,  0.1028],
        [ 0.1559,  0.1417,  0.0173,  0.0718],
        [ 0.2188,  0.1479,  0.0345,  0.1100],
        [ 0.1726,  0.1677, -0.0045,  0.0782],
        [ 0.2076,  0.1503,  0.0078,  0.1060],
        [ 0.1995,  0.1539,  0.0405,  0.0844],
        [ 0.2056,  0.1414,  0.0247,  0.0966],
        [ 0.1929,  0.1352,  0.0152,  0.0925],
        [ 0.2012,  0.1621,  0.0697

        [ 0.2048,  0.1371,  0.0283,  0.1099]], grad_fn=<AddmmBackward>)
torch.Size([64, 4])
tensor([[ 0.1876,  0.1409,  0.0396,  0.1024],
        [ 0.1782,  0.1635,  0.0380,  0.0982],
        [ 0.1874,  0.1428, -0.0080,  0.0958],
        [ 0.1773,  0.1489,  0.0058,  0.0934],
        [ 0.1864,  0.1454,  0.0435,  0.1042],
        [ 0.1977,  0.1395,  0.0249,  0.0912],
        [ 0.1215,  0.1432,  0.0318,  0.0751],
        [ 0.2241,  0.1343,  0.0535,  0.1078],
        [ 0.2020,  0.1627,  0.0104,  0.1048],
        [ 0.1768,  0.1369,  0.0414,  0.0742],
        [ 0.1804,  0.1711,  0.0236,  0.0902],
        [ 0.2148,  0.1479,  0.0228,  0.1018],
        [ 0.2113,  0.1496, -0.0048,  0.0895],
        [ 0.1990,  0.1477,  0.0372,  0.0911],
        [ 0.1298,  0.1513,  0.0135,  0.0732],
        [ 0.2240,  0.1557,  0.0274,  0.0851],
        [ 0.2222,  0.1602,  0.0265,  0.0974],
        [ 0.2092,  0.1444,  0.0298,  0.1044],
        [ 0.1324,  0.1552,  0.0084,  0.0789],
        [ 0.1949,  0.1487,  0.0406

## 82. 確率的勾配降下法による学習

In [86]:
%%bash
pip install -U catalyst

Collecting catalyst
  Downloading https://files.pythonhosted.org/packages/a2/96/87158d74688d7d2e3c233f1fd931d86b3ca4663e2d8869e0786463fb79bf/catalyst-20.4.2-py2.py3-none-any.whl (323kB)
Collecting tensorboard>=1.14.0 (from catalyst)
  Downloading https://files.pythonhosted.org/packages/1d/fd/4f3ca1516cbb3713259ef229abd9314bba0077ef6070285dde0dd1ed21b2/tensorboard-2.2.1-py3-none-any.whl (3.0MB)
Collecting opencv-python (from catalyst)
  Downloading https://files.pythonhosted.org/packages/8f/67/d30cb5ee1210c6778a718780d227fa27fa066b46cc25e0b09cd8e51faffe/opencv_python-4.2.0.34-cp36-cp36m-macosx_10_9_x86_64.whl (49.1MB)
Collecting GitPython>=2.1.11 (from catalyst)
  Downloading https://files.pythonhosted.org/packages/19/1a/0df85d2bddbca33665d2148173d3281b290ac054b5f50163ea735740ac7b/GitPython-3.1.1-py3-none-any.whl (450kB)
Collecting tensorboardX (from catalyst)
  Downloading https://files.pythonhosted.org/packages/35/f1/5843425495765c8c2dd0784a851a93ef204d314fc87bcc2bbb9f662a3ad1/tensorb

You are using pip version 18.1, however version 20.1b1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.


In [87]:
import torch
from torch import nn, optim
from torchtext import data
from catalyst.dl import SupervisedRunner
from catalyst.dl.callbacks import AccuracyCallback
from torch.utils.data import DataLoader
from torchtext.data import Iterator


numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject


numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject


numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject



In [88]:
class BucketIteratorWrapper(DataLoader):
    __initialized__ = False

    def __init__(self, iterator: Iterator):
        self.batch_size = iterator.batch_size
        self.num_workers = 1
        self.collate_fn = None
        self.pin_memory = False
        self.drop_last = False
        self.timeout = 0
        self.worker_init_fn = None
        self.sampler = iterator
        self.batch_sampler = iterator
        self.__initialized__ = True

    def __iter__(self):
        return map(lambda batch: {
            'features': batch.TEXT,
            'targets': batch.LABEL,
        }, self.batch_sampler.__iter__())

    def __len__(self):
        return len(self.batch_sampler)

In [89]:
class RNN(nn.Module):
    def __init__(self, num_embeddings,
                 embedding_dim=50,
                 hidden_size=50,
                 output_size=1,
                 num_layers=1,
                 dropout=0.2):
        super().__init__()
        self.emb = nn.Embedding(num_embeddings, embedding_dim,
                                padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim,
                            hidden_size, num_layers,
                            batch_first=True, dropout=dropout)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x, h0=None):
        x = self.emb(x)
        x, h = self.lstm(x, h0)
        x = x[:, -1, :]
        x = self.linear(x)
        return x


TEXT = data.Field(sequential=True, lower=True, batch_first=True)
LABELS = data.Field(sequential=False, batch_first=True, use_vocab=False)

train, val, test = data.TabularDataset.splits(
    path='../ch06', train='train.txt',
    validation='valid.txt', test='test.txt', format='tsv',
    fields=[('TEXT', TEXT), ('LABEL', LABELS)])

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [90]:
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train, val, test), batch_sizes=(len(train), len(val), len(test)), device=device, repeat=False, sort=False)

train_loader = BucketIteratorWrapper(train_iter)
valid_loader = BucketIteratorWrapper(val_iter)
loaders = {"train": train_loader, "valid": valid_loader}

In [91]:
TEXT.build_vocab(train, min_freq=2)
LABELS.build_vocab(train)

In [92]:
model = RNN(len(TEXT.vocab.stoi) + 1, num_layers=2, output_size=4)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

runner = SupervisedRunner()

In [93]:
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir="./logdir",
    callbacks=[AccuracyCallback(num_classes=4, accuracy_args=[1])],
    num_epochs=10,
    verbose=True,
)

1/10 * Epoch (train): 100% 1/1 [00:56<00:00, 56.04s/it, accuracy01=0.114, loss=1.410]
1/10 * Epoch (valid): 100% 1/1 [00:00<00:00,  8.12it/s, accuracy01=0.114, loss=1.407]
[2020-04-29 22:55:29,585] 
1/10 * Epoch 1 (_base): lr=0.0100 | momentum=0.000e+00
1/10 * Epoch 1 (train): accuracy01=0.1142 | loss=1.4104
1/10 * Epoch 1 (valid): accuracy01=0.1139 | loss=1.4073
2/10 * Epoch (train): 100% 1/1 [01:08<00:00, 68.03s/it, accuracy01=0.114, loss=1.408]
2/10 * Epoch (valid): 100% 1/1 [00:00<00:00,  8.45it/s, accuracy01=0.114, loss=1.405]
[2020-04-29 22:56:37,761] 
2/10 * Epoch 2 (_base): lr=0.0100 | momentum=0.000e+00
2/10 * Epoch 2 (train): accuracy01=0.1142 | loss=1.4082
2/10 * Epoch 2 (valid): accuracy01=0.1139 | loss=1.4052
3/10 * Epoch (train): 100% 1/1 [01:05<00:00, 65.42s/it, accuracy01=0.114, loss=1.406]
3/10 * Epoch (valid): 100% 1/1 [00:00<00:00,  5.62it/s, accuracy01=0.114, loss=1.403]
[2020-04-29 22:57:43,382] 
3/10 * Epoch 3 (_base): lr=0.0100 | momentum=0.000e+00
3/10 * Epoch 3

## 83. ミニバッチ化・GPU上での学習
問題82のコードを改変し，B事例ごとに損失・勾配を計算して学習を行えるようにせよ（Bの値は適当に選べ）．また，GPU上で学習を実行せよ．

In [94]:
# 82のbatch_sizesを変えるだけなのでスキップ

## 84. 単語ベクトルの導入
事前学習済みの単語ベクトル（例えば，Google Newsデータセット（約1,000億単語）での学習済み単語ベクトル）で単語埋め込みemb(x)を初期化し，学習せよ．

In [95]:
import torch
from torch import nn, optim
from torchtext import data
from catalyst.dl import SupervisedRunner
from catalyst.dl.callbacks import AccuracyCallback
from torch.utils.data import DataLoader
from torchtext.data import Iterator
from gensim.models import KeyedVectors

unable to import 'smart_open.gcs', disabling that module

`scipy.sparse.sparsetools` is deprecated!
scipy.sparse.sparsetools is a private module for scipy.sparse, and should not be used.



In [97]:
class RNN(nn.Module):
    def __init__(self, num_embeddings,
                 embedding_dim=300,
                 hidden_size=300,
                 output_size=1,
                 num_layers=1,
                 dropout=0.2):
        super().__init__()
        model = KeyedVectors.load_word2vec_format('../ch07/GoogleNews-vectors-negative300.bin', binary=True)
        weights = torch.FloatTensor(model.vectors)
        self.emb = nn.Embedding.from_pretrained(weights)
        self.lstm = nn.LSTM(embedding_dim,
                            hidden_size, num_layers,
                            batch_first=True, dropout=dropout)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x, h0=None):
        x = self.emb(x)
        x, h = self.lstm(x, h0)
        x = x[:, -1, :]
        x = self.linear(x)
        return x

In [98]:
TEXT = data.Field(sequential=True, lower=True, batch_first=True)
LABELS = data.Field(sequential=False, batch_first=True, use_vocab=False)

train, val, test = data.TabularDataset.splits(
    path='../ch06', train='train.txt',
    validation='valid.txt', test='test.txt', format='tsv',
    fields=[('TEXT', TEXT), ('LABEL', LABELS)])

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train, val, test), batch_sizes=(len(train), len(val), len(test)), device=device, repeat=False, sort=False)

train_loader = BucketIteratorWrapper(train_iter)
valid_loader = BucketIteratorWrapper(val_iter)
loaders = {"train": train_loader, "valid": valid_loader}

TEXT.build_vocab(train, min_freq=2)
LABELS.build_vocab(train)

model = RNN(len(TEXT.vocab.stoi) + 1, num_layers=2, output_size=4)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

runner = SupervisedRunner()

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir="./logdir",
    callbacks=[AccuracyCallback(num_classes=4, accuracy_args=[1])],
    num_epochs=10,
    verbose=True,
)

## 85. 双方向RNN・多層化Permalink

In [1]:
import torch
from torch import nn, optim
from torchtext import data
from catalyst.dl import SupervisedRunner
from catalyst.dl.callbacks import AccuracyCallback
from torch.utils.data import DataLoader
from torchtext.data import Iterator
from gensim.models import KeyedVectors


numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject


numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject


numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject

unable to import 'smart_open.gcs', disabling that module

`scipy.sparse.sparsetools` is deprecated!
scipy.sparse.sparsetools is a private module for scipy.sparse, and should not be used.



In [2]:
class BucketIteratorWrapper(DataLoader):
    __initialized__ = False

    def __init__(self, iterator: Iterator):
        self.batch_size = iterator.batch_size
        self.num_workers = 1
        self.collate_fn = None
        self.pin_memory = False
        self.drop_last = False
        self.timeout = 0
        self.worker_init_fn = None
        self.sampler = iterator
        self.batch_sampler = iterator
        self.__initialized__ = True

    def __iter__(self):
        return map(lambda batch: {
            'features': batch.TEXT,
            'targets': batch.LABEL,
        }, self.batch_sampler.__iter__())

    def __len__(self):
        return len(self.batch_sampler)

In [3]:
class RNN(nn.Module):
    def __init__(self, num_embeddings,
                 embedding_dim=300,
                 hidden_size=300,
                 output_size=1,
                 num_layers=1,
                 dropout=0.2):
        super().__init__()
        model = KeyedVectors.load_word2vec_format('../ch07/GoogleNews-vectors-negative300.bin', binary=True)
        weights = torch.FloatTensor(model.vectors)
        self.emb = nn.Embedding.from_pretrained(weights)
        # enable bidirectional
        self.lstm = nn.LSTM(embedding_dim,
                            hidden_size, num_layers,
                            batch_first=True, dropout=dropout, bidirectional=True)
        # hidden_size * 2
        self.linear = nn.Linear(hidden_size*2, output_size)

    def forward(self, x, h0=None):
        x = self.emb(x)
        x, h = self.lstm(x, h0)
        x = x[:, -1, :]
        x = self.linear(x)
        return x

In [4]:
TEXT = data.Field(sequential=True, lower=True, batch_first=True)
LABELS = data.Field(sequential=False, batch_first=True, use_vocab=False)

train, val, test = data.TabularDataset.splits(
    path='../ch06', train='train.txt',
    validation='valid.txt', test='test.txt', format='tsv',
    fields=[('TEXT', TEXT), ('LABEL', LABELS)])

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train, val, test), batch_sizes=(len(train), len(val), len(test)), device=device, repeat=False, sort=False)

train_loader = BucketIteratorWrapper(train_iter)
valid_loader = BucketIteratorWrapper(val_iter)
loaders = {"train": train_loader, "valid": valid_loader}

TEXT.build_vocab(train, min_freq=2)
LABELS.build_vocab(train)

model = RNN(len(TEXT.vocab.stoi) + 1, num_layers=2, output_size=4)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

runner = SupervisedRunner()

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir="./logdir",
    callbacks=[AccuracyCallback(num_classes=4, accuracy_args=[1])],
    num_epochs=10,
    verbose=True,
)

KeyboardInterrupt: 

## 86. 畳み込みニューラルネットワーク (CNN)

In [5]:
import torch
from torch import nn, optim
import torch.nn.functional as F
from torchtext import data
from torch.utils.data import DataLoader
from torchtext.data import Iterator
from gensim.models import KeyedVectors


class BucketIteratorWrapper(DataLoader):
    __initialized__ = False

    def __init__(self, iterator: Iterator):
        self.batch_size = iterator.batch_size
        self.num_workers = 1
        self.collate_fn = None
        self.pin_memory = False
        self.drop_last = False
        self.timeout = 0
        self.worker_init_fn = None
        self.sampler = iterator
        self.batch_sampler = iterator
        self.__initialized__ = True

    def __iter__(self):
        return map(lambda batch: {
            'features': batch.TEXT,
            'targets': batch.LABEL,
        }, self.batch_sampler.__iter__())

    def __len__(self):
        return len(self.batch_sampler)




In [22]:
class CNN(nn.Module):

    def __init__(self, load_model, output_dim, kernel_num, kernel_sizes=[3, 4, 5], dropout=0.5, static=False):
        super(CNN, self).__init__()

#         model = KeyedVectors.load_word2vec_format('../ch07/GoogleNews-vectors-negative300.bin', binary=True)
        model = load_model
        weights = torch.FloatTensor(model.vectors)
        self.embed = nn.Embedding.from_pretrained(weights)
        self.convs1 = nn.ModuleList([nn.Conv2d(1, kernel_num, (k, self.embed.weight.shape[1])) for k in kernel_sizes])
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(len(kernel_sizes) * kernel_num, output_dim)
        self.static = static

    def conv_and_pool(self, x, conv):
        x = F.relu(conv(x)).squeeze(3)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        return x

    def forward(self, x):
        x = self.embed(x)
        print(x.size())

        if self.static:
            x = x.detach()

        x = x.unsqueeze(1)
        x = x.float()
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]

        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]

        x = torch.cat(x, 1)
        x = self.dropout(x)
        logit = self.fc1(x)
        return logit

In [16]:
load_model = KeyedVectors.load_word2vec_format('../ch07/GoogleNews-vectors-negative300.bin', binary=True)

In [17]:
model = CNN(load_model, output_dim=4, kernel_num=3, kernel_sizes=[3, 4, 5], dropout=0.2)
print(model)

CNN(
  (embed): Embedding(3000000, 300)
  (convs1): ModuleList(
    (0): Conv2d(1, 3, kernel_size=(3, 300), stride=(1, 1))
    (1): Conv2d(1, 3, kernel_size=(4, 300), stride=(1, 1))
    (2): Conv2d(1, 3, kernel_size=(5, 300), stride=(1, 1))
  )
  (dropout): Dropout(p=0.2, inplace=False)
  (fc1): Linear(in_features=9, out_features=4, bias=True)
)


In [25]:
TEXT = data.Field(sequential=True, lower=True, batch_first=True)
LABELS = data.Field(sequential=False, batch_first=True, use_vocab=False)

train, val, test = data.TabularDataset.splits(
    path='../ch06', train='train.txt',
    validation='valid.txt', test='test.txt', format='tsv',
    fields=[('TEXT', TEXT), ('LABEL', LABELS)])

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train, val, test), batch_sizes=(64, 64, 64), device=device, repeat=False, sort=False)

train_loader = BucketIteratorWrapper(train_iter)
valid_loader = BucketIteratorWrapper(val_iter)
loaders = {"train": train_loader, "valid": valid_loader}

TEXT.build_vocab(train, min_freq=2)
LABELS.build_vocab(train)
# model = CNN(output_dim=4, kernel_num=3, kernel_sizes=[3, 4, 5], dropout=0.2)
model = CNN(load_model, output_dim=4, kernel_num=3, kernel_sizes=[3, 4, 5], dropout=0.2)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

for epoch in range(1):
    model.train()
    for batch in train_iter:
        x, y = batch.TEXT, batch.LABEL
        y_pred = model(x)
        print(y_pred)
        print(y_pred.shape)

torch.Size([64, 16, 300])
tensor([[ 1.7179e-01,  1.7355e-01,  1.9457e-02,  3.7832e-01],
        [ 1.6944e-01,  8.8402e-02,  3.6254e-02,  3.0183e-01],
        [ 7.7913e-02,  1.1209e-01, -2.9325e-03,  3.0808e-01],
        [ 1.3456e-01,  1.2476e-01,  4.4395e-02,  3.0155e-01],
        [ 1.0359e-01,  1.6148e-01,  8.9008e-02,  2.7242e-01],
        [ 4.3941e-02,  4.8915e-02, -1.6148e-02,  3.1379e-01],
        [ 1.5289e-01,  8.3876e-02, -5.5911e-03,  3.2751e-01],
        [ 1.0971e-01,  1.0873e-01,  3.5778e-02,  2.8973e-01],
        [ 5.6291e-02,  8.7034e-02,  1.5546e-02,  2.4172e-01],
        [ 1.1258e-01,  1.3631e-01,  1.0548e-01,  2.4538e-01],
        [ 6.8570e-02,  1.3824e-01,  3.4784e-02,  3.2248e-01],
        [ 1.2548e-01,  1.3900e-01,  8.3377e-02,  2.3477e-01],
        [ 1.4717e-01,  1.1324e-01,  7.6012e-02,  2.8044e-01],
        [ 1.2930e-01,  9.2827e-02,  5.9276e-02,  2.7201e-01],
        [ 1.1217e-01, -1.0841e-02, -5.4356e-02,  3.2785e-01],
        [ 1.1918e-01,  9.9392e-02,  1.0968e-

tensor([[ 0.1169,  0.0735,  0.0236,  0.2876],
        [ 0.1418,  0.1455,  0.0442,  0.3608],
        [ 0.1902,  0.1809,  0.0958,  0.2700],
        [ 0.2017,  0.1013, -0.0112,  0.3530],
        [ 0.1287,  0.0964,  0.0798,  0.2459],
        [ 0.1713,  0.1003,  0.0314,  0.3040],
        [ 0.1788,  0.1187,  0.0580,  0.3272],
        [ 0.0987,  0.1233,  0.0533,  0.2345],
        [ 0.1574,  0.1360,  0.0590,  0.2576],
        [ 0.1611,  0.0822,  0.0148,  0.2819],
        [ 0.1570,  0.1001,  0.0339,  0.2509],
        [ 0.1646,  0.0835,  0.0052,  0.3029],
        [ 0.1073,  0.1069,  0.0303,  0.2806],
        [ 0.1003,  0.2296,  0.0829,  0.3248],
        [ 0.1327,  0.1016,  0.0507,  0.3136],
        [ 0.0944,  0.0817,  0.0169,  0.3378],
        [ 0.0769,  0.1466,  0.0387,  0.3348],
        [ 0.1350,  0.0655,  0.0346,  0.2875],
        [ 0.1763,  0.0196,  0.0124,  0.3199],
        [ 0.1698,  0.0397,  0.0258,  0.2929],
        [ 0.1240,  0.0672,  0.0467,  0.2394],
        [ 0.1278,  0.0853,  0.0221

tensor([[ 0.1991,  0.1615,  0.0422,  0.3629],
        [ 0.0780,  0.0891,  0.0263,  0.2993],
        [ 0.0610,  0.0115, -0.0033,  0.3024],
        [ 0.1905,  0.0131,  0.0070,  0.2957],
        [ 0.1642,  0.1599,  0.0847,  0.2720],
        [ 0.0640,  0.1036,  0.0706,  0.2308],
        [ 0.1327,  0.0804,  0.0364,  0.2778],
        [ 0.1026,  0.0786,  0.0817,  0.2607],
        [ 0.1813,  0.0631, -0.0050,  0.3300],
        [ 0.1729,  0.1618,  0.0723,  0.3038],
        [ 0.0788,  0.1028,  0.0563,  0.2685],
        [ 0.2166,  0.1445,  0.0507,  0.3365],
        [ 0.1751,  0.0704, -0.0201,  0.3177],
        [ 0.1498,  0.1450,  0.0173,  0.3367],
        [ 0.1338,  0.0138,  0.0138,  0.2544],
        [ 0.2131,  0.1538,  0.0630,  0.2991],
        [ 0.1247,  0.1017,  0.0402,  0.2508],
        [ 0.0642,  0.0649,  0.0389,  0.2130],
        [ 0.1592,  0.1156,  0.0507,  0.2902],
        [ 0.1356,  0.1096,  0.0253,  0.2663],
        [ 0.1520,  0.1260,  0.0746,  0.2736],
        [ 0.1170,  0.1196, -0.0070

tensor([[ 0.1850,  0.2278,  0.1188,  0.2581],
        [ 0.1186,  0.1475,  0.0362,  0.3068],
        [ 0.2076,  0.1211,  0.0123,  0.3708],
        [ 0.1416,  0.1494,  0.0700,  0.2443],
        [ 0.0749, -0.0057, -0.0417,  0.2950],
        [ 0.1029,  0.0667, -0.0434,  0.3040],
        [ 0.1325,  0.1187,  0.0189,  0.3371],
        [ 0.1167,  0.1020,  0.0554,  0.3320],
        [ 0.1163,  0.0137, -0.0038,  0.2702],
        [ 0.1139,  0.1115,  0.0416,  0.2406],
        [ 0.1294,  0.0393,  0.0229,  0.2790],
        [ 0.1418,  0.0273, -0.0057,  0.2741],
        [ 0.0362,  0.1085,  0.0104,  0.3269],
        [ 0.1162,  0.0965,  0.0455,  0.3061],
        [ 0.0969,  0.1117,  0.0568,  0.2987],
        [ 0.0918,  0.0658, -0.0220,  0.3403],
        [ 0.1488,  0.0684,  0.0148,  0.2557],
        [ 0.0608,  0.0764,  0.0248,  0.2220],
        [ 0.1323,  0.0729, -0.0107,  0.2540],
        [ 0.0971,  0.0851,  0.0580,  0.2309],
        [ 0.1464,  0.1110,  0.0303,  0.2723],
        [ 0.1080,  0.0845, -0.0099

tensor([[ 1.1848e-01, -8.7821e-03, -3.0333e-02,  2.8783e-01],
        [ 1.5158e-01,  7.2080e-02,  3.5639e-02,  2.2299e-01],
        [ 1.4910e-01,  7.3067e-02,  4.5675e-02,  2.5956e-01],
        [ 1.4711e-01,  1.7416e-01,  6.4371e-02,  2.7434e-01],
        [ 1.5710e-01,  1.1739e-01,  4.0815e-02,  2.8905e-01],
        [ 8.9351e-02,  5.8419e-02, -1.0902e-02,  2.9691e-01],
        [ 4.7744e-02, -2.0270e-03, -6.4893e-02,  2.4613e-01],
        [ 1.0842e-01,  1.0381e-01,  1.2605e-02,  3.1761e-01],
        [ 2.4865e-01,  2.0580e-01,  7.7572e-02,  3.4110e-01],
        [ 1.4673e-01,  1.1468e-01,  4.6780e-02,  2.9427e-01],
        [ 1.3522e-01,  9.5726e-02,  7.7311e-02,  2.3279e-01],
        [ 1.3837e-01,  9.4436e-02,  6.2324e-02,  2.8347e-01],
        [ 1.0765e-01,  7.1740e-02,  8.6294e-03,  3.1012e-01],
        [ 1.8854e-01,  1.6746e-01,  1.1632e-01,  2.7691e-01],
        [ 1.6638e-01,  2.0063e-01,  1.1127e-01,  2.4541e-01],
        [ 1.3933e-01,  8.7367e-02,  5.5235e-02,  2.8723e-01],
        

tensor([[ 0.0928,  0.0803,  0.0587,  0.2314],
        [ 0.0839, -0.0243, -0.0517,  0.2860],
        [ 0.1682,  0.1407,  0.0580,  0.3431],
        [ 0.1341,  0.0793,  0.0318,  0.3524],
        [ 0.2232,  0.0751,  0.0037,  0.3246],
        [ 0.1540,  0.1216,  0.0492,  0.2961],
        [ 0.0795,  0.0421,  0.0306,  0.2291],
        [ 0.1552,  0.1792,  0.0853,  0.2612],
        [ 0.1141, -0.0294, -0.0511,  0.2853],
        [ 0.0748,  0.1678,  0.0663,  0.3064],
        [ 0.1640,  0.1875,  0.1051,  0.2592],
        [ 0.0081,  0.0318, -0.0247,  0.2341],
        [ 0.0898,  0.1088,  0.0118,  0.3338],
        [ 0.1977,  0.2039,  0.0535,  0.3258],
        [ 0.1912,  0.1944,  0.1083,  0.2858],
        [ 0.1244,  0.1193,  0.0230,  0.2313],
        [ 0.0538, -0.0497, -0.0142,  0.2687],
        [ 0.0376,  0.0143, -0.0605,  0.3646],
        [ 0.0815,  0.0299, -0.0207,  0.3391],
        [ 0.2106,  0.2578,  0.1726,  0.2534],
        [ 0.1174,  0.1674,  0.1144,  0.2518],
        [ 0.0957,  0.1314,  0.0239

tensor([[ 0.1135,  0.1401,  0.0264,  0.3140],
        [ 0.0933,  0.1238, -0.0041,  0.3334],
        [ 0.1776,  0.0721,  0.0224,  0.3024],
        [ 0.1490,  0.0960,  0.0216,  0.3294],
        [ 0.1009,  0.0630,  0.0122,  0.2565],
        [ 0.1208,  0.1198,  0.0658,  0.2621],
        [ 0.1312,  0.1846,  0.0641,  0.3310],
        [ 0.0983,  0.1417,  0.0895,  0.2617],
        [ 0.1099,  0.0609,  0.0373,  0.2535],
        [ 0.0854,  0.1570,  0.0670,  0.2954],
        [ 0.1409,  0.0373,  0.0435,  0.2326],
        [ 0.1397,  0.1044,  0.0455,  0.2703],
        [ 0.1622,  0.1143, -0.0129,  0.3812],
        [ 0.0988,  0.1284,  0.0424,  0.3010],
        [ 0.1167,  0.0885,  0.0168,  0.2933],
        [ 0.1606,  0.1194,  0.0396,  0.2977],
        [ 0.0801,  0.0719,  0.0497,  0.2476],
        [ 0.0696,  0.0496, -0.0062,  0.2686],
        [ 0.1339,  0.0865, -0.0283,  0.3532],
        [ 0.1271,  0.0799, -0.0026,  0.2983],
        [ 0.1411,  0.0675,  0.0268,  0.2756],
        [ 0.1286,  0.0392, -0.0238

tensor([[ 1.4114e-01,  1.8558e-01,  6.8173e-02,  3.6016e-01],
        [ 1.2425e-01,  1.2792e-01,  5.4645e-02,  2.3704e-01],
        [ 1.5789e-01,  1.3933e-01,  6.2794e-02,  2.9294e-01],
        [ 1.7489e-01,  1.2285e-01,  3.7769e-02,  3.0693e-01],
        [ 1.6843e-01,  1.4055e-01,  7.0757e-02,  3.2102e-01],
        [ 2.6173e-02,  9.5941e-02,  3.1893e-02,  2.2255e-01],
        [ 1.3689e-01,  1.0475e-01,  5.3784e-02,  3.1507e-01],
        [ 1.3808e-01,  2.1173e-01,  1.0915e-01,  2.8285e-01],
        [ 1.1641e-01,  4.6406e-02, -3.4042e-02,  3.4124e-01],
        [ 1.5414e-01,  8.7071e-02,  1.5860e-02,  3.2679e-01],
        [ 1.0779e-01,  1.1582e-01,  2.6919e-02,  3.1901e-01],
        [ 2.1274e-01,  1.8534e-01,  9.8217e-02,  3.1629e-01],
        [ 1.0983e-01, -9.1523e-03,  1.8114e-02,  2.4298e-01],
        [ 8.7193e-02,  7.3822e-02,  3.5332e-02,  2.4833e-01],
        [ 1.6197e-01,  1.8570e-01,  1.1947e-01,  2.1821e-01],
        [ 1.2248e-01,  8.8239e-02,  6.8846e-02,  2.4319e-01],
        

In [60]:
# class CNN(nn.Module):

#     def __init__(self, load_model, output_dim, kernel_num, kernel_sizes=[3, 4, 5], dropout=0.5, static=False):
#         super(CNN, self).__init__()

# #         model = KeyedVectors.load_word2vec_format('../ch07/GoogleNews-vectors-negative300.bin', binary=True)
#         model = load_model
#         weights = torch.FloatTensor(model.vectors)
#         self.embed = nn.Embedding.from_pretrained(weights)
#         self.convs1 = nn.ModuleList([nn.Conv2d(1, kernel_num, (k, self.embed.weight.shape[1])) for k in kernel_sizes])
#         self.dropout = nn.Dropout(dropout)
#         self.fc1 = nn.Linear(len(kernel_sizes) * kernel_num, output_dim)
#         self.static = static

#     def conv_and_pool(self, x, conv):
#         x = F.relu(conv(x)).squeeze(3)
#         x = F.max_pool1d(x, x.size(2)).squeeze(2)
#         return x

#     def forward(self, x):
#         x = self.embed(x)
#         print(x.size())

#         if self.static:
#             x = x.detach()

#         x = x.unsqueeze(1)
#         print(x.size())
#         x = x.float()
#         z = [F.relu(conv(x)) for conv in self.convs1]
#         print('z')
#         print(z[0].size())
#         print(z[1].size())
#         print(z[2].size())
#         x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]
#         print('conv2d')
#         print(x[0].size())
#         print(x[1].size())
#         print(x[2].size())

#         x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
        
#         print('max pooling')
#         print(x[0].size())
#         print(x[1].size())
#         print(x[2].size())

#         x = torch.cat(x, 1)
#         print(x.size())
#         x = self.dropout(x)
#         logit = self.fc1(x)
#         print(logit.size())
#         return logit

In [61]:
# model = CNN(load_model, output_dim=4, kernel_num=3, kernel_sizes=[3, 4, 5], dropout=0.2)

In [62]:
# model(x)

## 87.確率的勾配降下法

In [59]:
runner = SupervisedRunner()

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir="./logdir",
    callbacks=[AccuracyCallback(num_classes=4, accuracy_args=[1])],
    num_epochs=10,
    verbose=True,
)

1/10 * Epoch (train):   0% 0/167 [00:00<?, ?it/s]torch.Size([64, 16, 300])
torch.Size([64, 1, 16, 300])
z
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
torch.Size([64, 3, 12, 1])
conv2d
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
torch.Size([64, 3, 12])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):   0% 0/167 [00:00<?, ?it/s, accuracy01=0.125, loss=1.368]torch.Size([64, 16, 300])
torch.Size([64, 1, 16, 300])
z
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
torch.Size([64, 3, 12, 1])
conv2d
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
torch.Size([64, 3, 12])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):   1% 1/167 [00:00<00:15, 10.86it/s, accuracy01=0.078, loss=1.407]torch.Size([64, 16, 300])
torch.Size([64, 1, 16, 300])
z
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
torch.Size([64, 

1/10 * Epoch (train):  11% 19/167 [00:00<00:05, 26.64it/s, accuracy01=0.109, loss=1.392]torch.Size([64, 17, 300])
torch.Size([64, 1, 17, 300])
z
torch.Size([64, 3, 15, 1])
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
conv2d
torch.Size([64, 3, 15])
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  13% 21/167 [00:00<00:05, 28.20it/s, accuracy01=0.125, loss=1.364]torch.Size([64, 16, 300])
torch.Size([64, 1, 16, 300])
z
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
torch.Size([64, 3, 12, 1])
conv2d
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
torch.Size([64, 3, 12])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  13% 21/167 [00:00<00:05, 28.20it/s, accuracy01=0.125, loss=1.386]torch.Size([64, 15, 300])
torch.Size([64, 1, 15, 300])
z
torch.Size([64, 3, 13

1/10 * Epoch (train):  24% 40/167 [00:01<00:04, 30.46it/s, accuracy01=0.109, loss=1.388]torch.Size([64, 16, 300])
torch.Size([64, 1, 16, 300])
z
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
torch.Size([64, 3, 12, 1])
conv2d
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
torch.Size([64, 3, 12])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  24% 40/167 [00:01<00:04, 30.46it/s, accuracy01=0.078, loss=1.385]torch.Size([64, 17, 300])
torch.Size([64, 1, 17, 300])
z
torch.Size([64, 3, 15, 1])
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
conv2d
torch.Size([64, 3, 15])
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  25% 41/167 [00:01<00:04, 30.46it/s, accuracy01=0.109, loss=1.376]torch.Size([64, 15, 300])
torch.Size([64, 1, 15, 300])
z
torch.Size([64, 3, 13

1/10 * Epoch (train):  36% 60/167 [00:02<00:03, 31.21it/s, accuracy01=0.172, loss=1.362]torch.Size([64, 16, 300])
torch.Size([64, 1, 16, 300])
z
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
torch.Size([64, 3, 12, 1])
conv2d
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
torch.Size([64, 3, 12])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  36% 60/167 [00:02<00:03, 31.21it/s, accuracy01=0.109, loss=1.387]torch.Size([64, 17, 300])
torch.Size([64, 1, 17, 300])
z
torch.Size([64, 3, 15, 1])
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
conv2d
torch.Size([64, 3, 15])
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  37% 61/167 [00:02<00:03, 31.21it/s, accuracy01=0.203, loss=1.363]torch.Size([64, 17, 300])
torch.Size([64, 1, 17, 300])
z
torch.Size([64, 3, 15

max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  48% 80/167 [00:02<00:02, 31.95it/s, accuracy01=0.141, loss=1.375]torch.Size([64, 16, 300])
torch.Size([64, 1, 16, 300])
z
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
torch.Size([64, 3, 12, 1])
conv2d
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
torch.Size([64, 3, 12])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  48% 80/167 [00:02<00:02, 31.95it/s, accuracy01=0.156, loss=1.378]torch.Size([64, 17, 300])
torch.Size([64, 1, 17, 300])
z
torch.Size([64, 3, 15, 1])
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
conv2d
torch.Size([64, 3, 15])
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  49% 81/167 [00:02<00:02, 31.95i

1/10 * Epoch (train):  60% 100/167 [00:03<00:02, 31.53it/s, accuracy01=0.109, loss=1.405]torch.Size([64, 16, 300])
torch.Size([64, 1, 16, 300])
z
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
torch.Size([64, 3, 12, 1])
conv2d
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
torch.Size([64, 3, 12])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  60% 100/167 [00:03<00:02, 31.53it/s, accuracy01=0.109, loss=1.401]torch.Size([64, 16, 300])
torch.Size([64, 1, 16, 300])
z
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
torch.Size([64, 3, 12, 1])
conv2d
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
torch.Size([64, 3, 12])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  60% 101/167 [00:03<00:02, 31.53it/s, accuracy01=0.109, loss=1.396]torch.Size([64, 17, 300])
torch.Size([64, 1, 17, 300])
z
torch.Size([64, 3,

1/10 * Epoch (train):  72% 120/167 [00:04<00:02, 20.56it/s, accuracy01=0.219, loss=1.357]torch.Size([64, 15, 300])
torch.Size([64, 1, 15, 300])
z
torch.Size([64, 3, 13, 1])
torch.Size([64, 3, 12, 1])
torch.Size([64, 3, 11, 1])
conv2d
torch.Size([64, 3, 13])
torch.Size([64, 3, 12])
torch.Size([64, 3, 11])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  72% 120/167 [00:04<00:02, 20.56it/s, accuracy01=0.078, loss=1.384]torch.Size([64, 17, 300])
torch.Size([64, 1, 17, 300])
z
torch.Size([64, 3, 15, 1])
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
conv2d
torch.Size([64, 3, 15])
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  72% 121/167 [00:04<00:02, 20.56it/s, accuracy01=0.078, loss=1.379]torch.Size([64, 17, 300])
torch.Size([64, 1, 17, 300])
z
torch.Size([64, 3,

max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  84% 140/167 [00:04<00:00, 28.81it/s, accuracy01=0.109, loss=1.358]torch.Size([64, 16, 300])
torch.Size([64, 1, 16, 300])
z
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
torch.Size([64, 3, 12, 1])
conv2d
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
torch.Size([64, 3, 12])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  84% 140/167 [00:04<00:00, 28.81it/s, accuracy01=0.109, loss=1.366]torch.Size([64, 15, 300])
torch.Size([64, 1, 15, 300])
z
torch.Size([64, 3, 13, 1])
torch.Size([64, 3, 12, 1])
torch.Size([64, 3, 11, 1])
conv2d
torch.Size([64, 3, 13])
torch.Size([64, 3, 12])
torch.Size([64, 3, 11])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  84% 141/167 [00:04<00:00, 28.

z
torch.Size([64, 3, 13, 1])
torch.Size([64, 3, 12, 1])
torch.Size([64, 3, 11, 1])
conv2d
torch.Size([64, 3, 13])
torch.Size([64, 3, 12])
torch.Size([64, 3, 11])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  96% 160/167 [00:05<00:00, 30.47it/s, accuracy01=0.203, loss=1.352]torch.Size([64, 16, 300])
torch.Size([64, 1, 16, 300])
z
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
torch.Size([64, 3, 12, 1])
conv2d
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
torch.Size([64, 3, 12])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (train):  96% 160/167 [00:05<00:00, 30.47it/s, accuracy01=0.125, loss=1.389]torch.Size([64, 19, 300])
torch.Size([64, 1, 19, 300])
z
torch.Size([64, 3, 17, 1])
torch.Size([64, 3, 16, 1])
torch.Size([64, 3, 15, 1])
conv2d
torch.Size([64, 3, 17])
torch.Size([64, 3, 16])
torch.Size([64, 3, 15])
ma

1/10 * Epoch (valid):  52% 11/21 [00:00<00:00, 77.14it/s, accuracy01=0.094, loss=1.369]torch.Size([64, 16, 300])
torch.Size([64, 1, 16, 300])
z
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
torch.Size([64, 3, 12, 1])
conv2d
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
torch.Size([64, 3, 12])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (valid):  57% 12/21 [00:00<00:00, 77.14it/s, accuracy01=0.141, loss=1.376]torch.Size([64, 17, 300])
torch.Size([64, 1, 17, 300])
z
torch.Size([64, 3, 15, 1])
torch.Size([64, 3, 14, 1])
torch.Size([64, 3, 13, 1])
conv2d
torch.Size([64, 3, 15])
torch.Size([64, 3, 14])
torch.Size([64, 3, 13])
max pooling
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 3])
torch.Size([64, 9])
torch.Size([64, 4])
1/10 * Epoch (valid):  62% 13/21 [00:00<00:00, 77.14it/s, accuracy01=0.062, loss=1.371]torch.Size([64, 15, 300])
torch.Size([64, 1, 15, 300])
z
torch.Size([64, 3, 13, 1

AttributeError: 'NoneType' object has no attribute 'write'

## 88. パラメータチューニング
問題85や問題87のコードを改変し，ニューラルネットワークの形状やハイパーパラメータを調整しながら，高性能なカテゴリ分類器を構築せよ．

In [63]:
class RNN(nn.Module):
    def __init__(self, num_embeddings,
                 embedding_dim=300,
                 hidden_size=300,
                 output_size=1,
                 num_layers=1,
                 dropout=0.2):
        super().__init__()
        model = KeyedVectors.load_word2vec_format('../ch07/GoogleNews-vectors-negative300.bin', binary=True)
        weights = torch.FloatTensor(model.vectors)
        self.emb = nn.Embedding.from_pretrained(weights)
        self.lstm = nn.LSTM(embedding_dim,
                            hidden_size, num_layers,
                            batch_first=True, dropout=dropout, bidirectional=True)
        self.linear = nn.Sequential(
            nn.Linear(hidden_size * 2, 100),
            nn.PReLU(),
            nn.BatchNorm1d(100),
            nn.Linear(100, output_size)
        )

    def forward(self, x, h0=None):
        x = self.emb(x)
        x, h = self.lstm(x, h0)
        x = x[:, -1, :]
        x = self.linear(x)
        return x


## 89. 事前学習済み言語モデルからの転移学習
事前学習済み言語モデル（例えばBERTなど）を出発点として，ニュース記事見出しをカテゴリに分類するモデルを構築せよ．

In [68]:
%%bash
# pip install transformers

In [73]:
from tqdm import tqdm
from transformers import BertForSequenceClassification

In [74]:
def eval_net(model, data_loader, device='cpu'):
    model.eval()
    ys = []
    ypreds = []
    for x, y, _ in data_loader:
        with torch.no_grad():
            loss, logit = model(input_ids=x, labels=y)
            _, y_pred = torch.max(logit, 1)
            ys.append(y)
            ypreds.append(y_pred)
    ys = torch.cat(ys)
    ypreds = torch.cat(ypreds)
    print(f'test acc: {(ys == ypreds).sum().item() / len(ys)}')
    return

In [75]:
TEXT = data.Field(sequential=True, lower=True, batch_first=True)
LABELS = data.Field(sequential=False, batch_first=True, use_vocab=False)

train, val, test = data.TabularDataset.splits(
    path='../ch06', train='train.txt',
    validation='valid.txt', test='test.txt', format='tsv',
    fields=[('TEXT', TEXT), ('LABEL', LABELS)])

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
train_iter, val_iter, test_iter = data.Iterator.splits(
    (train, val, test), batch_sizes=(64, 64, 64), device=device, repeat=False, sort=False)

TEXT.build_vocab(train, min_freq=2)
LABELS.build_vocab(train)

model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=4)
model = model.to(device)

optimizer = optim.SGD(model.parameters(), lr=0.01)

for epoch in tqdm(range(10)):
    losses = []
    model.train()
    for batch in train_iter:
        x, y = batch.TEXT, batch.LABEL
        loss, logit = model(input_ids=x, labels=y)
        model.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        _, y_pred_train = torch.max(logit, 1)
    eval_net(model, test_iter, device)

  0%|          | 0/10 [00:36<?, ?it/s]


KeyboardInterrupt: 