# Lesson5

- [data/field.py](https://github.com/pytorch/text/blob/master/torchtext/data/field.py)
- [text/test/imdb.py](https://github.com/pytorch/text/blob/master/test/imdb.py)

In [1]:
import torch
from tqdm import tqdm

In [2]:
import time
import os

import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt
import torch
from torch import nn
from torch import optim
from torch.optim import lr_scheduler
import torchvision
from torch.autograd import Variable
import torchvision.models as models
from torchvision import transforms, datasets

In [3]:
from torchtext import data
from torchtext import datasets
from torchtext.vocab import GloVe

- fix_length: そのlengthまでpadding
- pad_first: 先頭にpaddingするか
- batch_first: batchのdimensionを最初に加えるか
- sequential: tokenizeするかどうか
  - LABELはtokenize不要だが、batch_size x 1 の次元にするためにTrueにする

In [4]:
# Approach 1:
# set up fields (preprocessing pipelineを定義)
TEXT = data.Field(lower=True, fix_length=500, batch_first=True, pad_first=True)
LABEL = data.Field(sequential=True, pad_token=None, unk_token=None, batch_first=True) # vocabularyに反映されてしまうため、unkとpadをNoneに
# 
# make splits for data
train, test = datasets.IMDB.splits(TEXT, LABEL)

In [5]:
# print information about the data
print('train.fields', train.fields)
print('len(train)', len(train))
print('vars(train[0])', vars(train[0]))
print('len(test)', len(test))

train.fields {'label': <torchtext.data.field.Field object at 0x7f2e2dc8cfd0>, 'text': <torchtext.data.field.Field object at 0x7f2e2dc8e048>}
len(train) 25000
vars(train[0]) {'label': ['pos'], 'text': ['master', 'cinéaste', 'alain', 'resnais', 'likes', 'to', 'work', 'with', 'those', 'actors', 'who', 'are', 'a', 'part', 'of', 'his', 'family.in', 'this', 'film', 'too', 'we', 'see', "resnais'", 'family', 'members', 'like', 'pierre', 'arditi,', 'sabine', 'azema,', 'andré', 'dussolier', 'and', 'fanny', 'ardant', 'dealing', 'with', 'serious', 'themes', 'like', 'death,religion,suicide,love', 'and', 'their', 'overall', 'implications', 'on', 'our', 'daily', 'lives.the', 'formal', 'nature', 'of', 'relationship', 'shared', 'by', 'these', 'people', 'is', 'evident', 'as', 'even', 'friends,', 'they', 'address', 'each', 'other', 'using', 'a', 'formal', 'you.in', '1984,while', 'making', "l'amour", 'à', 'mort,resnais', 'dealt', 'with', 'time,memory', 'and', 'space', 'to', 'unravel', 'the', 'mysteries', 

In [6]:
# build the vocabulary
TEXT.build_vocab(train, max_size=4998) # padとunkを考慮 (オプションについてはVocabのコンストラクタを参照)
LABEL.build_vocab(train)

In [8]:
print('len(TEXT.vocab)', len(TEXT.vocab))
print('TEXT.vocab.vectors.size()', TEXT.vocab.vectors) # build_vocabのvecotrs引数に何も渡していないので、単にtermとindexの対応が得られる

len(TEXT.vocab) 5000
TEXT.vocab.vectors.size() None


In [9]:
print(TEXT.vocab.itos[11])
print(LABEL.vocab.itos[0], LABEL.vocab.itos[1])

that
neg pos


In [77]:
# make iterator for splits
train_iter, test_iter = data.BucketIterator.splits((train, test), batch_size=512, device=0, repeat=False)

In [78]:
batch = next(iter(train_iter))
print(batch.text)
print(batch.label)

Variable containing:
    1     1     1  ...     89     5     0
    1     1     1  ...   4180   917     0
    1     1     1  ...      0     4     0
       ...          ⋱          ...       
    1     1     1  ...      0    13     0
    1     1     1  ...    249   563     0
    1     1     1  ...      6   893     0
[torch.cuda.LongTensor of size 512x500 (GPU 0)]

Variable containing:
    0
    0
    0
    0
    0
    0
    0
    0
    1
    1
    1
    0
    0
    1
    1
    0
    0
    0
    0
    0
    1
    0
    1
    1
    1
    1
    0
    1
    0
    1
    0
    1
    0
    0
    0
    0
    1
    1
    0
    0
    0
    0
    0
    1
    0
    1
    1
    0
    0
    0
    1
    1
    1
    0
    0
    0
    1
    1
    1
    0
    1
    0
    1
    0
    1
    1
    0
    0
    0
    1
    0
    0
    1
    1
    0
    1
    1
    0
    1
    0
    1
    0
    0
    0
    0
    1
    1
    1
    1
    1
    1
    0
    0
    0
    1
    1
    0
    1
    0
    1
    0
    1
   

In [79]:
dataloaders = {'train': train_iter, 'test': test_iter}
dataset_sizes = {'train': len(train), 'test': len(test)}

## CNN

- 画像と違って1次元なので、Conv1dを用いる
- CNNのin_channelsをembeddingの各次元とする
- 最終出力は1次元で、[0, 1]の値とする
  - よってこれまでのsoftmaxではなくsigmoidにし、loss functionもbinary cross entropyにする

In [22]:
import time
import os

import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt
from PIL import Image

import torch
import torchvision
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from torchvision import transforms, datasets
torch.set_printoptions(precision=4, linewidth=100)

In [81]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.emb = nn.Embedding(5000, 32) # 32次元のembeddingにする
        self.conv1 = nn.Conv1d(32, 64, kernel_size=5) 
        self.bn1 = nn.BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True)
        # 500x32 -> 496x64 (paddingしていないので落ちる) -> 248x64 (maxpoolのkernel sizeが(2,2)のため)
        self.fc1 = nn.Linear(15872, 100)
        self.fc2 = nn.Linear(100, 1)
        self.sig = nn.Sigmoid()
    def forward(self, x):
        x = self.emb(x)
        x = x.transpose(1, 2) # N x seq_size x embedding_sizeになっているので、N x embedding_size x seq_size に変換する
        x = F.dropout(x, training=self.training)
        x = F.relu(F.max_pool1d(self.conv1(x), 2)) # max_pool1dに
        x = self.bn1(x)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return self.sig(x)
    
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [82]:
model = Net()

In [83]:
use_gpu = True
if use_gpu:
    model = model.cuda()
    
criterion = nn.BCELoss()
# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [10]:
def train_model(model, criterion, optimizer, scheduler=None, num_epochs=25):
    since = time.time()

    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'test']:
            if phase == 'train':
                if scheduler:
                    scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for data in tqdm(dataloaders[phase]):
                # get the inputs
                inputs = data.text
                labels = data.label.float()

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)
                preds = outputs.round() # 四捨五入して予測
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.data[0]
                running_corrects += torch.sum(preds.data == labels.data) # preds.dataに変更

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            # 最も良いモデルの重みを変数に保持
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [85]:
model = train_model(model, criterion, optimizer, num_epochs=5)

  0%|          | 0/49 [00:00<?, ?it/s]

Epoch 0/4
----------


100%|██████████| 49/49 [00:14<00:00,  3.36it/s]
  2%|▏         | 1/49 [00:00<00:05,  9.24it/s]

train Loss: 0.0014 Acc: 0.5052


100%|██████████| 49/49 [00:04<00:00, 10.66it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0016 Acc: 0.5080

Epoch 1/4
----------


100%|██████████| 49/49 [00:15<00:00,  3.26it/s]
  2%|▏         | 1/49 [00:00<00:05,  9.13it/s]

train Loss: 0.0013 Acc: 0.5500


100%|██████████| 49/49 [00:04<00:00, 11.20it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0017 Acc: 0.5178

Epoch 2/4
----------


100%|██████████| 49/49 [00:14<00:00,  3.38it/s]
  2%|▏         | 1/49 [00:00<00:05,  9.16it/s]

train Loss: 0.0013 Acc: 0.5984


100%|██████████| 49/49 [00:04<00:00, 11.19it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0015 Acc: 0.5640

Epoch 3/4
----------


100%|██████████| 49/49 [00:14<00:00,  3.40it/s]
  2%|▏         | 1/49 [00:00<00:05,  9.23it/s]

train Loss: 0.0012 Acc: 0.6316


100%|██████████| 49/49 [00:04<00:00, 10.89it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.6428

Epoch 4/4
----------


100%|██████████| 49/49 [00:14<00:00,  3.35it/s]
  2%|▏         | 1/49 [00:00<00:05,  9.18it/s]

train Loss: 0.0012 Acc: 0.6698


100%|██████████| 49/49 [00:04<00:00, 10.95it/s]

test Loss: 0.0015 Acc: 0.6376

Training complete in 1m 36s
Best val Acc: 0.642840





## Glove

- 自前でEmbeddingを作成してもうまくいかないので、Gloveの重みを活用する

In [12]:
from torchtext import data
from torchtext import datasets
from torchtext.vocab import GloVe

# Approach 1:
# set up fields
TEXT = data.Field(lower=True, fix_length=500, batch_first=True, pad_first=True)
LABEL = data.Field(sequential=True, batch_first=True, pad_token=None, unk_token=None) # vocabularyに反映されてしまうため、unkとpadをNonenに

# make splits for data
train, test = datasets.IMDB.splits(TEXT, LABEL)

In [13]:
# build the vocabulary
TEXT.build_vocab(train, vectors=GloVe(name='6B', dim=300)) # GloVeのembeddingを使用してvocabularyを作成->各termに対応するembeddingが得られる
LABEL.build_vocab(train)
print('len(TEXT.vocab)', len(TEXT.vocab))
print('TEXT.vocab.vectors.size()', TEXT.vocab.vectors.size())
TEXT.vocab.vectors

len(TEXT.vocab) 251639
TEXT.vocab.vectors.size() torch.Size([251639, 300])



 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 0.0466  0.2132 -0.0074  ...   0.0091 -0.2099  0.0539
          ...             ⋱             ...          
 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
[torch.FloatTensor of size 251639x300]

In [14]:
print(TEXT.vocab.itos[11])
print(LABEL.vocab.itos[0], LABEL.vocab.itos[1])

that
neg pos


In [15]:
# make iterator for splits
train_iter, test_iter = data.BucketIterator.splits((train, test), batch_size=512, device=0, repeat=False)

In [16]:
dataloaders = {'train': train_iter, 'test': test_iter}
dataset_sizes = {'train': len(train), 'test': len(test)}

In [17]:
batch = next(iter(train_iter))
print(batch.text)
print(batch.label)

Variable containing:
 1.0000e+00  1.0000e+00  1.0000e+00  ...   2.5690e+03  5.3453e+04  8.3310e+04
 1.0000e+00  1.0000e+00  1.0000e+00  ...   6.5816e+04  1.3000e+01  7.1100e+02
 1.0000e+00  1.0000e+00  1.0000e+00  ...   1.6000e+01  2.0000e+00  2.2646e+04
                ...                   ⋱                   ...                
 1.0000e+00  1.0000e+00  1.0000e+00  ...   4.8000e+01  3.2000e+01  7.4110e+04
 1.0000e+00  1.0000e+00  1.0000e+00  ...   1.0800e+02  1.3000e+01  7.3049e+04
 1.0000e+00  1.0000e+00  1.0000e+00  ...   3.3100e+02  8.1000e+01  3.4160e+03
[torch.cuda.LongTensor of size 512x500 (GPU 0)]

Variable containing:
    0
    0
    0
    0
    0
    0
    1
    0
    1
    0
    0
    1
    1
    0
    1
    0
    1
    1
    1
    1
    1
    0
    0
    0
    1
    1
    0
    1
    0
    0
    0
    0
    0
    1
    0
    0
    1
    0
    1
    0
    1
    0
    1
    1
    1
    1
    1
    0
    0
    0
    0
    0
    1
    0
    0
    0
    0
    1
    0
    1
   

In [18]:
class NetGlove(nn.Module):
    def __init__(self, glove_weight):
        super(NetGlove, self).__init__()
        self.emb = nn.Embedding(251639, 300)
        self.emb.weight.data.copy_(glove_weight) # Gloveの重みをsetする
        self.conv1 = nn.Conv1d(300, 600, kernel_size=5) 
        self.bn1 = nn.BatchNorm1d(600, eps=1e-05, momentum=0.1, affine=True)
        # 500x32 -> 496x600 -> 248x600
        self.fc1 = nn.Linear(148800, 100)
        self.fc2 = nn.Linear(100, 1)
        self.sig = nn.Sigmoid()
    def forward(self, x):
        x = self.emb(x)
        x = x.transpose(1, 2) # N x seq_size x embedding_sizeになっているので、N x embedding_size x seq_size に変換する
        x = F.dropout(x, training=self.training)
        x = F.relu(F.max_pool1d(self.conv1(x), 2)) # max_pool1dに
        x = self.bn1(x)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return self.sig(x)
    
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [19]:
model = NetGlove(TEXT.vocab.vectors)

In [20]:
use_gpu = True
if use_gpu:
    model = model.cuda()
    
criterion = nn.BCELoss()
# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [23]:
model = train_model(model, criterion, optimizer, num_epochs=5)

  0%|          | 0/49 [00:00<?, ?it/s]

Epoch 0/4
----------


100%|██████████| 49/49 [01:27<00:00,  1.78s/it]
  0%|          | 0/49 [00:00<?, ?it/s]

train Loss: 0.0025 Acc: 0.5145


100%|██████████| 49/49 [00:23<00:00,  2.13it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0024 Acc: 0.5202

Epoch 1/4
----------


100%|██████████| 49/49 [01:27<00:00,  1.78s/it]
  0%|          | 0/49 [00:00<?, ?it/s]

train Loss: 0.0013 Acc: 0.5436


100%|██████████| 49/49 [00:22<00:00,  2.15it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0042 Acc: 0.5918

Epoch 2/4
----------


100%|██████████| 49/49 [01:27<00:00,  1.78s/it]
  0%|          | 0/49 [00:00<?, ?it/s]

train Loss: 0.0011 Acc: 0.6970


100%|██████████| 49/49 [00:22<00:00,  2.17it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0037 Acc: 0.5961

Epoch 3/4
----------


100%|██████████| 49/49 [01:26<00:00,  1.77s/it]
  0%|          | 0/49 [00:00<?, ?it/s]

train Loss: 0.0008 Acc: 0.7806


100%|██████████| 49/49 [00:22<00:00,  2.16it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0028 Acc: 0.6642

Epoch 4/4
----------


100%|██████████| 49/49 [01:27<00:00,  1.78s/it]
  0%|          | 0/49 [00:00<?, ?it/s]

train Loss: 0.0006 Acc: 0.8622


100%|██████████| 49/49 [00:22<00:00,  2.16it/s]

test Loss: 0.0021 Acc: 0.7275

Training complete in 9m 10s
Best val Acc: 0.727480





## RNN

[A Tutorial on Torchtext](http://anie.me/On-Torchtext/)

- variable lengthの入力を作成
  - padding済みから作成: torch.nn.utils.rnn.pack_padded_sequence
    - 引数として別途各lengthを与えることで、どこまでがpadではないか判別
  - sequenceから作成: torch.nn.utils.rnn.pack_sequence

In [54]:
from torchtext import data
from torchtext import datasets
from torchtext.vocab import GloVe

# Approach 1:
# set up fields
TEXT = data.Field(lower=True, batch_first=True, include_lengths=True) # lengthを含める
LABEL = data.Field(sequential=True, batch_first=True, pad_token=None, unk_token=None) # vocabularyに反映されてしまうため、unkとpadをNonenに

# make splits for data
train, test = datasets.IMDB.splits(TEXT, LABEL)

In [55]:
print('train.fields', train.fields)
print('len(train)', len(train))
print('vars(train[0])', vars(train[0]))

train.fields {'label': <torchtext.data.field.Field object at 0x7f2d9cedb4a8>, 'text': <torchtext.data.field.Field object at 0x7f2d9cedb5c0>}
len(train) 25000
vars(train[0]) {'label': ['pos'], 'text': ['master', 'cinéaste', 'alain', 'resnais', 'likes', 'to', 'work', 'with', 'those', 'actors', 'who', 'are', 'a', 'part', 'of', 'his', 'family.in', 'this', 'film', 'too', 'we', 'see', "resnais'", 'family', 'members', 'like', 'pierre', 'arditi,', 'sabine', 'azema,', 'andré', 'dussolier', 'and', 'fanny', 'ardant', 'dealing', 'with', 'serious', 'themes', 'like', 'death,religion,suicide,love', 'and', 'their', 'overall', 'implications', 'on', 'our', 'daily', 'lives.the', 'formal', 'nature', 'of', 'relationship', 'shared', 'by', 'these', 'people', 'is', 'evident', 'as', 'even', 'friends,', 'they', 'address', 'each', 'other', 'using', 'a', 'formal', 'you.in', '1984,while', 'making', "l'amour", 'à', 'mort,resnais', 'dealt', 'with', 'time,memory', 'and', 'space', 'to', 'unravel', 'the', 'mysteries', 

In [56]:
# build the vocabulary
TEXT.build_vocab(train, vectors=GloVe(name='6B', dim=300)) # GloVeのembeddingを使用してvocabularyを作成->各termに対応するembeddingが得られる
LABEL.build_vocab(train)
print('len(TEXT.vocab)', len(TEXT.vocab))
print('TEXT.vocab.vectors.size()', TEXT.vocab.vectors.size())
TEXT.vocab.vectors

len(TEXT.vocab) 251639
TEXT.vocab.vectors.size() torch.Size([251639, 300])



 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 0.0466  0.2132 -0.0074  ...   0.0091 -0.2099  0.0539
          ...             ⋱             ...          
 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
[torch.FloatTensor of size 251639x300]

In [57]:
dataloaders = {'train': train_iter, 'test': test_iter}
dataset_sizes = {'train': len(train), 'test': len(test)}

In [58]:
batch = next(iter(train_iter))
print(batch.text)
print(batch.label)

Variable containing:
 1.0000e+00  1.0000e+00  1.0000e+00  ...   2.0400e+02  1.8900e+02  2.2716e+04
 1.0000e+00  1.0000e+00  1.0000e+00  ...   4.0000e+00  3.9000e+01  2.8520e+03
 1.0000e+00  1.0000e+00  1.0000e+00  ...   4.0000e+00  1.5570e+05  4.6610e+04
                ...                   ⋱                   ...                
 8.2000e+01  5.0000e+00  3.5000e+01  ...   1.0000e+02  5.0000e+00  1.8800e+02
 6.2780e+03  9.0000e+00  6.2000e+01  ...   3.3000e+01  4.3700e+02  1.2000e+01
 1.3300e+02  2.0000e+00  2.2500e+02  ...   7.3600e+02  1.8321e+04  1.2499e+04
[torch.cuda.LongTensor of size 512x500 (GPU 0)]

Variable containing:
    1
    0
    1
    1
    1
    1
    0
    1
    0
    0
    1
    1
    1
    0
    1
    1
    1
    0
    0
    1
    0
    0
    0
    0
    0
    1
    1
    0
    1
    0
    0
    1
    0
    0
    1
    0
    1
    1
    0
    0
    1
    1
    1
    0
    1
    1
    0
    1
    0
    0
    1
    0
    0
    0
    0
    1
    1
    0
    0
    1
   

 c nmbb## class NetRNN(nn.Module):
    def __init__(self, glove_weight):
        super(NetRNN, self).__init__()
        self.emb = nn.Embedding(251639, 300)
        self.emb.weight.data.copy_(glove_weight) # Gloveの重みをsetする
        self.lstm = nn.LSTM(input_size=300, hidden_size=50, num_layers=1, dropout=0.5)
        self.fc = nn.Linear(50, 1)
        self.sig = nn.Sigmoid()
    def forward(self, x, hidden=None):
        x = self.emb(x)
        if lengths is not None:
            lengths = lengths.view(-1).tolist()
            packed_emb = nn.utils.rnn.pack_padded_sequence(embed_input, lengths)
        
        x = x.transpose(0, 1) # N x seq_size x embedding_sizeになっているので、seq_size x N x embedding_size に変換する
        # input (seq_len, batch, input_size)
        output, (h_n, c_n) = self.lstm(x, hidden)
        x = h_n[-1].squeeze(0) # seq_len, batch, hidden_size * num_directions なので[-1]を取る
        x = self.fc(x)
        return self.sig(x)

In [110]:
model = NetRNN(TEXT.vocab.vectors)

In [111]:
use_gpu = True
if use_gpu:
    model = model.cuda()
    
criterion = nn.BCELoss()
# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

CuDNNError: 8: b'CUDNN_STATUS_EXECUTION_FAILED'

In [50]:
model = train_model(model, criterion, optimizer, num_epochs=15)

  0%|          | 0/49 [00:00<?, ?it/s]

Epoch 0/14
----------


100%|██████████| 49/49 [00:26<00:00,  1.82it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.06it/s]

train Loss: 0.0014 Acc: 0.5012


100%|██████████| 49/49 [00:07<00:00,  6.96it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.5006

Epoch 1/14
----------


100%|██████████| 49/49 [00:26<00:00,  1.82it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.08it/s]

train Loss: 0.0014 Acc: 0.5016


100%|██████████| 49/49 [00:06<00:00,  7.02it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.5022

Epoch 2/14
----------


100%|██████████| 49/49 [00:26<00:00,  1.82it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.25it/s]

train Loss: 0.0014 Acc: 0.5011


100%|██████████| 49/49 [00:06<00:00,  7.02it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.5031

Epoch 3/14
----------


100%|██████████| 49/49 [00:26<00:00,  1.82it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.28it/s]

train Loss: 0.0014 Acc: 0.5024


100%|██████████| 49/49 [00:07<00:00,  6.96it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.5030

Epoch 4/14
----------


100%|██████████| 49/49 [00:27<00:00,  1.80it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.26it/s]

train Loss: 0.0014 Acc: 0.5052


100%|██████████| 49/49 [00:06<00:00,  7.04it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.5054

Epoch 5/14
----------


100%|██████████| 49/49 [00:26<00:00,  1.82it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.27it/s]

train Loss: 0.0014 Acc: 0.5052


100%|██████████| 49/49 [00:06<00:00,  7.03it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.5058

Epoch 6/14
----------


100%|██████████| 49/49 [00:26<00:00,  1.82it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.21it/s]

train Loss: 0.0014 Acc: 0.5071


100%|██████████| 49/49 [00:07<00:00,  6.96it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.5079

Epoch 7/14
----------


100%|██████████| 49/49 [00:26<00:00,  1.83it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.03it/s]

train Loss: 0.0014 Acc: 0.5078


100%|██████████| 49/49 [00:06<00:00,  7.00it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.5080

Epoch 8/14
----------


100%|██████████| 49/49 [00:26<00:00,  1.83it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.22it/s]

train Loss: 0.0014 Acc: 0.5074


100%|██████████| 49/49 [00:07<00:00,  6.98it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.5087

Epoch 9/14
----------


100%|██████████| 49/49 [00:26<00:00,  1.82it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.27it/s]

train Loss: 0.0014 Acc: 0.5075


100%|██████████| 49/49 [00:06<00:00,  7.04it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.5085

Epoch 10/14
----------


100%|██████████| 49/49 [00:26<00:00,  1.82it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.26it/s]

train Loss: 0.0014 Acc: 0.5070


100%|██████████| 49/49 [00:06<00:00,  7.02it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.5089

Epoch 11/14
----------


100%|██████████| 49/49 [00:26<00:00,  1.82it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.16it/s]

train Loss: 0.0014 Acc: 0.5072


100%|██████████| 49/49 [00:06<00:00,  7.02it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.5084

Epoch 12/14
----------


100%|██████████| 49/49 [00:26<00:00,  1.83it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.28it/s]

train Loss: 0.0014 Acc: 0.5072


100%|██████████| 49/49 [00:07<00:00,  6.97it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.5086

Epoch 13/14
----------


100%|██████████| 49/49 [00:27<00:00,  1.80it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.28it/s]

train Loss: 0.0014 Acc: 0.5068


100%|██████████| 49/49 [00:06<00:00,  7.05it/s]
  0%|          | 0/49 [00:00<?, ?it/s]

test Loss: 0.0014 Acc: 0.5085

Epoch 14/14
----------


100%|██████████| 49/49 [00:26<00:00,  1.82it/s]
  2%|▏         | 1/49 [00:00<00:07,  6.28it/s]

train Loss: 0.0014 Acc: 0.5070


100%|██████████| 49/49 [00:06<00:00,  7.05it/s]

test Loss: 0.0014 Acc: 0.5086

Training complete in 8m 29s
Best val Acc: 0.508920



