- [Wiki](http://wiki.fast.ai/index.php/Lesson_3)
- [Deep Visualization Toolbox](https://github.com/yosinski/deep-visualization-toolbox)

# Dropoutの除去によるUnderfit対策

- training errorがvalidation errorよりも大きかったら（training accuracyがvalidation accuracyよりも低かったら）underfittingを疑う

1. vgg finetuned モデルの重みをロードする
2. conv layersとfc layersに分ける
    - vggはconv layersにdropoutを持たないため
3. fc layersからdropoutを除去する
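    - dropoutの引数を0にする（このノートブックではDropout層そのものを取り除いている）
    - 2倍の重みがアクティブになるので、元の重みを1/2にしてセットする
        - 注意: PyTorchのnn.Dropoutは学習時に出力を1/(1-p)倍にスケールするため、重みを1/2にする必要があるかは要確認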
4. conv layersだけを使って推論する
5. その特徴量を使って、dropoutを除去したfc layersを学習する

In [None]:
import copy
import os
import time

import numpy as np
from matplotlib import pyplot as plt
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.models as models
from torch.autograd import Variable
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets

np.set_printoptions(precision=4, linewidth=100)
torch.set_printoptions(precision=4, linewidth=100)

## 1. finetunedモデルをロード

In [None]:
# Build the torchvision VGG16 network with its pretrained weights, then echo it
# so the notebook shows the layer layout.
vgg16 = torchvision.models.vgg16(pretrained=True)
vgg16

In [None]:
# Swap the final classifier layer for a 2-output Linear layer while keeping all
# earlier classifier layers (and their weights) untouched.
num_features = vgg16.classifier[6].in_features
modules = list(vgg16.classifier.children())[:-1]  # everything except the last layer
modules += [torch.nn.Linear(num_features, 2)]
new_classifier = torch.nn.Sequential(*modules)
vgg16.classifier = new_classifier

In [None]:
# Restore the fine-tuned weights (presumably saved by the lesson 1 notebook).
# map_location keeps every tensor on the CPU while unpickling, so a checkpoint
# saved from a GPU session still loads on a CPU-only machine; the model is
# moved to the GPU later when one is available.
state_dict = torch.load("lesson1_finetuned.pt",
                        map_location=lambda storage, loc: storage)
vgg16.load_state_dict(state_dict)
model_finetuned = vgg16

In [None]:
# Freeze only the convolutional feature extractor. The fully connected
# classifier must keep requires_grad=True: its parameters are the ones handed
# to the optimizer further down, and backward() fails when no parameter in the
# graph requires gradients.
for param in vgg16.features.parameters():
    param.requires_grad = False

## 2, 3 Dropoutを除去

In [None]:
# Rebuild the classifier from every layer that is not a Dropout module, keeping
# the original order. The remaining modules are reused as-is, so they keep the
# weights that were loaded into them.
modules_without_dropout = [
    module
    for module in model_finetuned.classifier.children()
    if not isinstance(module, nn.Dropout)
]
new_classifier = nn.Sequential(*modules_without_dropout)
model_finetuned.classifier = new_classifier

In [None]:
# Echo the model so the notebook shows its layer layout after the classifier
# was rebuilt.
model_finetuned

In [None]:
def half_weights(m):
    """Halve the weight matrix of ``m`` in place if it is a Linear layer.

    Written to be passed to ``Module.apply``: any module that is not an
    ``nn.Linear`` is left untouched. Only ``weight`` is scaled; the bias is
    not modified. The weight is printed before and after scaling so the
    change can be inspected in the notebook output.

    NOTE(review): ``nn.Dropout`` is documented to rescale its outputs by
    1/(1-p) during training, so removing it may not require halving the
    weights at all -- confirm that this compensation is really wanted.
    """
    if isinstance(m, nn.Linear):
        print(m.weight)
        # 0.5 rather than 1/2: the latter is integer division (== 0) under
        # Python 2 and would silently zero the weights.
        m.weight.data.mul_(0.5)
        print(m.weight)

In [None]:
# Run half_weights over the classifier and each of its submodules; every
# Linear layer found there gets its weight halved.
# NOTE(review): this also halves the first Linear layer -- confirm that is
# intended.
model_finetuned.classifier.apply(half_weights)

In [None]:
# Data augmentation and normalization for training.
# Just deterministic resizing, cropping and normalization for validation.
# RandomResizedCrop / Resize are the current names of the deprecated
# RandomSizedCrop / Scale transforms.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

path = "data/dogcats/"
batch_size = 12  # note: a larger batch size may not fit in GPU memory
image_datasets = {x: datasets.ImageFolder(os.path.join(path, x),
                                          data_transforms[x])
                  for x in ['train', 'valid']}
# Only the training set is shuffled; the order of validation batches does not
# affect the aggregated loss/accuracy.
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                             shuffle=(x == 'train'), num_workers=4)
              for x in ['train', 'valid']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'valid']}
class_names = image_datasets['train'].classes
print(dataset_sizes, class_names)
use_gpu = torch.cuda.is_available()

In [None]:
# Put the model on the GPU (when there is one) before creating the optimizer.
model_finetuned = model_finetuned.cuda() if use_gpu else model_finetuned

# Classification loss.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over the classifier parameters only.
optimizer = optim.SGD(
    model_finetuned.classifier.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Scale the learning rate by 0.1 after every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25,
                loaders=None, sizes=None, use_cuda=None):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' phase followed by a 'valid' phase and prints
    the per-sample loss and accuracy of each. The weights that reach the
    highest validation accuracy are snapshotted and restored before returning.

    Args:
        model: Module to train; it is modified in place.
        criterion: Loss called as ``criterion(outputs, labels)``. It is
            assumed to return the mean loss over the batch.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch, after the
            training phase.
        num_epochs: Number of epochs to run.
        loaders: Mapping with 'train' and 'valid' keys, each an iterable of
            ``(inputs, labels)`` batches. Defaults to the module-level
            ``dataloaders``.
        sizes: Mapping with the number of samples for 'train' and 'valid'.
            Defaults to the module-level ``dataset_sizes``.
        use_cuda: Whether to move each batch to the GPU. Defaults to the
            module-level ``use_gpu``.

    Returns:
        The same ``model`` object, with the best validation weights loaded.
    """
    if loaders is None:
        loaders = dataloaders
    if sizes is None:
        sizes = dataset_sizes
    if use_cuda is None:
        use_cuda = use_gpu

    since = time.time()

    # state_dict() hands back tensors that share storage with the live
    # parameters, so the snapshot must be a deep copy; otherwise "best"
    # silently tracks the current weights and the final restore is a no-op.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            is_train = phase == 'train'
            model.train(is_train)  # training mode vs. evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in loaders[phase]:
                if use_cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; only track gradients while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs.data, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics: the criterion yields a batch mean, so weight it
                # by the batch size to accumulate a per-sample sum that can be
                # divided by the dataset size below.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += int(torch.sum(preds == labels.data))

            if is_train:
                # Advance the schedule only after this epoch's optimizer steps.
                scheduler.step()

            epoch_loss = running_loss / sizes[phase]
            epoch_acc = float(running_corrects) / sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # keep a deep copy of the weights of the best model so far
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
# train_model takes the LR scheduler as its fourth positional argument;
# leaving it out raises a TypeError before any training happens.
model_without_dropout = train_model(model_finetuned, criterion, optimizer,
                                    exp_lr_scheduler, num_epochs=1)

# Data augmentation

直感的には、データとして含まれていてもおかしくないような加工をすれば良い

- torchvision.transforms.RandomHorizontalFlip
- torchvision.transforms.RandomRotation
  - PyTorch 0.4から登場

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Root directory used by the augmentation demo below (presumably a small
# sample copy of the dataset -- verify the directory layout). Note that there
# is no trailing slash.
path = "data/dogcats/sample"

In [None]:
# http://pytorch.org/docs/master/torchvision/transforms.html
data_transform = transforms.Compose([
        transforms.RandomSizedCrop(224), # 画像のサイズを合わせ、その値はpretrainedモデルに合わせる
        transforms.RandomHorizontalFlip()
    ])
dataset = datasets.ImageFolder(root=path+"train", transform=data_transform)

In [None]:
# Show the first element of the first dataset item (the image of the
# (image, label) pair) to eyeball the random augmentation. The pipeline has no
# ToTensor step, so this is presumably displayed as a PIL image.
dataset[0][0]

## Batch normalization

All modern networks should use batchnorm, or something equivalent.

1. Adding batchnorm to a model can result in 10x or more improvements in training speed
2. Because normalization greatly reduces the ability of a small number of outlying inputs to over-influence the training, it also tends to reduce overfitting.

- VGGはBatch normalizationを使っていないので、足した場合はすべての重みを最初からImageNetで学習する必要がある
  - その後、dog/cat分類用にtransfer learningを行う
- torch.nn.BatchNorm1dを使用する  