# Lesson1, Lesson2

- [Lesson1](http://course.fast.ai/lessons/lesson1.html)
- [Lesson2](http://course.fast.ai/lessons/lesson2.html)
- [data](http://files.fast.ai/)
- [Git](https://github.com/fastai/courses/tree/master/deeplearning1)
- [Visualizing and Understanding Convolutional Networks](https://cs.nyu.edu/~fergus/papers/zeilerECCV2014.pdf)
- [Training a classifier](http://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)
- [Deep Learning Building Blocks: Affine maps, non-linearities and objectives ~Softmax and Probabilities~](http://pytorch.org/tutorials/beginner/nlp/deep_learning_tutorial.html#softmax-and-probabilities)
- [Autograd mechanics](http://pytorch.org/docs/master/notes/autograd.html)
- Pytorch Forum
  - [How to modify the final FC layer based on the torch.model](https://discuss.pytorch.org/t/how-to-modify-the-final-fc-layer-based-on-the-torch-model/766)
  - [How to perform finetuning in Pytorch?](https://discuss.pytorch.org/t/how-to-perform-finetuning-in-pytorch/419)
  - [Given vgg16, how to remove pool5 layer and all the classify layers?](https://discuss.pytorch.org/t/given-vgg16-how-to-remove-pool5-layer-and-all-the-classify-layers-and-how-to-add-new-layers-to-that-while-keeping-the-pretrained-weights/1653)

In [None]:
%matplotlib inline

In [None]:
# Dataset root for the first experiments; a later cell appends "train" to it.
path = "data/dogscats/sample/"

In [None]:
import os
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.models as models
from matplotlib import pyplot as plt
from torch.autograd import Variable
from torch.optim import lr_scheduler
from torchvision import transforms, datasets

np.set_printoptions(precision=4, linewidth=100)

## Pretrained modelの試行

http://pytorch.org/docs/master/torchvision/models.html に書いてあるように、pretrainedモデルに合わせてCropとNormalizeが必要

また、ImageFolderは以下のようなデータを想定

```
root/dog/xxx.png
root/dog/xxy.png
root/dog/xxz.png

root/cat/123.png
root/cat/nsdf3.png
root/cat/asd932_.png
```

In [None]:
# http://pytorch.org/docs/master/torchvision/transforms.html
data_transform = transforms.Compose([
        transforms.RandomSizedCrop(224), # Bring every image to one size; the value matches what the pretrained model expects
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(), # Convert PIL.Image to Tensor
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
batch_size=12
# Labels come from the sub-directory names under <path>train
dataset = datasets.ImageFolder(root=path+"train", transform=data_transform)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)

In [None]:
class_names = dataset.classes # List of class names, built automatically from the directory names
print(class_names)

In [None]:
def imshow(inp, title=None):
    """Show a normalized image tensor with matplotlib.

    Args:
        inp: Tensor laid out channel-first; it is converted to NumPy and
            transposed to channel-last before plotting.
        title: Optional title for the plot; skipped when ``None``.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])
    # Channel-first -> channel-last, as expected by plt.imshow
    channel_last = np.transpose(inp.numpy(), (1, 2, 0))
    # Undo the Normalize transform, then clamp to the displayable range
    restored = np.clip(channel_std * channel_last + channel_mean, 0, 1)
    plt.imshow(restored)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated

# Get a batch of training data
inputs, classes = next(iter(train_loader))

# Make a grid from batch
# The return value is a single FloatTensor with the batch images joined together
out = torchvision.utils.make_grid(inputs)

# Use the class name of every image in the batch as the plot title
imshow(out, title=[class_names[x] for x in classes])

In [None]:
# Load VGG16 with pretrained weights
vgg16 = models.vgg16(pretrained=True)

In [None]:
# A freshly constructed module is in training mode, where dropout is active;
# switch to inference mode so the predictions are deterministic.
vgg16.train(False)
for data in train_loader:
    images, labels = data
    outputs = vgg16(Variable(images))
    # get the index of the highest energy
    # torch.max returns a tuple: (Tensor of max values, Tensor of their indices)
    _, predicted = torch.max(outputs.data, dim=1)
    print(predicted) # each value is one of the 1000 classes of the pretrained model

## Pretrained モデルの出力をlinear層に掛けて学習

1. trainとvalidそれぞれで、vgg16で推論
2. そのデータを保存
3. Datasetを定義して、featuresとlabelsをDataloaderから得られるようにする
4. linearモデルを作成し、学習

### 手順1

In [None]:
# Fresh pretrained VGG16, used below to precompute outputs for the linear model
vgg16 = models.vgg16(pretrained=True)

In [None]:
# Deterministic preprocessing only (no augmentation) for both phases
data_transform = transforms.Compose([
        transforms.Scale(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
# NOTE(review): other cells use "data/dogscats/..." - confirm this directory name.
path = "data/dogcats/"
batch_size = 12 # note: a large batch size may not fit in GPU memory
image_datasets = {x: datasets.ImageFolder(os.path.join(path, x), data_transform)
                  for x in ['train', 'valid']}
# shuffle=False: the outputs computed from these loaders are later paired with
# labels by dataset index, so batches must arrive in dataset order.
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                             shuffle=False, num_workers=4)
              for x in ['train', 'valid']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'valid']}
class_names = image_datasets['train'].classes
print(dataset_sizes, class_names)
use_gpu = torch.cuda.is_available()

In [None]:
# Move the model to the GPU when CUDA is available
if use_gpu:
    vgg16.cuda()

In [None]:
# Run every image through the pretrained network once and collect the outputs
# per phase, so they can be reused as input features for a linear model.
result_map = {'train': [], 'valid': []}
vgg16.train(False)  # inference mode; set once, it does not change between phases
for phase in ['train', 'valid']:
    for inputs, _ in dataloaders[phase]:
        if use_gpu:
            inputs = Variable(inputs.cuda())
        else:
            inputs = Variable(inputs)
        outputs = vgg16(inputs)
        # Store the raw tensor on the CPU so results do not accumulate on the GPU
        result_map[phase].append(outputs.data.cpu())

### 手順2

In [None]:
# Concatenate the per-batch outputs into a single tensor for each phase
feature_tensors = {x: torch.cat(result_map[x]) for x in ["train", "valid"]}

In [None]:
# Save the features of each phase to its own file
for x in ["train", "valid"]:
    torch.save(feature_tensors[x], "pretrain_features_{0}.pt".format(x))

### 手順3

In [None]:
from torch.utils.data import Dataset, DataLoader

class LinearDataset(Dataset):
    """Pair precomputed feature rows with the labels of an image dataset.

    Args:
        image_folder: Indexable dataset whose items are ``(image, label)``
            pairs, e.g. ``torchvision.datasets.ImageFolder``.
        features: Indexable collection of precomputed features. Item ``i``
            must belong to item ``i`` of ``image_folder``.
    """

    def __init__(self, image_folder, features):
        self.image_folder = image_folder
        self.features = features

    def __len__(self):
        """Return the number of feature rows."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return ``(features[idx], label)`` for sample ``idx``."""
        folder = self.image_folder
        imgs = getattr(folder, "imgs", None)
        if imgs is not None and getattr(folder, "target_transform", None) is None:
            # ImageFolder keeps (path, class_index) pairs in ``imgs``; reading
            # the label there avoids decoding and transforming the image,
            # which is the cost the precomputed features are meant to save.
            label = imgs[idx][1]
        else:
            # Generic dataset, or a target_transform that must be applied:
            # fall back to regular item access.
            _, label = folder[idx]
        return self.features[idx], label

In [None]:
# NOTE(review): other cells use "data/dogscats/..." - confirm this directory name.
path = "data/dogcats/"
batch_size = 12
# The features were precomputed without augmentation, so the same
# augmentation-free data_transform is used here as well
image_datasets = {x: datasets.ImageFolder(os.path.join(path, x), data_transform) for x in ['train', 'valid']}
# Pair each precomputed feature row with the label of the image at the same index
linear_datasets = {x: LinearDataset(image_datasets[x], feature_tensors[x]) for x in ['train', 'valid']}
dataloaders = {x: torch.utils.data.DataLoader(linear_datasets[x], batch_size=batch_size,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'valid']}
dataset_sizes = {x: len(linear_datasets[x]) for x in ['train', 'valid']}
class_names = linear_datasets['train'].image_folder.classes
print(dataset_sizes, class_names)
use_gpu = torch.cuda.is_available()

### 手順4

In [None]:
# Linear layer mapping 1000 input values to 2 outputs
# (presumably the 1000 VGG16 outputs and the two classes - confirm)
fc = nn.Linear(1000, 2)
if use_gpu:
    fc = fc.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(fc.parameters(), lr=0.001, momentum=0.9)
# Multiply the learning rate by 0.1 every 7 scheduler steps
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
# NOTE: train_model is defined in a later cell of this notebook (Finetune
# section); that cell has to be executed before this one.
model_linear = train_model(fc, criterion, optimizer, exp_lr_scheduler, num_epochs=1)

## Finetune

[Transfer Learning Tutorial](http://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html)

- pretrained modelの最後の層だけ置き換えて学習する
- どの層から変更するかは試してみるしかないらしい (最後が一般的)

In [None]:
# Reload the pretrained VGG16 and display its layer structure
vgg16 = models.vgg16(pretrained=True)
vgg16

In [None]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomSizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Scale(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

path = "data/dogcats/"
batch_size = 12 # note: a large batch size may not fit in GPU memory
# One ImageFolder per phase, each with the transform of that phase
image_datasets = {x: datasets.ImageFolder(os.path.join(path, x),
                                          data_transforms[x])
                  for x in ['train', 'valid']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'valid']}
# Sample count per phase
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'valid']}
class_names = image_datasets['train'].classes
print(dataset_sizes, class_names)
use_gpu = torch.cuda.is_available()

In [None]:
# Rebuild the loaders and bookkeeping values from image_datasets
# (same statements as at the end of the preceding cell).
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'valid']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'valid']}
class_names = image_datasets['train'].classes
print(dataset_sizes, class_names)
use_gpu = torch.cuda.is_available()

In [None]:
# 置き換える層以外は再学習しないようにする
for param in vgg16.parameters():
    param.requires_grad = False

In [None]:
# Replace the last classifier layer with a new 2-output linear layer and keep
# every other classifier layer unchanged.
num_features = vgg16.classifier[6].in_features
modules = list(vgg16.classifier.children())[:-1]
modules += [torch.nn.Linear(num_features, 2)]
vgg16.classifier = new_classifier = torch.nn.Sequential(*modules)

In [None]:
# Display the model to inspect its current classifier layers
vgg16

In [None]:
# Data augmentation and normalization for training
# Just normalization for validation
# (this cell repeats the data setup from earlier in the Finetune section)
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomSizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Scale(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

path = "data/dogcats/"
batch_size = 12 # note: a large batch size may not fit in GPU memory
image_datasets = {x: datasets.ImageFolder(os.path.join(path, x),
                                          data_transforms[x])
                  for x in ['train', 'valid']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'valid']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'valid']}
class_names = image_datasets['train'].classes
print(dataset_sizes, class_names)
use_gpu = torch.cuda.is_available()

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' phase followed by a 'valid' phase. Batches are
    read from the module-level ``dataloaders`` dict, sample counts from
    ``dataset_sizes`` and the device choice from ``use_gpu``.

    Args:
        model: Module to optimize.
        criterion: Loss, called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped after every training batch.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        num_epochs: Number of epochs to run.

    Returns:
        ``model`` after loading the weights of the epoch with the highest
        validation accuracy.
    """
    import copy  # local import keeps this cell self-contained

    since = time.time()

    # state_dict() hands out references to the live parameter tensors, so a
    # real copy is required to remember the weights of one particular epoch.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for data in dataloaders[phase]:
                # get the inputs
                inputs, labels = data

                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                # loss.data is a size-1 Tensor, hence [0]. It is weighted by
                # the batch size because the total is divided by the number of
                # samples below (assumes criterion returns the batch mean, as
                # nn.CrossEntropyLoss does by default).
                running_loss += loss.data[0] * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            # keep the weights of the best model seen so far
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
if use_gpu:
    vgg16 = vgg16.cuda()
criterion = nn.CrossEntropyLoss()
# Optimize only the replaced last layer
optimizer = optim.SGD(vgg16.classifier[6].parameters(), lr=0.001, momentum=0.9) # passing parameters with requires_grad=False raised an error
# Multiply the learning rate by 0.1 every 7 scheduler steps
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
# Train for a single epoch
model_finetuned = train_model(vgg16, criterion, optimizer, exp_lr_scheduler, num_epochs=1)

In [None]:
# Save only the parameters (state_dict), not the whole model object
torch.save(model_finetuned.state_dict(), "lesson1_finetuned.pt")

## Kaggle提出

提出フォーマット

```
imageId,isDog
1242, .3984
3947, .1000
4539, .9082
2345, .0000
```

[Data Loading and Processing Tutorial](http://pytorch.org/tutorials/beginner/data_loading_tutorial.html)

を参考に、testデータ用のloaderを作成する

In [None]:
# Restore the saved parameters into vgg16; its classifier must already have
# the structure that was saved.
vgg16.load_state_dict(torch.load("lesson1_finetuned.pt"))
model_finetuned = vgg16

In [None]:
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

class KaggleDataset(Dataset):
    """Unlabeled images stored as ``1.jpg``, ``2.jpg``, ... under ``root``.

    Item ``idx`` is read from ``<root>/<idx + 1>.jpg``.
    """

    def __init__(self, root, transform=None):
        """
        Args:
            root (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root = root
        self.transform = transform
        self._length = None  # number of images; counted on first use

    def __len__(self):
        """Return the number of ``.jpg`` files found directly in ``root``."""
        if self._length is None:
            # Count once and cache the result: the length is requested
            # repeatedly, the directory listing is comparatively slow.
            self._length = sum(
                1 for name in os.listdir(self.root) if name.endswith(".jpg")
            )
        return self._length

    def __getitem__(self, idx):
        """Return the (optionally transformed) image for zero-based ``idx``."""
        img_name = os.path.join(self.root, "{0}.jpg".format(idx + 1))
        # Read as an RGB PIL.Image, like the default ImageFolder loader, so
        # the 3-channel Normalize also works for non-RGB files. The explicit
        # file object makes sure the handle is closed right after decoding.
        with open(img_name, "rb") as f:
            image = Image.open(f).convert("RGB")
        if self.transform:
            image = self.transform(image)

        return image

In [None]:
# Deterministic preprocessing (no augmentation) for the test images
data_transform = transforms.Compose([
        transforms.Scale(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
batch_size=12
path = "data/dogscats/"
dataset = KaggleDataset(root=path+"test1", transform=data_transform)
# shuffle=False keeps the predictions in file-number order
test_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=4)

In [None]:
# Force the CPU path in the cells that check use_gpu
use_gpu = False

In [None]:
import torch.nn.functional as F

# Predict class probabilities for every test image with the fine-tuned model.
model_finetuned.train(False)  # inference mode
if use_gpu:
    model_finetuned.cuda()
results = []  # one probability tensor per batch, in loader order
for data in test_loader:
    if use_gpu:
        inputs = Variable(data).cuda()
    else:
        inputs = Variable(data)
    # Normalize over the class dimension (dim 1) explicitly instead of
    # relying on the implicit choice of softmax.
    outputs = F.softmax(model_finetuned(inputs), dim=1)
    results.append(outputs.cpu().data)

result_tensor = torch.cat(results)
torch.save(result_tensor, "lesson1_result.pt")
# NOTE(review): this writes column 0 only, without ids; the following cell
# writes the same file name again with ids and column 1 - confirm this line
# is still needed.
np.savetxt("kaggle_output", result_tensor.numpy()[:,0])

In [None]:
# Build (id, probability) rows. Ids start at 1 and are derived from the number
# of predictions instead of a hard-coded 12500, so the two columns always
# have the same length.
is_dog = result_tensor.numpy()[:,1]  # column 1 (presumably the "dog" class - confirm against class_names)
output_data = np.dstack((np.arange(1, len(is_dog) + 1), is_dog))
# NOTE(review): the submission format described in this notebook starts with a
# header line, which is not written here - confirm whether it is required.
np.savetxt("kaggle_output", output_data[0], delimiter=",", fmt=["%d", "%.10e"])