In [3]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import tqdm
from sklearn.model_selection import train_test_split

from torchvision import models
from torchvision.datasets import ImageFolder
from torchvision import transforms
# Record the PyTorch version this notebook was executed with.
print(torch.__version__)
# Build torchvision's VGG16 with pretrained weights; this is the network
# that the later cells inspect and train.
# NOTE(review): newer torchvision releases replace `pretrained=` with a
# `weights=` argument - confirm against the installed torchvision version.
net = models.vgg16(pretrained=True)


1.10.0


In [4]:
class Identity(nn.Module):
    """Pass-through module: ``forward`` hands back its input unchanged.

    It owns no parameters or buffers, so it can stand in for a layer that
    should effectively be removed from a model.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # No computation at all: the very same object is returned.
        return x
    

In [5]:
# Disabled experiments, kept for reference: replace the average-pooling stage
# with the pass-through Identity module and swap the last classifier layer for
# a 20-output linear layer.
# NOTE(review): torchvision's VGG16 classifier presumably ends in
# Linear(4096, 1000), so in_features=512 looks wrong - confirm before
# re-enabling these lines.
#net.avgpool = Identity()
#net.classifier[-1] = nn.Linear(512,20)
# Print the architecture of the (unmodified) network.
print(net)


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [6]:
# Image dataset rooted at ./face. Every image is resized so that its shorter
# side is 299 px, centre-cropped to 299x299 and converted to a float tensor.
# NOTE(review): torchvision documents its pretrained models as expecting
# ImageNet mean/std normalisation; none is applied here - confirm whether
# that is intentional.
img = ImageFolder(
    "./face",
    transform=transforms.Compose([
        transforms.Resize(299),
        transforms.CenterCrop(299),
        transforms.ToTensor(),
    ]),
)

img_class = [str(n) for n in range(20)]

# Split *indices* rather than the dataset itself. Passing the ImageFolder
# straight to train_test_split makes scikit-learn index it element by element,
# which decodes every image and keeps all tensors in memory as Python lists.
# Subset views keep loading lazy, and the fixed random_state makes the 70/30
# split reproducible across kernel restarts.
train_idx, test_idx = train_test_split(
    list(range(len(img))), test_size=0.3, random_state=0)
train = torch.utils.data.Subset(img, train_idx)
test = torch.utils.data.Subset(img, test_idx)

# Mini-batches of 5 images; only the training data is reshuffled every epoch.
train_loader = DataLoader(
    train, batch_size=5, shuffle=True)
test_loader = DataLoader(
    test, batch_size=5, shuffle=False)
print(len(train), len(test))
print(train_loader)

708 304
<torch.utils.data.dataloader.DataLoader object at 0xffff65425b50>


In [7]:
def eval_net(net, data_loader, device="cpu"):
    """Return the classification accuracy of ``net`` over ``data_loader``.

    Args:
        net: Module whose output has one score per class along dim 1.
        data_loader: Iterable yielding ``(inputs, labels)`` mini-batches.
        device: Device the mini-batches are moved to before the forward
            pass. ``net`` itself is not moved by this function.

    Returns:
        Fraction of samples whose highest-scoring class equals the label,
        as a Python float.

    Note:
        ``net`` is switched to eval mode and is left in that mode.
    """
    # Inference mode: disables Dropout / BatchNorm training behaviour.
    net.eval()
    targets, predictions = [], []
    # Only forward passes are needed, so autograd bookkeeping is turned off
    # to avoid unnecessary work.
    with torch.no_grad():
        for inputs, labels in data_loader:
            # Move the mini-batch to the device the computation runs on.
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Predicted class = index of the largest score along dim 1.
            predictions.append(net(inputs).max(1)[1])
            targets.append(labels)
    # Merge the per-batch results into single tensors.
    all_targets = torch.cat(targets)
    all_predictions = torch.cat(predictions)
    # Accuracy = number of matching predictions / number of samples.
    n_correct = (all_targets == all_predictions).float().sum()
    return (n_correct / len(all_targets)).item()

def _last_linear_parameters(net):
    """Return the parameters of the last ``nn.Linear`` registered in ``net``, or ``None`` if there is none."""
    last_linear = None
    for module in net.modules():
        if isinstance(module, nn.Linear):
            last_linear = module
    return None if last_linear is None else last_linear.parameters()


def train_net(net, train_loader, test_loader,
              only_fc=True,
              optimizer_cls=optim.Adam,
              loss_fn=nn.CrossEntropyLoss(),
              n_iter=10, device="cpu"):
    """Train ``net`` and report loss / accuracy after every epoch.

    Args:
        net: Model to train. It is not moved to ``device`` here; the caller
            must place it there beforehand.
        train_loader: Loader yielding ``(inputs, labels)`` training batches.
            It must support ``len()``.
        test_loader: Loader passed to ``eval_net`` after each epoch.
        only_fc: If true, only the parameters of the last ``nn.Linear``
            module registered in ``net`` are handed to the optimizer. When
            the model contains no ``nn.Linear``, all parameters are used.
            NOTE(review): "last registered" is assumed to be the output
            layer, which holds for torchvision's classifiers - confirm for
            custom models.
        optimizer_cls: Optimizer class, constructed with default settings.
        loss_fn: Loss applied to ``(net(inputs), labels)``.
        n_iter: Number of epochs.
        device: Device every mini-batch is moved to.

    Returns:
        Dict with the per-epoch lists ``"train_loss"``, ``"train_acc"`` and
        ``"val_acc"``.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    train_losses = []
    train_acc = []
    val_acc = []
    # Previously both branches optimised every parameter, so `only_fc` had no
    # effect; now it really restricts the optimizer to the final linear layer.
    params = _last_linear_parameters(net) if only_fc else None
    if params is None:
        params = net.parameters()
    optimizer = optimizer_cls(params)
    for epoch in range(n_iter):
        running_loss = 0.0
        # Put the network into training mode (eval_net switches it to eval).
        net.train()
        n_batches = 0
        n = 0
        n_acc = 0
        # Training is slow, so show a progress bar.
        for xx, yy in tqdm.tqdm(train_loader, total=len(train_loader)):
            xx = xx.to(device)
            yy = yy.to(device)

            h = net(xx)
            loss = loss_fn(h, yy)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            n_batches += 1
            n += len(xx)
            _, y_pred = h.max(1)
            n_acc += (yy == y_pred).float().sum().item()
        if n_batches == 0:
            raise ValueError("train_loader yielded no batches")
        # Mean loss per batch. Dividing by the last batch *index* (as before)
        # was off by one and failed outright for a single-batch loader.
        train_losses.append(running_loss / n_batches)
        # Accuracy on the training data seen during this epoch.
        train_acc.append(n_acc / n)
        # Accuracy on the held-out data.
        val_acc.append(eval_net(net, test_loader, device))
        # Report this epoch's results.
        print(epoch, train_losses[-1], train_acc[-1],
              val_acc[-1], flush=True)
    return {
        "train_loss": train_losses,
        "train_acc": train_acc,
        "val_acc": val_acc,
    }

In [None]:
# Train for 20 epochs on the CPU with train_net's default optimizer (Adam) and
# loss (cross-entropy). After each epoch train_net prints the epoch index, the
# training loss, the training accuracy and the accuracy on test_loader.
train_net(net, train_loader, test_loader, n_iter=20, device="cpu")

  0%|          | 0/142 [00:00<?, ?it/s]

tensor([[[[0.5922, 0.5882, 0.5843,  ..., 0.6431, 0.6431, 0.6392],
          [0.5922, 0.5882, 0.5882,  ..., 0.6471, 0.6471, 0.6471],
          [0.5882, 0.5843, 0.5843,  ..., 0.6431, 0.6431, 0.6431],
          ...,
          [0.6863, 0.6824, 0.6902,  ..., 0.4196, 0.4196, 0.4157],
          [0.6941, 0.6941, 0.7059,  ..., 0.4157, 0.4157, 0.4118],
          [0.7686, 0.7686, 0.7725,  ..., 0.4196, 0.4196, 0.4157]],

         [[0.5922, 0.5882, 0.5843,  ..., 0.6431, 0.6431, 0.6392],
          [0.5922, 0.5882, 0.5882,  ..., 0.6471, 0.6471, 0.6471],
          [0.5882, 0.5843, 0.5843,  ..., 0.6431, 0.6431, 0.6431],
          ...,
          [0.6863, 0.6824, 0.6902,  ..., 0.4196, 0.4196, 0.4157],
          [0.6941, 0.6941, 0.7059,  ..., 0.4157, 0.4157, 0.4118],
          [0.7686, 0.7686, 0.7725,  ..., 0.4196, 0.4196, 0.4157]],

         [[0.5922, 0.5882, 0.5843,  ..., 0.6431, 0.6431, 0.6392],
          [0.5922, 0.5882, 0.5882,  ..., 0.6471, 0.6471, 0.6471],
          [0.5882, 0.5843, 0.5843,  ..., 0

  1%|          | 1/142 [00:07<17:38,  7.51s/it]

tensor([[[[0.5843, 0.5843, 0.5843,  ..., 0.6706, 0.6627, 0.6588],
          [0.5843, 0.5843, 0.5843,  ..., 0.6706, 0.6667, 0.6667],
          [0.5804, 0.5804, 0.5843,  ..., 0.6667, 0.6627, 0.6627],
          ...,
          [0.5255, 0.5255, 0.5294,  ..., 0.4784, 0.4745, 0.4745],
          [0.5216, 0.5255, 0.5294,  ..., 0.4824, 0.4784, 0.4745],
          [0.5216, 0.5255, 0.5294,  ..., 0.4824, 0.4745, 0.4706]],

         [[0.5843, 0.5843, 0.5843,  ..., 0.6706, 0.6627, 0.6588],
          [0.5843, 0.5843, 0.5843,  ..., 0.6706, 0.6667, 0.6667],
          [0.5804, 0.5804, 0.5843,  ..., 0.6667, 0.6627, 0.6627],
          ...,
          [0.5255, 0.5255, 0.5294,  ..., 0.4784, 0.4745, 0.4745],
          [0.5216, 0.5255, 0.5294,  ..., 0.4824, 0.4784, 0.4745],
          [0.5216, 0.5255, 0.5294,  ..., 0.4824, 0.4745, 0.4706]],

         [[0.5843, 0.5843, 0.5843,  ..., 0.6706, 0.6627, 0.6588],
          [0.5843, 0.5843, 0.5843,  ..., 0.6706, 0.6667, 0.6667],
          [0.5804, 0.5804, 0.5843,  ..., 0