In [1]:
# 上位ディレクトリをパスに追加
import sys
sys.path.append('..')

In [9]:
from sklearn.metrics import confusion_matrix, classification_report
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import models
from tqdm import tqdm

from dataset import SceneDataset, make_datapath_list
from preprocess import ImageTransform

In [3]:
# 前処理用の定数
size = 224
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
batch_size = 32

In [4]:
# ファイルパス取得
train_list = make_datapath_list(phase='train', data_dir='../data/*/')
val_list = make_datapath_list(phase='val', data_dir='../data/*/')
print('train:', len(train_list), 'val:', len(val_list))

../data/*/train/*.jpg
../data/*/val/*.jpg
train: 4573 val: 938


In [7]:
# 前処理クラスとデータセット，データローダの定義
image_transform = ImageTransform(size, mean, std)

train_dataset = SceneDataset(train_list, image_transform, 'train')
val_dataset = SceneDataset(val_list, image_transform, 'val')

train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size, shuffle=True)

dataloaders_dict = {'train': train_dataloader, 'val': val_dataloader}

In [8]:
# モデルと学習済み重みの読み込み
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model_path = '../models/epoch_50.pth'

net = models.resnet18(pretrained=False)
net.fc = nn.Sequential(
        nn.Linear(512, 256),
        nn.ReLU(inplace=True),
        nn.Dropout(0.4),
        nn.Linear(256, 2),
        nn.Softmax(dim=1),  # 出力を確率にするために追加
    )

net.to(device)
print('device:', device)

# GPUで学習したものをCPUで推論
load_weights = torch.load(model_path, map_location=torch.device('cpu'))
net.load_state_dict(load_weights)
net.eval()

device: cpu


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

## trainの評価

In [25]:
train_pred = torch.tensor([], dtype=torch.uint8)
train_label = torch.tensor([], dtype=torch.uint8)

with torch.no_grad():
    for inputs, labels in tqdm(dataloaders_dict['train']):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # ネットワークで推論
        outputs = net(inputs)
        _, preds = torch.max(outputs, 1)

        train_pred = torch.cat([train_pred, preds.cpu()], dim=0)
        train_label = torch.cat([train_label, labels.cpu()], dim=0)


  0%|                                                                                          | 0/143 [00:00<?, ?it/s][A
  1%|▌                                                                                 | 1/143 [00:02<05:19,  2.25s/it][A
  1%|█▏                                                                                | 2/143 [00:04<05:30,  2.34s/it][A
  2%|█▋                                                                                | 3/143 [00:06<05:19,  2.28s/it][A
  3%|██▎                                                                               | 4/143 [00:09<05:20,  2.30s/it][A
  3%|██▊                                                                               | 5/143 [00:11<05:13,  2.27s/it][A
  4%|███▍                                                                              | 6/143 [00:14<05:34,  2.44s/it][A
  5%|████                                                                              | 7/143 [00:16<05:27,  2.40s/it][A
  6%|████▌     

 46%|█████████████████████████████████████▍                                           | 66/143 [02:54<03:41,  2.88s/it][A
 47%|█████████████████████████████████████▉                                           | 67/143 [02:58<04:02,  3.19s/it][A
 48%|██████████████████████████████████████▌                                          | 68/143 [03:01<03:58,  3.18s/it][A
 48%|███████████████████████████████████████                                          | 69/143 [03:04<03:45,  3.05s/it][A
 49%|███████████████████████████████████████▋                                         | 70/143 [03:06<03:33,  2.92s/it][A
 50%|████████████████████████████████████████▏                                        | 71/143 [03:09<03:29,  2.92s/it][A
 50%|████████████████████████████████████████▊                                        | 72/143 [03:13<03:40,  3.11s/it][A
 51%|█████████████████████████████████████████▎                                       | 73/143 [03:16<03:39,  3.13s/it][A
 52%|███████████

 92%|█████████████████████████████████████████████████████████████████████████▊      | 132/143 [05:43<00:28,  2.59s/it][A
 93%|██████████████████████████████████████████████████████████████████████████▍     | 133/143 [05:46<00:25,  2.59s/it][A
 94%|██████████████████████████████████████████████████████████████████████████▉     | 134/143 [05:49<00:23,  2.59s/it][A
 94%|███████████████████████████████████████████████████████████████████████████▌    | 135/143 [05:51<00:20,  2.51s/it][A
 95%|████████████████████████████████████████████████████████████████████████████    | 136/143 [05:54<00:18,  2.61s/it][A
 96%|████████████████████████████████████████████████████████████████████████████▋   | 137/143 [05:56<00:14,  2.47s/it][A
 97%|█████████████████████████████████████████████████████████████████████████████▏  | 138/143 [05:59<00:13,  2.63s/it][A
 97%|█████████████████████████████████████████████████████████████████████████████▊  | 139/143 [06:01<00:10,  2.60s/it][A
 98%|███████████

In [17]:
classes = ['buildings', 'street']
print(classification_report(train_label, train_pred, target_names=classes))

ValueError: only one element tensors can be converted to Python scalars

## validationの評価

In [None]:
val_pred = torch.tensor([], dtype=torch.uint8)
val_label = torch.tensor([], dtype=torch.uint8)

with torch.no_grad():
    for inputs, labels in tqdm(dataloaders_dict['val']):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # ネットワークで推論
        outputs = net(inputs)
        _, preds = torch.max(outputs, 1)

        val_pred = torch.cat([val_pred, preds.cpu()], dim=0)
        val_label = torch.cat([val_label, labels.cpu()], dim=0)

In [None]:
print(classification_report(val_label, val_pred, target_names=classes))