# 物体認識

PyTorch に実装されている標準機能を利用して、画像中に写っている物体を分類するモデルの構築と評価を行うサンプルコード。

## セットアップ

In [1]:
import torch
import torchvision


# Run on the first CUDA GPU when PyTorch can see one; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')


# Training-time preprocessing: random 224x224 crop, random horizontal flip,
# random affine rotation (degrees=45), then tensor conversion and per-channel
# normalization.
# NOTE(review): the mean/std values look like the standard ImageNet channel
# statistics used with torchvision's pretrained models -- confirm.
transform_train = torchvision.transforms.Compose([
    torchvision.transforms.RandomCrop(size=224),
    torchvision.transforms.RandomHorizontalFlip(p=0.5),
    torchvision.transforms.RandomAffine(degrees=45),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Validation-time preprocessing: deterministic center crop (no augmentation)
# followed by the same tensor conversion and normalization as training.
transform_valid = torchvision.transforms.Compose([
    torchvision.transforms.CenterCrop(size=224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])


# Root directories of the two splits; they are passed to ImageFolder below,
# which builds its class list from the directory contents.
train_data_dir = '/data/workshop1/rcait/dataset/citrus/leaves/train'
valid_data_dir = '/data/workshop1/rcait/dataset/citrus/leaves/valid'

# Training split: augmented samples, reshuffled on every pass, batches of 32.
trainset = torchvision.datasets.ImageFolder(
    root=train_data_dir,
    transform=transform_train,
)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=32,
    shuffle=True,
)

# Validation split: deterministic preprocessing, fixed order, batches of 32.
validset = torchvision.datasets.ImageFolder(
    root=valid_data_dir,
    transform=transform_valid,
)
validloader = torch.utils.data.DataLoader(
    dataset=validset,
    batch_size=32,
    shuffle=False,
)

## モデル構築

In [3]:
# Start from torchvision's pretrained VGG16 and replace the last classifier
# layer with a fresh linear layer that has one output per training class.
net = torchvision.models.vgg16(pretrained=True)
num_ftrs = net.classifier[6].in_features
net.classifier[6] = torch.nn.Linear(
    in_features=num_ftrs,
    out_features=len(trainset.classes),
)

# Every parameter of the network (not only the new layer) is given to Adam.
optimizer = torch.optim.Adam(params=net.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()

# Switch to training mode, then move the model to the selected device.
net.train()
net = net.to(device)

# Train for 20 epochs. After each epoch, print a two-element list:
# [sum of the per-batch losses, training accuracy over the epoch].
for epoch in range(20):

    # Per-epoch accumulators.
    running_loss = 0.0
    n_correct = 0
    n_total = 0

    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard optimization step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Bookkeeping: the predicted class is the index of the largest logit.
        _, predicted = outputs.data.max(1)
        n_correct += (predicted == labels).sum().item()
        n_total += len(labels)
        running_loss += loss.item()

    print([running_loss, n_correct / n_total])
  

## モデル検証

In [None]:
# Measure classification accuracy on the validation split.
# eval() puts the network into inference mode (training-only layer behavior
# is switched off).
net.eval()

n_correct = 0
n_total = 0

# Evaluation never calls backward(), so run the forward passes without autograd
# bookkeeping: this avoids building computation graphs and lowers memory use.
with torch.no_grad():
    for inputs, labels in validloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = net(inputs)

        # The predicted class is the index of the largest logit in each row.
        _, predicted = torch.max(outputs, 1)

        n_correct += (predicted == labels).sum().item()
        n_total += len(labels)

# Fraction of validation samples classified correctly.
val_acc = n_correct / n_total
print(val_acc)

## モデル保存


In [None]:
# Move the model to the CPU first so the saved state dict holds CPU tensors,
# then write only the weights (state dict), not the whole model object.
net = net.cpu()
torch.save(obj=net.state_dict(), f='vgg16net_weights.pth')

## 推論


In [None]:
# Read the saved weights first. map_location='cpu' makes loading work on a
# machine without a GPU regardless of where the tensors were when saved.
state_dict = torch.load('vgg16net_weights.pth', map_location='cpu')

# Rebuild the same architecture that was trained: VGG16 with a replaced final
# classifier layer. The number of outputs is taken from the checkpoint itself
# (rows of the last layer's weight matrix) instead of being hard-coded, so the
# model always matches the weights being loaded.
app = torchvision.models.vgg16(pretrained=False)
num_ftrs = app.classifier[6].in_features
num_classes = state_dict['classifier.6.weight'].shape[0]
app.classifier[6] = torch.nn.Linear(num_ftrs, num_classes)

# load weights into the initialized model
app.load_state_dict(state_dict)

# change to inference (evaluation) mode
app.eval()

In [None]:
import PIL
import numpy as np
import matplotlib.pyplot as plt

# Pick one image from the valid/healthy directory and display it.
img_path = '/data/workshop1/rcait/dataset/citrus/leaves/valid/healthy/19.pn.png'

im = PIL.Image.open(img_path)
# matplotlib draws from an array, so view the PIL image as a NumPy array first.
pixels = np.asarray(im)
plt.imshow(pixels)

In [None]:
def image_loader(image_path):
    """Load one image file and turn it into a single-sample model input.

    The image is converted to RGB before preprocessing, which is what
    torchvision's ImageFolder default loader does for the training and
    validation data. Without it, RGBA, palette, or grayscale files would
    reach the 3-channel normalization with the wrong number of channels.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The tensor produced by ``transform_valid`` with an extra leading
        dimension of size 1 (a batch containing just this image).
    """
    # The context manager closes the file handle; convert() returns an
    # independent in-memory image, so it stays usable after the file is closed.
    with PIL.Image.open(image_path) as raw:
        im = raw.convert('RGB')
    im = transform_valid(im)
    # The model expects a batch dimension in front.
    im = im.unsqueeze(0)
    return im


# Preprocess the chosen image and run it through the reloaded model.
x = image_loader(image_path=img_path)
output = app(x)

# Show the raw model output, then the class names of the training dataset
# on the following line.
print(output, trainset.classes, sep='\n')

In [None]:
# Softmax across dim=1 turns each row of the model output into values that
# sum to 1; detach from autograd and copy into a standalone NumPy array.
class_probs = output.softmax(dim=1)
prob1 = class_probs.detach().numpy().copy()
print(prob1)

In [None]:
# Element-wise sigmoid of the model output: each value is squashed into (0, 1)
# independently, so unlike softmax the row does not have to sum to 1.
class_scores = output.sigmoid()
prob2 = class_scores.detach().numpy().copy()
print(prob2)