In [None]:
import torch
from torchvision import transforms, datasets, models
import matplotlib.pyplot as plt
import torchvision
import numpy as np
import time
import copy
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from tqdm import tqdm


def imshow(inp, title=None):
    """Show an image tensor with matplotlib.

    The tensor is converted to a NumPy array, its axes are permuted with
    ``(1, 2, 0)`` (first axis moved to the end), and the values are clipped
    to ``[0, 1]`` before plotting. No de-normalisation is applied.

    Args:
        inp: CPU tensor holding one image (``.numpy()`` is called on it).
        title: Optional figure title; skipped when ``None``.
    """
    image = np.clip(np.transpose(inp.numpy(), (1, 2, 0)), 0, 1)
    plt.imshow(image)
    if title is not None:
        plt.title(title)
    # A very short pause gives the GUI event loop a chance to redraw.
    plt.pause(0.001)

# Accuracy history filled by train_model(): one entry per phase, appended in
# the order train, val, train, val, ...
acc_list = []


def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' pass followed by a 'val' pass over the
    module-level ``dataloaders``; batches are moved to the module-level
    ``device`` and averages are taken over ``dataset_sizes[phase]``.

    Args:
        model: Network to optimise (modified in place).
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimiser stepped once per training batch.
        scheduler: LR scheduler stepped once after each training pass.
        num_epochs: Number of train+val epochs to run.

    Returns:
        The same ``model`` object, with the weights of the epoch that reached
        the highest validation accuracy loaded.

    Side effects:
        Prints progress and appends each phase's accuracy to ``acc_list``.
    """
    started_at = time.time()

    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # One optimisation pass, then one evaluation pass.
        for phase in ('train', 'val'):
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            loss_sum = 0.0
            correct_count = 0

            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Autograd history is only recorded during the training pass.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    preds = torch.max(outputs, 1)[1]
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size; the sum is divided
                # by the dataset size once the pass is finished.
                loss_sum += loss.item() * inputs.size(0)
                correct_count += torch.sum(preds == labels.data)

            if is_training:
                scheduler.step()

            epoch_loss = loss_sum / dataset_sizes[phase]
            epoch_acc = correct_count.double() / dataset_sizes[phase]
            acc_list.append(epoch_acc)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Snapshot the weights whenever validation accuracy improves.
            if not is_training and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - started_at
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Hand back the best-performing weights rather than the last epoch's.
    model.load_state_dict(best_model_wts)
    return model


def visualize_model(model, num_images=6):
    """Plot the model's predictions for the first validation images.

    Batches are read from the module-level ``dataloaders['val']``, moved to
    the module-level ``device``, and each image is drawn in a two-column
    subplot grid titled with the predicted entry of ``class_names``.

    Args:
        model: Network to evaluate. It is switched to eval mode for the
            duration of the call and its previous training/eval mode is
            restored afterwards, even if plotting raises.
        num_images: Maximum number of images to draw. Non-positive values
            draw nothing.
    """
    if num_images <= 0:
        return

    was_training = model.training
    model.eval()
    shown = 0
    # Round up so an odd num_images still has a slot for the last image.
    n_rows = (num_images + 1) // 2
    plt.figure()

    try:
        with torch.no_grad():
            for inputs, _ in dataloaders['val']:
                inputs = inputs.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                # Copy the batch back to the CPU once, not once per image.
                images = inputs.cpu()
                for j in range(images.size(0)):
                    shown += 1
                    ax = plt.subplot(n_rows, 2, shown)
                    ax.axis('off')
                    ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                    imshow(images[j])

                    if shown == num_images:
                        return
    finally:
        model.train(mode=was_training)


print("start")
# Training-time pipeline: random crop and flip for augmentation, then
# per-channel normalisation.
data_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.453, 0.451, 0.431],
                         std=[0.251, 0.243, 0.221])
])
# Evaluation pipeline: deterministic resize + centre crop.
# NOTE(review): unlike the training pipeline this one does not normalise, so
# the model is validated on differently scaled inputs than it is trained on.
# Confirm whether that is intended before changing it - the image display and
# CAM code elsewhere in this file also feed un-normalised images.
test_data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor()
])

artists_dataset = datasets.ImageFolder(root='/home/lab/yasuhiro/Downloads/best-artworks-of-all-time/images/images',
                                       transform=data_transform)
dataset_loader = torch.utils.data.DataLoader(artists_dataset,
                                             batch_size=4, shuffle=True,
                                             num_workers=4)

test_dataset = datasets.ImageFolder(root='/home/lab/yasuhiro/Downloads/best-artworks-of-all-time/test',
                                    transform=test_data_transform)

test_dataset_loader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=4, shuffle=True,
                                                  num_workers=4)

# NOTE(review): this dict is not referenced by the datasets built above; they
# use data_transform / test_data_transform directly.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.453, 0.451, 0.431], [0.251, 0.243, 0.221])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.453, 0.451, 0.431], [0.251, 0.243, 0.221])
    ]),
}

# Phase-keyed lookups consumed by the training and visualisation helpers.
image_datasets = {'train': artists_dataset, 'val': test_dataset}
dataloaders = {'train': dataset_loader, 'val': test_dataset_loader}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
class_names2 = image_datasets['val'].classes
print('classname:', class_names)
print('classname2:', class_names2)
print("Dataloader OK")
# device = torch.device("cpu")
# NOTE(review): the GPU index 3 is hard-coded; this assumes the machine has
# at least four CUDA devices.
device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")
print("device OK")


def get_std(dataloader):
    """Return the per-channel mean and std of the first batch of a loader.

    Only the first item yielded by ``dataloader`` is inspected; its element
    at index 0 must be a 4-D tensor, and the statistics are reduced over
    axes 0, 2 and 3 so that one value per index of axis 1 remains.

    Args:
        dataloader: Iterable yielding ``(inputs, ...)`` sequences.

    Returns:
        Tuple ``(mean, std)`` of 1-D NumPy arrays.

    Raises:
        StopIteration: If ``dataloader`` yields nothing.
    """
    # The builtin next() works with every iterator; the ``.next()`` method is
    # not part of the Python 3 iterator protocol.
    batch = next(iter(dataloader))[0].numpy()
    reduce_axes = (0, 2, 3)
    mean = np.mean(batch, axis=reduce_axes)
    std = np.std(batch, axis=reduce_axes)
    return mean, std

# Optional preview of one training batch (disabled; only the progress
# messages are still printed).
print("start iter")
# inputs, classes = next(iter(dataset_loader))
print("make grid")
# Make a grid from batch
# out = torchvision.utils.make_grid(inputs)

# imshow(out, title=[artists_dataset.classes[x] for x in classes])


print("start fit model")
model_ft = models.resnet50(pretrained=True)
num_ftrs = model_ft.fc.in_features
# Replace the classification head with one output per class found in the
# training folder, so the head always matches the labels the loss receives.
model_ft.fc = nn.Linear(num_ftrs, len(class_names))

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

print("model OK")

model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=25)

visualize_model(model_ft)
print("acc_list:", acc_list)
# Saves the whole module object (not just its state_dict).
torch.save(model_ft, './resnet50_acc77.pth')

In [None]:
import seaborn

model = model_ft
# Size the matrix from the classifier head so it always matches the range of
# indices the model can predict.
nb_classes = model.fc.out_features
model.eval()

# confusion_matrix[t, p] counts validation samples of true class t that were
# predicted as class p.
confusion_matrix = torch.zeros(nb_classes, nb_classes)

with torch.no_grad():
    for inputs, classes in dataloaders['val']:
        inputs = inputs.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        for t, p in zip(classes.view(-1).tolist(), preds.view(-1).tolist()):
            confusion_matrix[t, p] += 1

print(confusion_matrix)
seaborn.heatmap(confusion_matrix.numpy(), cmap='Blues')
plt.show()
# To get the per-class accuracy:
print(confusion_matrix.diag()/confusion_matrix.sum(1))

confusion_matrix = confusion_matrix.numpy()
print(confusion_matrix)
print(confusion_matrix.size)
print(confusion_matrix[0].size)
# Row-normalise in a single step. Dividing element by element while writing
# back into the same row changes the row sum part-way through and yields wrong
# fractions. Rows without any samples are left at zero instead of dividing
# by zero.
row_totals = confusion_matrix.sum(axis=1, keepdims=True)
confusion_matrix = np.divide(confusion_matrix, row_totals,
                             out=np.zeros_like(confusion_matrix),
                             where=row_totals > 0)

print(confusion_matrix)
seaborn.heatmap(confusion_matrix, cmap='Blues')
plt.show()

# acc_list alternates train, val, train, val, ... (one pair per epoch).
train_acc = [acc.item() for acc in acc_list[0::2]]
test_acc = [acc.item() for acc in acc_list[1::2]]
plt.figure()
plt.plot(range(len(train_acc)), train_acc)
plt.show()
plt.figure()
plt.plot(range(len(test_acc)), test_acc)
plt.show()

In [None]:
import os
from PIL import Image
from torch.autograd import Variable
from torch.nn import functional as F
import cv2
# networks such as googlenet, resnet, densenet already use global average pooling at the end, so CAM could be used directly.

net = model_ft
# Name of the sub-module whose output is used to build the activation maps.
finalconv_name = 'layer4'

net.eval()

# hook the feature extractor
# Holds the hooked layer's output from the most recent forward pass only.
features_blobs = []


def hook_feature(module, input, output):
    """Forward hook: store the hooked layer's latest output as a NumPy array.

    The list is overwritten in place instead of appended to, so it never
    grows across forward passes and its single element always belongs to the
    image that was processed last.
    """
    features_blobs[:] = [output.detach().cpu().numpy()]


getattr(net, finalconv_name).register_forward_hook(hook_feature)

# get the softmax weight
# The second-to-last parameter is used as the classifier weight matrix
# (presumably fc.weight, with fc.bias last - verify if the head changes).
params = list(net.parameters())
weight_softmax = np.squeeze(params[-2].detach().cpu().numpy())
# print('params:', params)
print('weight_s:', weight_softmax)

def returnCAM(feature_conv, weight_softmax, class_idx):
    """Build class activation maps, each resized to 256x256.

    For every index in ``class_idx`` the matching row of ``weight_softmax``
    is multiplied with the flattened feature maps, the result is shifted to
    start at zero, scaled to ``[0, 255]`` and resized.

    Args:
        feature_conv: 4-D array of shape ``(bz, nc, h, w)``. The reshape to
            ``(nc, h*w)`` only succeeds when ``bz == 1``.
        weight_softmax: Array indexed by class whose rows have ``nc`` entries.
        class_idx: Iterable of class indices to render.

    Returns:
        List of ``uint8`` arrays, one per entry of ``class_idx``.
    """
    # generate the class activation maps upsample to 256x256
    size_upsample = (256, 256)
    bz, nc, h, w = feature_conv.shape
    # The flattened features do not depend on the class, so build them once.
    flat_features = feature_conv.reshape((nc, h * w))
    output_cam = []
    for idx in class_idx:
        cam = weight_softmax[idx].dot(flat_features).reshape(h, w)
        cam = cam - np.min(cam)
        peak = np.max(cam)
        # A constant map has peak 0 after the shift; leave it all-zero rather
        # than dividing by zero and casting NaNs to uint8.
        if peak > 0:
            cam = cam / peak
        cam_img = np.uint8(255 * cam)
        output_cam.append(cv2.resize(cam_img, size_upsample))
    return output_cam


# Per-channel normalisation transform.
# NOTE(review): these look like the commonly used ImageNet statistics and
# differ from the mean/std used by the training transform; `normalize` is
# also not included in `preprocess` below - confirm whether it is needed.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Pipeline applied to a PIL image before it is fed to the network:
# resize straight to 224x224 (no cropping), then convert to a tensor.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

def show_img(path):
    """Classify one image and display it with its class activation map.

    The image is run through the module-level ``net`` after ``preprocess``,
    the highest-scoring classes (up to five) are printed, and a CAM for the
    top-1 class is built from the activation captured by the forward hook in
    ``features_blobs`` and overlaid on the original image.

    Args:
        path: Filesystem path of the image to analyse.
    """
    # Force three channels so greyscale / RGBA / CMYK files match the network
    # input and the (height, width, 3) layout assumed below.
    img_pil = Image.open(path).convert('RGB')

    img_tensor = preprocess(img_pil).unsqueeze(0).to(device)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        logit = net(img_tensor)

    classes = class_names

    h_x = F.softmax(logit, dim=1).squeeze(0)
    probs, idx = h_x.sort(0, True)
    probs = probs.cpu().numpy()
    idx = idx.cpu().numpy()

    print('idx', idx)
    # output the prediction
    for i in range(min(5, len(idx))):
        print('{:.3f} -> {}'.format(probs[i], classes[idx[i]]))

    # generate class activation mapping for the top1 prediction
    # Use the newest captured activation; index 0 would keep pointing at the
    # first image ever processed when the hook accumulates entries.
    CAMs = returnCAM(features_blobs[-1], weight_softmax, [idx[0]])

    # render the CAM and output
    print('output {} for the top1 prediction: {}'.format(path ,classes[idx[0]]))
    # Display the same RGB pixels the network saw; matplotlib expects RGB.
    img = np.asarray(img_pil)
    height, width, _ = img.shape
    heatmap = cv2.applyColorMap(cv2.resize(CAMs[0], (width, height)), cv2.COLORMAP_JET)
    # OpenCV colour maps come back in BGR order; convert for matplotlib.
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

    plt.figure()
    plt.imshow(img)
    plt.show()

    plt.figure()
    plt.imshow(img, alpha=0.9)
    plt.imshow(heatmap, alpha=0.5)
#     plt.savefig("/home/lab/yasuhiro/Downloads/best-artworks-of-all-time/result/"+path[-13:-4]+'_r50.jpg')
    plt.show()
    
# Show the CAM for one image of every class folder in the test split.
# (Named data_root rather than `dir` so the builtin dir() is not shadowed.)
data_root = "/home/lab/yasuhiro/Downloads/best-artworks-of-all-time/"
test_root = os.path.join(data_root, "test")
# Sorted so the folder order, and the image picked per folder, is reproducible.
list_labels = sorted(os.listdir(test_root))
for label in list_labels:
    label_dir = os.path.join(test_root, label)
    # Skip stray files sitting next to the class folders.
    if not os.path.isdir(label_dir):
        continue
    list_image = sorted(os.listdir(label_dir))
    if list_image:
        show_img(os.path.join(label_dir, list_image[0]))