In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os, time
import argparse
import math
import random
import numpy as np

import torch
import torch.autograd as autograd
import torch.utils.data as data
import torchvision
import torchvision.datasets.folder
import torchvision.transforms as transforms

from dataset import Dataset
import pretrainedmodels
from pretrainedmodels.models import pnasnet5large
from pretrainedmodels.models import inceptionresnetv2

In [2]:
print(pretrainedmodels.model_names)

['fbresnet152', 'bninception', 'resnext101_64x4d', 'resnext101_32x4d', 'inceptionv4', 'inceptionresnetv2', 'resnet18', 'squeezenet1_0', 'squeezenet1_1', 'resnet50', 'vgg11_bn', 'vgg19', 'vgg16', 'resnet34', 'vgg13', 'densenet201', 'densenet121', 'vgg13_bn', 'densenet161', 'resnet152', 'vgg16_bn', 'inceptionv3', 'resnet101', 'vgg19_bn', 'densenet169', 'vgg11', 'alexnet', 'nasnetamobile', 'nasnetalarge', 'dpn92', 'dpn68b', 'dpn68', 'dpn107', 'dpn98', 'dpn131', 'xception', 'senet154', 'se_resnet50', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'se_resnet101', 'cafferesnet101', 'pnasnet5large', 'polynet']


In [3]:
parser = argparse.ArgumentParser(description='Defence')
parser.add_argument('--input_dir', metavar='DIR', default='',
                    help='Input directory with images.')
parser.add_argument('--output_file', metavar='FILE', default='',
                    help='Output file to save labels.')
parser.add_argument('--img-size', type=int, default=299, metavar='N',
                    help='Image patch size (default: 299)')
parser.add_argument('--batch-size', type=int, default=16, metavar='N',
                    help='Batch size (default: 16)')
parser.add_argument('--no-gpu', action='store_true', default=False,
                    help='disables GPU training')
parser.add_argument('--iteration', type=int, default=30, 
                    help='Number of iteration (default: 30)')

def batch_transform(inputs, transform, size):
    input_shape = list(inputs.size())
    res = torch.zeros(input_shape[0], input_shape[1], size, size)
    for i in range(input_shape[0]):
        res[i,:,:,:] = transform(inputs[i,:,:,:])
    return res

# codes for random padding
def padding_layer_iyswim(inputs, shape, transform):
    h_start = shape[0]
    w_start = shape[1]
    output_short = shape[2]
    # print(output_short)
    input_shape = list(inputs.size())
    #print(input_shape)
    # input shape (16, 3, 299, 299)
    input_short = min(input_shape[2:4])
    input_long = max(input_shape[2:4])
    #print(input_long, input_short)
    output_long = int(math.ceil( 1. * float(output_short) * float(input_long) / float(input_short)))
    output_height = output_long if input_shape[1] >= input_shape[2] else output_short
    output_width = output_short if input_shape[1] >= input_shape[2] else output_long  
    # print(output_height, output_width, output_long)
    padding = torch.nn.ConstantPad3d((w_start, output_width - w_start - input_shape[3], h_start, output_height - h_start - input_shape[2], 0,0), 0)
    outputs = padding(inputs)
    # print(type(outputs))
    return batch_transform(outputs, transform, 299)


class LeNormalize(object):
    """Normalize to -1..1 in Google Inception style
    """

    def __call__(self, tensor):
        for t in tensor:
            t.sub_(0.5).mul_(2.0)
        return tensor

    

In [4]:
input_dir = '../../dataset/images2/'
output_file = 'output.csv'
image_size = 299
batch_size = 16
no_gpu = False
itr = 30

In [5]:
start_time = time.time()

tf = transforms.Compose([
    transforms.Resize([image_size,image_size]),
    transforms.ToTensor()
])

tf_flip = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor()
])  

tf_shrink = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize([299,299]),
    transforms.ToTensor()
]) 


In [6]:
with torch.no_grad():
    mean_torch = autograd.Variable(torch.from_numpy(np.array([0.485, 0.456, 0.406]).reshape([1,3,1,1]).astype('float32')).cuda())
    std_torch = autograd.Variable(torch.from_numpy(np.array([0.229, 0.224, 0.225]).reshape([1,3,1,1]).astype('float32')).cuda())
    mean_tf = autograd.Variable(torch.from_numpy(np.array([0.5, 0.5, 0.5]).reshape([1,3,1,1]).astype('float32')).cuda())
    std_tf = autograd.Variable(torch.from_numpy(np.array([0.5, 0.5, 0.5]).reshape([1,3,1,1]).astype('float32')).cuda())

    dataset = Dataset(input_dir, transform=tf)
    loader = data.DataLoader(dataset, batch_size=batch_size, shuffle=False)
    

In [7]:
#inceptionresnetv2
model = inceptionresnetv2(num_classes=1001, pretrained='imagenet+background')
model = model.cuda()

In [8]:
model.eval()


InceptionResNetV2(
  (conv2d_1a): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_2a): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_2b): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (maxpool_3a): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2d_3b): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_4a): 

In [9]:
outputs = []
for batch_idx, (input, _) in enumerate(loader):
    # print(input.size())
    length_input, _, _, _ = input.size()
    iter_labels = np.zeros([length_input, 1001, itr])
    for j in range(itr):
        # random fliping
        input0 = batch_transform(input, tf_flip, 299)
        # random resizing
        resize_shape_ = random.randint(310, 331)
        image_resize = 331
        tf_rand_resize = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize([resize_shape_, resize_shape_]),
            transforms.ToTensor()
        ]) 
        input1 = batch_transform(input0, tf_rand_resize, resize_shape_)

        # ramdom padding
        shape = [random.randint(0, image_resize - resize_shape_), random.randint(0, image_resize - resize_shape_), image_resize]
        # print(shape)
       
        new_input = padding_layer_iyswim(input1, shape, tf_shrink)
        #print(type(new_input))
    if not no_gpu:
        new_input = new_input.cuda()
    with torch.no_grad():
        input_var = autograd.Variable(new_input)
        logits = model(input_var)
        labels = logits.max(1)[1]
    outputs.append(labels.data.cpu().numpy())
outputs = np.concatenate(outputs, axis=0)

RuntimeError: Expected object of type torch.FloatTensor but found type torch.cuda.FloatTensor for argument #2 'weight'

In [None]:
with open(output_file, 'w') as out_file:
    filenames = dataset.filenames()
    for filename, label in zip(filenames, outputs):
        filename = os.path.basename(filename)
        out_file.write('{0},{1}\n'.format(filename, label))

In [None]:
print(max, class_id)