In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler

import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Dataset, random_split
import os
import copy
import matplotlib.pyplot as plt

from __future__ import print_function, division
import time

plt.ion()

In [2]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = 'horse2zebra'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
from typing import Union, Callable, Tuple
from functools import reduce
from collections import deque
from typing import Union, Tuple
from torch.nn import Module

def _iterative_gradient(model: Module,
                        x: torch.Tensor,
                        y: torch.Tensor,
                        loss_fn: Callable,
                        k: int,
                        step: float,
                        eps: float,
                        norm: Union[str, float],
                        step_norm: Union[str, float],
                        y_target: torch.Tensor = None,
                        random: bool = False) -> torch.Tensor:
  
    x_adv = x.clone().detach().requires_grad_(True).to(x.device)
    targeted = y_target is not None

    if random:
        x_adv = random_perturbation(x_adv, norm, eps)

    for i in range(k):
        _x_adv = x_adv.clone().detach().requires_grad_(True)

        prediction = model(_x_adv)
        loss = loss_fn(prediction, y_target if targeted else y)
        loss.backward()

        with torch.no_grad():
            if step_norm == 'inf':
                gradients = _x_adv.grad.sign()*step
            else:
                # .view() assumes batched image data as 4D tensor
                gradients = _x_adv.grad * step / _x_adv.grad.view(_x_adv.shape[0], -1).norm(step_norm, dim=-1)\
                    .view(-1, 1, 1, 1)

            if targeted:
                # Targeted: Gradient descent with on the loss of the (incorrect) target label
                # w.r.t. the model parameters
                x_adv -= gradients
            else:
                # Untargeted: Gradient ascent on the loss of the correct label w.r.t.
                # the model parameters
                x_adv += gradients


        # Project back into l_norm ball and correct range
        x_adv = project(x, x_adv, norm, eps)

    return x_adv.detach()

def pgd(model: Module,
        x: torch.Tensor,
        y: torch.Tensor,
        loss_fn: Callable,
        k: int,
        step: float,
        eps: float,
        norm: Union[str, float],
        y_target: torch.Tensor = None,
        random: bool = False) -> torch.Tensor:
   
    return _iterative_gradient(model=model, x=x, y=y, loss_fn=loss_fn, k=k, eps=eps, norm=norm, step=step, step_norm=2,
                               y_target=y_target, random=random)


def project(x: torch.Tensor, x_adv: torch.Tensor, norm: Union[str, int], eps: float) -> torch.Tensor:
    
    if x.shape != x_adv.shape:
        raise ValueError('Input Tensors must have the same shape')

    if norm == 'inf':
        # Workaround as PyTorch doesn't have elementwise clip
        x_adv = torch.max(torch.min(x_adv, x + eps), x - eps)
    else:
        delta = x_adv - x

        # Assume x and x_adv are batched tensors where the first dimension is
        # a batch dimension
        mask = delta.view(delta.shape[0], -1).norm(norm, dim=1) <= eps

        scaling_factor = delta.view(delta.shape[0], -1).norm(norm, dim=1)
        scaling_factor[mask] = eps

        # .view() assumes batched images as a 4D Tensor
        delta *= eps / scaling_factor.view(-1, 1, 1, 1)

        x_adv = x + delta

    return x_adv

In [4]:
def train_model(model, criterion, optimizer, scheduler, num_epochs, epsilon):
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            running_loss = 0.0
            running_corrects = 0
            for i, (inputs, labels) in enumerate(dataloaders[phase]):
#                 if (i%50==0):
#                     print('Batch : {}'.format(i))
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                if (epsilon != 0):
                    inputs = pgd(model, inputs, labels, criterion, k=10, step=0.1, eps=epsilon, norm=2)
                with torch.set_grad_enabled(phase == 'train'):
#                     print(inputs)
                    outputs = model(inputs)
                    preds = torch.argmax(outputs, 1)
                    loss = criterion(outputs, labels)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
     
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)      
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            if phase == 'train':
                scheduler.step()
                
            print('{} loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))            
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
            model_wts = copy.deepcopy(model.state_dict())
        print()
    print('Best val accuracy: {:4f}'.format(best_acc))
    model.load_state_dict(model_wts)
    return model

In [5]:
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 2)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.Adam(model_ft.parameters(), lr=3e-5)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=10, gamma=0.1)

In [6]:
for i in [0, 3]:
    model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=10, epsilon=i)
    torch.save(model_ft.state_dict(), "models/pwny_eps_"+str(i)+".pth")

Epoch 1/10
----------
train loss: 0.3077 Acc: 0.8613
val loss: 0.0666 Acc: 0.9911

Epoch 2/10
----------
train loss: 0.2264 Acc: 0.9145
val loss: 0.0429 Acc: 0.9931

Epoch 3/10
----------
train loss: 0.2248 Acc: 0.9260
val loss: 0.0472 Acc: 0.9901

Epoch 4/10
----------
train loss: 0.1458 Acc: 0.9504
val loss: 0.0508 Acc: 0.9901

Epoch 5/10
----------
train loss: 0.1294 Acc: 0.9518
val loss: 0.0660 Acc: 0.9812

Epoch 6/10
----------
train loss: 0.1791 Acc: 0.9418
val loss: 0.0251 Acc: 0.9921

Epoch 7/10
----------
train loss: 0.1451 Acc: 0.9547
val loss: 0.0323 Acc: 0.9950

Epoch 8/10
----------
train loss: 0.1794 Acc: 0.9396
val loss: 0.0245 Acc: 0.9960

Epoch 9/10
----------
train loss: 0.1496 Acc: 0.9540
val loss: 0.0360 Acc: 0.9891

Epoch 10/10
----------
train loss: 0.1554 Acc: 0.9482
val loss: 0.0283 Acc: 0.9941

Best val accuracy: 0.996040
Epoch 1/10
----------
train loss: 0.6818 Acc: 0.7692
val loss: 0.1831 Acc: 0.9465

Epoch 2/10
----------
train loss: 0.4819 Acc: 0.8174
val l

In [4]:
# worklist: train test split to remake splits, then retrain
data = np.load('train_final_data_1.npy')
labels = np.load('train_final_label_1.npy')
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.30, random_state=42, stratify=labels)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.50, random_state=21, stratify = y_test)
np.save(X_train, 'new_data/data_train')
np.save(y_train, 'new_data/label_train')
np.save(X_val, 'new_data/data_val')
np.save(y_val, 'new_data/label_val')
np.save(X_test, 'new_data/data_test')
np.save(y_test, 'new_data/label_test')

TypeError: expected str, bytes or os.PathLike object, not numpy.ndarray