# Adversarial Example Generation for Images

In [1]:
# Enable IPython's autoreload extension in mode 2 so edited modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pdb
import pandas as pd
import numpy as np

from pathlib import Path
from PIL import Image
from collections import OrderedDict
from argparse import Namespace
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import torch
# `torch.tensor` is a factory function, not an importable submodule on current PyTorch,
# so bind it with a from-import; every `T([...])` call in the notebook keeps working.
from torch import tensor as T
from torch import nn, optim
from torch.nn import functional as F

from torch.utils.data import DataLoader, random_split
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from torchsummary import summary

import pytorch_lightning as pl

print(f"GPU present: {torch.cuda.is_available()}")

GPU present: True


In [4]:
# Notebook-wide settings: dataset root, target image size and the mean/std
# (three values each) handed to `transforms.Normalize`.
data_path = Path('./data')
img_size = (150, 150)
data_stats = {
  'mean': T([0.4302, 0.4575, 0.4539]),
  'std': T([0.2361, 0.2347, 0.2433]),
}

In [5]:
# NOTE(review): leftover attribute probe. The output recorded for this cell is an
# AttributeError, so it stops a Restart & Run All; remove it or replace it with the
# logger lookup that was actually intended.
pl.logging.tensorboard

AttributeError: module 'logging' has no attribute 'tensorboard'

## Functions

In [None]:
def for_disp(img):
  """Undo the `data_stats` normalisation and return the result as a PIL image.

  Args:
    img: image tensor whose first dimension lines up with the entries of
      `data_stats['mean']` / `data_stats['std']` (assumed channel-first — TODO confirm).

  Returns:
    Whatever `transforms.ToPILImage()` produces for the de-normalised tensor.

  The input tensor is left untouched. The earlier in-place `mul_`/`add_` rewrote the
  caller's tensor (typically a view into a batch), so displaying the same image twice
  de-normalised it twice.
  """
  std = data_stats['std'][:, None, None]
  mean = data_stats['mean'][:, None, None]
  # Out-of-place arithmetic allocates a new tensor instead of mutating `img`.
  restored = img * std + mean
  return transforms.ToPILImage()(restored)

def get_stats(loader):
  """Estimate per-channel mean and standard deviation over a loader.

  Args:
    loader: iterable yielding `(imgs, target)` pairs; `imgs` is a tensor whose
      first two dimensions are (batch, channel). The target is ignored.

  Returns:
    `(mean, std)`: for each channel, the per-image mean and the per-image
    standard deviation, each averaged over all images seen. The std is therefore
    an average of per-image stds, not the std of the pooled pixels.
  """
  mean_total, std_total = 0.0, 0.0
  n_seen = 0
  for batch_imgs, _ in loader:
    n_batch, n_channels = batch_imgs.size(0), batch_imgs.size(1)
    # Flatten the spatial dimensions so statistics are taken per image and channel.
    flat = batch_imgs.view(n_batch, n_channels, -1)
    mean_total = mean_total + flat.mean(dim=2).sum(dim=0)
    std_total = std_total + flat.std(dim=2).sum(dim=0)
    n_seen += n_batch

  return mean_total / n_seen, std_total / n_seen

## Exploratory Data Analysis

In [None]:
# Collect up to `n_imgs` JPEGs from every entry under data/train, remembering the
# folder name of each image as its label.
n_imgs = 5
imgs, labels = [], []

for folder in (data_path/'train').iterdir():
  label = folder.name
  sample_files = list(folder.glob('*.jpg'))[:n_imgs]
  for img_f in sample_files:
    # Convert inside the `with` so the file handle is released right away.
    with Image.open(img_f) as f:
      imgs.append(np.array(f))
    labels.append(label)

# Number of distinct folder names that contributed at least one image.
n_classes = len(np.unique(labels))

In [None]:
# Lay the sampled images out on an `n_classes` x `n_imgs` grid, each titled with its label.
fig = plt.figure(figsize=(15, 15))

for i, (img, title) in enumerate(zip(imgs, labels)):
  ax = fig.add_subplot(n_classes, n_imgs, i + 1)
  ax.imshow(img)
  ax.set_title(title, color='r')
  # Hide tick marks; they carry no information for image thumbnails.
  ax.set(xticks=[], yticks=[])

plt.show()

In [None]:
# Steps shared by both pipelines: resize first, then convert to a tensor and normalise.
_resize_steps = [transforms.Resize(img_size)]
_tensor_steps = [transforms.ToTensor(), transforms.Normalize(**data_stats)]

# Training pipeline: adds a random horizontal flip between resizing and tensor conversion.
train_tfms = transforms.Compose(
  _resize_steps + [transforms.RandomHorizontalFlip(p=0.5)] + _tensor_steps
)

# Prediction pipeline: same steps without the random flip.
pred_tfms = transforms.Compose(_resize_steps + _tensor_steps)

In [None]:
# Full labelled training folder; every image goes through `train_tfms`.
ds = ImageFolder(data_path/'train', transform=train_tfms)

train_pct = 0.85
# Built-in int: the `np.int` alias was deprecated in NumPy 1.20 and later removed.
n_train = int(len(ds) * train_pct)
n_val = len(ds) - n_train

# Keep the subsets returned by random_split. Unwrapping them with `.dataset` replaced
# both names with the full ImageFolder, so "train" and "validation" were the same data.
train_ds, val_ds = random_split(ds, [n_train, n_val])
# The subsets do not expose the wrapped dataset's attributes; attach the class names so
# existing `train_ds.classes` lookups keep working.
train_ds.classes = val_ds.classes = ds.classes
# NOTE(review): both subsets wrap `ds`, so validation images also receive the random
# flip from `train_tfms`.

In [None]:
# One batch size for the three loaders in this cell.
batch_size = 32

# Training batches are shuffled and the final incomplete batch is dropped.
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, drop_last=True)
train_itr = iter(train_dl)

val_dl = DataLoader(val_ds, batch_size=batch_size)
val_itr = iter(val_dl)

# Test images come from their own folder and use the prediction transforms.
test_ds = ImageFolder(data_path/'test', transform=pred_tfms)
test_dl = DataLoader(test_ds, batch_size=batch_size)

In [None]:
# Pull one batch from the training iterator.
# NOTE(review): this rebinds `imgs`/`labels`, which the EDA cells above use for the raw
# sample images and folder names; re-running those plot cells afterwards sees this batch instead.
imgs, labels = next(train_itr)

In [None]:
# Pick a random position in the current batch, print its class name and display the image.
idx = np.random.randint(len(imgs))
class_idx = labels[idx].item()

print(train_ds.classes[class_idx])
img = for_disp(imgs[idx])
img

## Training

In [None]:
class IntelImageClassifier(pl.LightningModule):
  """Image classifier built on a pretrained VGG16 with a frozen backbone and a new 6-way head.

  `hparams` must provide:
    lr: learning rate for Adam.
    bs: batch size of the training loader.
    train_pct: fraction of the training folder used for the training split.
    data_path: path-like root that contains a `train` sub-folder.
    data_stats: dict of keyword arguments for `transforms.Normalize` (`mean`, `std`).
  """

  def __init__(self, hparams):
    super().__init__()
    self.hparams = hparams
    self.loss_fn = nn.CrossEntropyLoss()
    # Transforms must exist before the data is split: the split builds the ImageFolder with them.
    self.img_tfms = self.__define_tfms()
    self.train_ds,self.val_ds = self.__split_data()
    self.model = self.__build_model()

  def __build_model(self):
    """Load pretrained VGG16, freeze all of its weights and attach a fresh classifier head."""
    model = models.vgg16(pretrained=True) # load pretrained model
    for param in model.parameters(): param.requires_grad=False # freeze model params

    # replace last layer with custom layer (created after freezing, so it stays trainable)
    classifier = nn.Sequential(
      nn.Linear(in_features=25088, out_features=4096),
      nn.ReLU(),
      nn.Dropout(p=0.5),
      nn.Linear(in_features=4096, out_features=4096),
      nn.ReLU(),
      nn.Dropout(p=0.5),
      nn.Linear(in_features=4096, out_features=6) # 6 classes
    )
    model.classifier = classifier
    return model

  def forward(self, x):
    """Return the wrapped model's output for the batch `x`."""
    return self.model(x)

  def configure_optimizers(self):
    """Adam over the classifier head only; the backbone parameters are frozen."""
    return optim.Adam(self.model.classifier.parameters(), lr=self.hparams.lr)

  def __define_tfms(self):
    """Build the `'train'` (with random horizontal flip) and `'pred'` transform pipelines."""
    tfms = {}
    tfms['train'] = transforms.Compose([
      transforms.Resize((150, 150)),
      transforms.RandomHorizontalFlip(p=0.5),
      transforms.ToTensor(),
      transforms.Normalize(**self.hparams.data_stats)
    ])
    tfms['pred'] = transforms.Compose([
      transforms.Resize((150, 150)),
      transforms.ToTensor(),
      transforms.Normalize(**self.hparams.data_stats)
    ])
    return tfms

  def training_step(self, batch, batch_idx):
    """Compute the cross-entropy loss for one batch.

    Returns:
      OrderedDict with `'loss'` (the loss tensor) and `'progress_bar'`
      (a dict holding the same loss under `'train_loss'`).
    """
    imgs,labels = batch
    out = self.forward(imgs)
    loss = self.loss_fn(out, labels)

    tqdm_dict = {'train_loss': loss}
    output = OrderedDict({
      'loss': loss,
      'progress_bar': tqdm_dict,
    })
    # The dict was previously built and then dropped, so the step returned None.
    return output

  def __split_data(self):
    """Randomly split the training folder into train/validation subsets by `train_pct`."""
    ds = ImageFolder(self.hparams.data_path/'train', self.img_tfms['train'])
    # Built-in int: the `np.int` alias was deprecated in NumPy 1.20 and later removed.
    n_train = int(len(ds) * self.hparams.train_pct)
    n_val = len(ds) - n_train
    # NOTE(review): both subsets share `ds`, so validation images also get the 'train' transforms.
    train_ds,val_ds = random_split(ds, [n_train, n_val])
    return train_ds, val_ds

  @pl.data_loader
  def train_dataloader(self):
    """Shuffled training loader; drops the last incomplete batch and uses 4 worker processes."""
    return DataLoader(self.train_ds, batch_size=self.hparams.bs, shuffle=True, drop_last=True, num_workers=4)
  
#   @pl.data_loader
#   def val_dataloader(self):
#     return DataLoader(self.val_ds, batch_size=self.hparams.bs)

In [None]:
# Hyper-parameters read by IntelImageClassifier: batch size, learning rate,
# train split fraction, dataset root and normalisation statistics.
hparams = Namespace(
  bs=32,
  lr=0.001,
  train_pct=0.85,
  data_path=Path('./data'),
  data_stats={
    'mean': T([0.4302, 0.4575, 0.4539]),
    'std': T([0.2361, 0.2347, 0.2433]),
  },
)

In [None]:
# Instantiate the Lightning module. Its constructor builds the transforms, splits the
# training folder and loads the pretrained VGG16, so this cell touches disk.
model = IntelImageClassifier(hparams)

In [None]:
import logging

In [None]:
# Standard-library logger named after the current module.
logger = logging.getLogger(__name__)

In [None]:
# Create the trainer and fit the model. `train_percent_check=0.1` presumably limits the
# run to a tenth of the training data for a quick smoke test — confirm against the installed Lightning version.
# NOTE(review): `logger` is a standard-library `logging.Logger`; verify that this version's
# `Trainer(logger=...)` accepts it rather than expecting a Lightning logger object or a bool.
trainer = pl.Trainer(logger=logger,train_percent_check=0.1)
trainer.fit(model)

In [None]:
# Transform pipelines for the plain-PyTorch experiment: 'train' adds a random
# horizontal flip, 'pred' only resizes, converts to a tensor and normalises.
tfms = {
  'train': transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(**hparams.data_stats)
  ]),
  'pred': transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize(**hparams.data_stats)
  ]),
}
# NOTE(review): the training folder is loaded with the 'pred' pipeline, so tfms['train']
# is unused in this cell — confirm that skipping augmentation is intentional.
ds = ImageFolder(data_path/'train', transform=tfms['pred'])

train_pct = 0.85
# Built-in int: the `np.int` alias was deprecated in NumPy 1.20 and later removed.
n_train = int(len(ds) * train_pct)
n_val = len(ds) - n_train

train_ds,val_ds = random_split(ds, [n_train, n_val])

# Training batches are shuffled and the final incomplete batch is dropped.
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True, drop_last=True)
train_itr = iter(train_dl)

val_dl = DataLoader(val_ds, batch_size=32)
val_itr = iter(val_dl)

test_ds = ImageFolder(data_path/'test', transform=tfms['pred'])
test_dl = DataLoader(test_ds, batch_size=32)

In [None]:
# Pretrained VGG16 with every existing parameter frozen.
clf = models.vgg16(pretrained=True)
for param in clf.parameters():
  param.requires_grad = False

In [None]:
# New classifier head: two 4096-unit hidden layers with ReLU and dropout, then 6 outputs.
head_layers = [
  nn.Linear(25088, 4096),
  nn.ReLU(),
  nn.Dropout(0.5),
  nn.Linear(4096, 4096),
  nn.ReLU(),
  nn.Dropout(0.5),
  nn.Linear(4096, 6),
]
final_clf = nn.Sequential(*head_layers)

# Swap the head in; it is created after the freeze loop, so its parameters stay trainable.
clf.classifier = final_clf

In [None]:
# Cross-entropy loss and an Adam optimiser with lr=0.01.
# NOTE(review): the optimiser is given every parameter of `clf`, including the ones set to
# requires_grad=False above, and uses lr=0.01 while the Lightning `hparams` use 0.001 —
# confirm both choices are intended.
loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam(clf.parameters(), lr=0.01)

In [None]:
# Move the model to the GPU; this cell needs a CUDA device.
clf = clf.cuda()

In [None]:
# Smoke test: run one training batch through the model on the GPU and show its loss.
imgs, labels = next(train_itr)
imgs, labels = imgs.cuda(), labels.cuda()

pred = clf(imgs)
loss_fn(pred, labels)

In [None]:
# Print a torchsummary report of the model for a single 3x150x150 input.
summary(clf, input_size=(3, 150, 150))

In [None]:
# NOTE(review): same call as the previous cell (only the spacing inside the tuple differs);
# one of the two cells can be dropped.
summary(clf, input_size=(3,150,150))