In [7]:
import glob
import os
import time

os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' 
os.environ['CUDA_VISIBLE_DEVICES']='4,5'

import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import h5py

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import torch.autograd as autograd
import torchvision.utils as vutils

In [10]:
def load_data(data_dir='.'):
    """Load the camelyonpatch_level_2 train/test/valid splits from HDF5 files.

    Images are read from the ``'x'`` dataset of each ``*_x.h5`` file, cropped to
    indices 16:80 along axes 1 and 2 (a 64x64 window) and rescaled with
    ``(value - 127.5) / 127.5`` (maps 0..255 onto [-1, 1] -- assumes 8-bit
    pixel data, TODO confirm).  Labels are read from the ``'y'`` dataset of each
    ``*_y.h5`` file and reshaped to a column vector.

    Args:
        data_dir: Directory containing the six ``camelyonpatch_level_2_split_*``
            files.  Defaults to the current working directory, which is where
            the original code looked for them.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test, x_valid, y_valid)`` of
        in-memory NumPy arrays.

    Raises:
        OSError: If one of the files cannot be opened.
    """
    def _read(split, kind):
        # One file per (split, kind); the context manager guarantees the HDF5
        # handle is released even when slicing or arithmetic raises.
        filename = 'camelyonpatch_level_2_split_%s_%s.h5' % (split, kind)
        with h5py.File(os.path.join(data_dir, filename), 'r') as handle:
            if kind == 'x':
                return (handle['x'][:, 16:80, 16:80] - 127.5) / 127.5
            return handle['y'][:].reshape(-1, 1)

    x_train, y_train = _read('train', 'x'), _read('train', 'y')
    x_test, y_test = _read('test', 'x'), _read('test', 'y')
    x_valid, y_valid = _read('valid', 'x'), _read('valid', 'y')

    return x_train, y_train, x_test, y_test, x_valid, y_valid

def plot_samples(samples, folder=None, epoch=None):
    """Draw the first ``k*k`` samples on a square grid, optionally saving it.

    ``k`` is ``int(sqrt(samples.shape[0]))``; samples beyond ``k*k`` are not
    drawn.  Values are mapped with ``0.5 * samples + 0.5`` before display
    (i.e. [-1, 1] -> [0, 1]).

    Args:
        samples: Array indexable along axis 0, each entry an image accepted by
            ``Axes.imshow``.
        folder: If truthy, the figure is written to
            ``results/<folder>/samples/epoch_<epoch>.png`` and then closed.
            If falsy, the figure is left open for inline display.
        epoch: Integer used in the output filename; required when ``folder``
            is given.

    Raises:
        TypeError: If ``folder`` is given but ``epoch`` is ``None``.
    """
    if folder and epoch is None:
        # Previously this surfaced as an opaque "%d format" TypeError only
        # after the directories were created and the figure was left open.
        raise TypeError('epoch must be an integer when folder is given')

    side = int(np.sqrt(samples.shape[0]))
    generatedImage = 0.5 * samples + 0.5

    fig = plt.figure(figsize=(10, 10))
    for i in range(side * side):
        ax = fig.add_subplot(side, side, i + 1)
        ax.imshow(generatedImage[i], interpolation='nearest')
        ax.axis('off')
        ax.set_aspect('equal')
    fig.subplots_adjust(wspace=.008, hspace=.03)

    if folder:
        path = os.path.join('results', folder, 'samples')
        # Creates every missing parent in one call and tolerates reruns.
        os.makedirs(path, exist_ok=True)
        fig.savefig(os.path.join(path, 'epoch_%d.png' % epoch))
        # Close this specific figure; a bare plt.close() closes whichever
        # figure happens to be current.
        plt.close(fig)


In [11]:
x_train, y_train, x_test, y_test, x_valid, y_valid = load_data()

# Fixed-seed 1% subset of the training split.
# Note: despite its name, `percent` holds the subset SIZE (a sample count).
percent = int(len(x_train) * 0.01)
np.random.seed(17)
idx_small = np.random.choice(range(len(x_train)), size=percent, replace=False)

x_train_small, y_train_small = x_train[idx_small], y_train[idx_small]
# Two bins: one bar per label value, to eyeball the class balance of the subset.
plt.hist(y_train_small, bins=2)
print(percent)

OSError: Unable to open file (unable to open file: name = './camelyonpatch_level_2_split_train_x.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [153]:
# Notebook-wide switch read by later cells (e.g. generate_image and the MNIST
# training cell) to decide whether tensors and models are moved to the GPU.
use_cuda=True

def weights_init(m):
    """Initialise one module in place; intended for ``model.apply(weights_init)``.

    Dispatch is by class name:
      * names containing ``'Conv'``: Kaiming-normal weights, zero bias;
      * names containing ``'BatchNorm'``: weights ~ N(1.0, 0.02), zero bias;
      * names containing ``'Linear'``: bias ~ N(0.0, 0.2), Xavier-uniform weights.
    Any other module is left untouched.  Layers created without a bias
    (``bias=False``) or without affine parameters (``affine=False``) are
    handled by skipping the missing tensor instead of raising.

    Args:
        m: The ``nn.Module`` to initialise.
    """
    classname = m.__class__.__name__
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)

    if classname.find('Conv') != -1:
        # Underscore variants are the supported in-place initialisers; the
        # non-underscore names are deprecated aliases.
        torch.nn.init.kaiming_normal_(weight.data)
        if bias is not None:
            bias.data.fill_(0)
    elif classname.find('BatchNorm') != -1:
        if weight is not None:
            weight.data.normal_(1.0, 0.02)
        if bias is not None:
            bias.data.fill_(0)
    elif classname.find('Linear') != -1:
        # Bias is drawn before the weights, matching the original RNG order.
        if bias is not None:
            bias.data.normal_(0.0, 0.2)
        torch.nn.init.xavier_uniform_(weight.data)


class Generator(nn.Module):
    """Maps latent vectors to channels-last images of shape ``(N, w, h, c)``.

    A linear layer expands the latent vector to ``n_filters`` feature maps of
    size ``(w//4, h//4)``; a stack of nearest-neighbour upsampling and
    convolution layers then produces a ``(N, c, w, h)`` tensor squashed by
    ``tanh``, which is finally permuted to channels-last.

    Args:
        w: Output size along the first spatial axis.
        h: Output size along the second spatial axis.
        c: Number of output channels.
        latent_dim: Length of each input latent vector.
    """

    def __init__(self, w, h, c, latent_dim=100):
        super(Generator, self).__init__()

        self.w = w
        self.h = h
        self.c = c
        self.latent_dim = latent_dim

        self.n_filters = 128

        self.input = nn.Sequential(
            # Parenthesised so the size matches the view() in forward() for any
            # w/h; `n * w//4 * h//4` only agreed for multiples of 4.
            nn.Linear(latent_dim, self.n_filters * (w // 4) * (h // 4)),
            nn.ReLU()
        )

        # Every Upsample targets an absolute size, so the stride-2 convolutions
        # (which halve the map) are each followed by a resize back to (w, h).
        self.deconv = nn.Sequential(
            nn.Upsample(size=[w//2, h//2], mode='nearest'),
            nn.Conv2d(self.n_filters, self.n_filters//2, 3, stride=1, padding=1),
            nn.ReLU(True),

            nn.Upsample(size=[w, h], mode='nearest'),
            nn.Conv2d(self.n_filters//2, self.n_filters//4, 4, stride=2, padding=1),
            nn.ReLU(True),

            nn.Upsample(size=[w, h], mode='nearest'),
            nn.Conv2d(self.n_filters//4, self.n_filters//8, 3, stride=1, padding=1),
            nn.ReLU(True),

            nn.Upsample(size=[w, h], mode='nearest'),
            nn.Conv2d(self.n_filters//8, self.n_filters//16, 4, stride=2, padding=1),
            nn.ReLU(True),

            nn.Upsample(size=[w, h], mode='nearest'),
            nn.Conv2d(self.n_filters//16, self.n_filters//32, 3, stride=1, padding=1),
            nn.ReLU(True),

            nn.Conv2d(self.n_filters//32, c, 3, stride=1, padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        """Generate images for a batch of latent vectors.

        Args:
            x: Tensor of shape ``(N, latent_dim)``.

        Returns:
            Contiguous tensor of shape ``(N, w, h, c)`` with values in [-1, 1].
        """
        output = self.input(x)
        output = output.view(-1, self.n_filters, self.w // 4, self.h // 4)
        # The conv stack emits (N, c, w, h).  permute() moves the channel axis
        # last; a view() here would only relabel memory and scramble pixels.
        # contiguous() keeps downstream .view() calls on the result working.
        return self.deconv(output).permute(0, 2, 3, 1).contiguous()


class Discriminator(nn.Module):
    """Critic scoring channels-last images of shape ``(N, w, h, c)``.

    Three stride-2 convolutions (each with LeakyReLU and 2-D dropout) reduce
    the image by a factor of 8 per spatial axis; the flattened features feed a
    single linear unit.

    Args:
        h: Size of the second spatial axis of the input.
        w: Size of the first spatial axis of the input.
        c: Number of input channels.

    Note:
        The parameter order is ``(h, w, c)`` whereas ``Generator`` takes
        ``(w, h, c)``; pass by keyword for non-square images.
    """

    def __init__(self, h, w, c):
        super(Discriminator, self).__init__()

        self.w = w
        self.h = h
        self.c = c
        n_filters = 32

        self.main = nn.Sequential(
            nn.Conv2d(c, n_filters, 4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Dropout2d(p=0.2),
            nn.Conv2d(n_filters, 2*n_filters, 4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Dropout2d(p=0.2),
            nn.Conv2d(2*n_filters, 4*n_filters, 4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Dropout2d(p=0.2),
            nn.Flatten()
        )
        # Each stride-2 conv (kernel 4, padding 1) floors the size by 2.
        self.output = nn.Linear(4*n_filters*(w // 8)*(h // 8), 1, bias=True)

    def forward(self, x):
        """Score a batch of images.

        Args:
            x: Channels-last tensor of shape ``(N, w, h, c)``, or any tensor
                that reshapes to it (e.g. flattened channels-last images).

        Returns:
            Tuple ``(scores, features)``: ``scores`` has shape ``(N,)`` and
            ``features`` is the flattened output of the convolution stack.
        """
        # reshape() restores the channels-last layout; permute() then moves
        # channels first for Conv2d.  A view() straight to (N, c, w, h) would
        # interleave colour values into the spatial axes.
        channels_first = x.reshape(-1, self.w, self.h, self.c).permute(0, 3, 1, 2)
        out = self.main(channels_first)
        out2 = self.output(out).view(-1)
        return out2, out



def calc_gradient_penalty(netD, real_data, fake_data, LAMBDA=None, use_cuda=True):
    """WGAN-GP penalty: ``LAMBDA * mean((||grad_x D(x_hat)||_2 - 1)^2)``.

    ``x_hat`` is a per-sample random interpolation between ``real_data`` and
    ``fake_data``; the gradient norm is taken over all non-batch dimensions.

    Args:
        netD: Critic returning a ``(scores, features)`` tuple.
        real_data: Real batch; reshaped to ``fake_data``'s shape.
        fake_data: Generated batch.  Both inputs are detached, so the penalty
            only back-propagates into ``netD``.
        LAMBDA: Penalty weight.  When ``None``, the module-level ``LAMBDA`` is
            used if it exists, otherwise 10.  (The default used to be bound to
            the global at definition time, which raised ``NameError`` when the
            global was not defined yet.)
        use_cuda: Kept for backward compatibility and ignored; every tensor is
            created on ``fake_data``'s device.

    Returns:
        Scalar tensor with a graph attached (``create_graph=True``).
    """
    if LAMBDA is None:
        LAMBDA = globals().get('LAMBDA', 10)

    fake_data = fake_data.detach()
    real_data = (real_data.detach()
                 .reshape(fake_data.size())
                 .to(device=fake_data.device, dtype=fake_data.dtype))

    # One mixing coefficient per sample, broadcast over every other axis.
    batch = fake_data.size(0)
    alpha = torch.rand([batch] + [1] * (fake_data.dim() - 1),
                       device=fake_data.device, dtype=fake_data.dtype)

    interpolates = (alpha * real_data + (1 - alpha) * fake_data).requires_grad_(True)

    disc_interpolates, _ = netD(interpolates)

    gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                              grad_outputs=torch.ones_like(disc_interpolates),
                              create_graph=True, retain_graph=True, only_inputs=True)[0]
    # Flatten first: norm(dim=1) on a 4-D gradient would reduce a single axis
    # instead of producing one norm per sample.
    gradients = gradients.reshape(batch, -1)
    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA
    return gradient_penalty
    

def generate_image(netG):
    """Sample a batch from ``netG`` and pass it to an image-saving helper.

    NOTE(review): this function cannot run as written in this notebook:
      * ``BATCH_SIZE``, ``gpu``, ``lib`` and ``frame`` are not defined in any
        visible cell;
      * the noise width is hard-coded to 128 while ``Generator`` defaults to
        ``latent_dim=100``;
      * the result is viewed as ``(BATCH_SIZE, 28, 28)`` whereas
        ``Generator.forward`` returns ``(N, w, h, c)``;
      * ``Variable(..., volatile=True)`` is the pre-0.4 PyTorch inference
        idiom (``torch.no_grad()`` is the current replacement).
    It looks like a leftover from an MNIST example -- confirm whether it is
    still needed before fixing or deleting it.

    Args:
        netG: Generator module called on the sampled noise.
    """
    noise = torch.randn(BATCH_SIZE, 128)
    if use_cuda:
        noise = noise.cuda(gpu)
    noisev = autograd.Variable(noise, volatile=True)
    samples = netG(noisev)
    samples = samples.view(BATCH_SIZE, 28, 28)
    # print samples.size()

    # Detach from the graph and move to host memory as a NumPy array.
    samples = samples.cpu().data.numpy()

    lib.save_images.save_images(samples, 'tmp/mnist/samples_{}.png'.format(frame))


In [1]:
class WGAN_CT:
    """Trainer optimising the notebook's Generator/Discriminator with the WGAN-GP loss.

    The critic is updated on every batch and the generator once every
    ``critic_iterations`` batches.  Per-step values are appended to
    ``self.losses`` under the keys ``'D'``, ``'GP'``, ``'gradient_norm'`` and
    ``'G'``.

    NOTE(review): despite the class name, no consistency-term (CT) penalty is
    computed by any method here; only the gradient penalty is applied.  The
    class previously defined ``train`` twice; only the later, data-loader based
    definition was ever reachable, and that is the one kept.

    Args:
        w, h, c: Image size along the two spatial axes and channel count.
        latent_dim: Length of the generator's latent vectors.
        critic_iterations: Critic updates per generator update.
        print_every: Log the latest losses every this many batches.
        lr, betas: Adam settings shared by both optimisers.
        use_cuda: Request GPU execution; silently falls back to the CPU when
            CUDA is unavailable.
    """

    def __init__(self, w, h, c, latent_dim=100, critic_iterations=5,
                 print_every=50, lr=1e-4, betas=(0.5, 0.9), use_cuda=True):
        self.LAMBDA = 10
        self.latent_dim = latent_dim
        self.critic_iterations = critic_iterations
        self.print_every = print_every
        self.num_steps = 0
        self.losses = {'G': [], 'D': [], 'GP': [], 'gradient_norm': []}
        self.use_cuda = bool(use_cuda) and torch.cuda.is_available()

        self.G = Generator(w, h, c, latent_dim=latent_dim)
        # Discriminator declares (h, w, c); keywords avoid swapping the axes.
        self.D = Discriminator(h=h, w=w, c=c)
        if self.use_cuda:
            self.G = self.G.cuda()
            self.D = self.D.cuda()

        self.G_opt = optim.Adam(self.G.parameters(), lr=lr, betas=betas)
        self.D_opt = optim.Adam(self.D.parameters(), lr=lr, betas=betas)

    def _prepare(self, data):
        """Return ``data`` as a float32 tensor on the trainer's device."""
        data = torch.as_tensor(data, dtype=torch.float32)
        return data.cuda() if self.use_cuda else data

    def _critic_scores(self, batch):
        """Run the critic and return only its score vector.

        The notebook's Discriminator returns ``(scores, features)``; a bare
        tensor is accepted as well.
        """
        out = self.D(batch)
        return out[0] if isinstance(out, (tuple, list)) else out

    def _train_D(self, data):
        """One critic update: ``E[D(fake)] - E[D(real)] + LAMBDA * GP``."""
        self.D_opt.zero_grad()

        data = self._prepare(data)
        batch_size = data.shape[0]
        # Detached: the critic step must not build a graph through the generator.
        generated_data = self.sample_generator(batch_size).detach()

        d_real = self._critic_scores(data)
        d_generated = self._critic_scores(generated_data)

        gradient_penalty = self._gradient_penalty(data, generated_data)
        self.losses['GP'].append(gradient_penalty.item())

        d_loss = d_generated.mean() - d_real.mean() + self.LAMBDA * gradient_penalty
        d_loss.backward()
        self.D_opt.step()

        self.losses['D'].append(d_loss.item())

    def _train_G(self, data):
        """One generator update maximising the critic score of generated data.

        ``data`` is only used for its batch size.
        """
        self.G_opt.zero_grad()

        batch_size = data.shape[0]
        generated_data = self.sample_generator(batch_size)

        g_loss = -self._critic_scores(generated_data).mean()
        g_loss.backward()
        self.G_opt.step()

        self.losses['G'].append(g_loss.item())

    def _gradient_penalty(self, real_data, generated_data):
        """Return ``mean((||grad D(x_hat)||_2 - 1)^2)`` on random interpolates.

        Also appends the mean per-sample gradient norm to
        ``self.losses['gradient_norm']``.  The result is unweighted; the caller
        multiplies by ``self.LAMBDA``.
        """
        batch_size = real_data.shape[0]
        real_data = real_data.reshape(generated_data.shape)

        # One mixing coefficient per sample, broadcast over the other axes.
        alpha = torch.rand([batch_size] + [1] * (real_data.dim() - 1),
                           device=real_data.device, dtype=real_data.dtype)
        interpolated = (alpha * real_data + (1 - alpha) * generated_data).detach()
        # Must be a leaf requiring grad, otherwise autograd.grad has nothing
        # to differentiate with respect to.
        interpolated.requires_grad_(True)

        prob_interpolated = self._critic_scores(interpolated)

        gradients = torch.autograd.grad(outputs=prob_interpolated, inputs=interpolated,
                                        grad_outputs=torch.ones_like(prob_interpolated),
                                        create_graph=True, retain_graph=True)[0]

        # Flatten so the norm is taken per example in the batch.
        gradients = gradients.reshape(batch_size, -1)
        self.losses['gradient_norm'].append(gradients.norm(2, dim=1).mean().item())

        # The derivative of sqrt is unbounded at 0, so compute the norm
        # manually with a small epsilon.
        gradients_norm = torch.sqrt(torch.sum(gradients ** 2, dim=1) + 1e-12)

        return ((gradients_norm - 1) ** 2).mean()

    def _train_epoch(self, data_loader):
        """Run one pass over ``data_loader``, which must yield ``(inputs, ...)`` tuples."""
        for i, data in enumerate(data_loader):
            batch = data[0]
            self.num_steps += 1
            self._train_D(batch)
            # Only update generator every |critic_iterations| iterations
            if self.num_steps % self.critic_iterations == 0:
                self._train_G(batch)

            if i % self.print_every == 0:
                print("Iteration {}".format(i + 1))
                print("D: {}".format(self.losses['D'][-1]))
                print("GP: {}".format(self.losses['GP'][-1]))
                print("Gradient norm: {}".format(self.losses['gradient_norm'][-1]))
                if self.num_steps > self.critic_iterations:
                    print("G: {}".format(self.losses['G'][-1]))

    def train(self, data_loader, epochs, save_training_gif=True):
        """Train for ``epochs`` passes, optionally saving a GIF of fixed samples.

        Args:
            data_loader: Iterable of ``(inputs, ...)`` batches.
            epochs: Number of passes over ``data_loader``.
            save_training_gif: When true, one grid of 64 samples from fixed
                latents is rendered per epoch and written to
                ``./training_<epochs>_epochs.gif``.

        Raises:
            NameError: If ``save_training_gif`` is true but ``imageio`` has not
                been imported in the notebook (checked before training starts
                so the failure does not come after hours of work).
        """
        if save_training_gif:
            if 'imageio' not in globals():
                raise NameError("save_training_gif=True needs `import imageio` "
                                "in the notebook's import cell")
            # Fix latents to see how image generation improves during training
            fixed_latents = torch.randn(64, self.latent_dim)
            if self.use_cuda:
                fixed_latents = fixed_latents.cuda()
            training_progress_images = []

        for epoch in range(epochs):
            print("\nEpoch {}".format(epoch + 1))
            self._train_epoch(data_loader)

            if save_training_gif:
                with torch.no_grad():
                    frames = self.G(fixed_latents).cpu()
                # Generator output is channels-last in [-1, 1]; make_grid
                # expects (N, C, H, W).
                frames = (0.5 * frames + 0.5).clamp(0, 1).permute(0, 3, 1, 2)
                img_grid = vutils.make_grid(frames)
                # Convert to numpy and move channels last for imageio.
                img_grid = np.transpose(img_grid.numpy(), (1, 2, 0))
                training_progress_images.append((img_grid * 255).astype(np.uint8))

        if save_training_gif:
            imageio.mimsave('./training_{}_epochs.gif'.format(epochs),
                            training_progress_images)

    def sample_generator(self, num_samples):
        """Return ``num_samples`` generated images as a tensor with graph attached."""
        latent_samples = torch.randn(num_samples, self.latent_dim)
        if self.use_cuda:
            latent_samples = latent_samples.cuda()
        return self.G(latent_samples)

    def sample(self, num_samples):
        """Return ``num_samples`` generated images as a NumPy array.

        The array keeps the generator's ``(N, w, h, c)`` layout.  (The previous
        ``[:, 0, :, :]`` slice assumed a channels-first tensor and would have
        dropped a spatial axis here.)
        """
        with torch.no_grad():
            generated_data = self.sample_generator(num_samples)
        return generated_data.cpu().numpy()

In [5]:
# Scratch cell: build a trainer for 64x64x3 patches and poke the penalty.
# NOTE(review): `_gradient_penalty` is declared with two required arguments
# (real_data, generated_data); this call passes none and raises TypeError.
wgan_ct = WGAN_CT(64, 64, 3)
wgan_ct._gradient_penalty()

In [None]:
# Scratch inspection.  NOTE(review): `tmp` is only assigned in a cell further
# down the notebook, so this fails on Restart & Run All.
tmp.data

In [None]:
# Scratch inspection.  NOTE(review): in the visible cells `alpha` is only ever a
# local inside the gradient-penalty functions; at notebook scope it looks
# undefined -- this cell is probably safe to delete.
alpha

In [71]:
# First ten training patches as a float32 tensor on the GPU, used below to
# smoke-test the critic.
tmp = torch.from_numpy(x_train[:10]).float().cuda()
tmp.shape

torch.Size([10, 64, 64, 3])

In [6]:
# Smoke test: push ten standard-normal latent vectors through a fresh
# generator and display the output shape.
gen = Generator(64, 64, 3).cuda()
noise = torch.empty(10, 100, dtype=torch.float32).normal_(mean=0, std=1).cuda()

gen(noise).shape

NameError: name 'Generator' is not defined

In [155]:
# Smoke test: score the ten patches in `tmp` with a fresh critic.  Element 0 of
# the returned tuple is the score vector, so one value per input is expected.
dis = Discriminator(64,64,3).cuda()
dis(tmp)[0].shape


torch.Size([10])

In [None]:
# Hyper-parameters for the MNIST run below.
# NOTE(review): no visible cell defines these names, so defaults are supplied
# here without overriding values already set in the kernel.  The defaults are
# assumptions -- confirm them and move them into a config cell.
for _name, _default in dict(BATCH_SIZE=64, NOISE_SZ=100, ITERS=100000, CRITIC_ITERS=5,
                            LAMBDA=10, LAMBDA_2=2, Factor_M=0).items():
    globals().setdefault(_name, _default)

# Honour `use_cuda`, but fall back to the CPU when no GPU is visible.
device = torch.device('cuda' if use_cuda and torch.cuda.is_available() else 'cpu')

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                   ])),
    batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                   ])),
    batch_size=BATCH_SIZE, shuffle=True)

# ================== main script ======================

# MNIST digits are 28x28 with one channel; both classes require the image
# geometry as constructor arguments.
netG = Generator(28, 28, 1, latent_dim=NOISE_SZ)
netG.apply(weights_init)
netD = Discriminator(28, 28, 1)
netD.apply(weights_init)

print(netG)
print(netD)

netD = netD.to(device)
netG = netG.to(device)

optimizerD = optim.Adam(netD.parameters(), lr=1e-4, betas=(0.5, 0.9))
optimizerG = optim.Adam(netG.parameters(), lr=1e-4, betas=(0.5, 0.9))


def dataset():
    """Yield image batches from ``train_loader`` forever, restarting after each pass."""
    while True:
        for images, _targets in train_loader:
            yield images


data = dataset()
fixed_noise = torch.randn(100, NOISE_SZ, device=device)

for iteration in range(ITERS):
    ############################
    # (1) Update D network
    ###########################
    netD.train()  # dropout must be active: the CT term compares two stochastic passes
    for p in netD.parameters():
        p.requires_grad = True
    for iter_d in range(CRITIC_ITERS):
        real_data = next(data).to(device)
        # The last batch of an epoch can be smaller than BATCH_SIZE; the fake
        # batch must match it for the gradient-penalty interpolation.
        batch = real_data.size(0)

        netD.zero_grad()

        # train with fake (generator frozen for the critic step)
        with torch.no_grad():
            fake = netG(torch.randn(batch, NOISE_SZ, device=device))
        D_fake1_1, D_fake1_2 = netD(fake)
        D_fake1 = D_fake1_1.mean()

        # train with gradient penalty
        gradient_penalty = calc_gradient_penalty(netD, real_data, fake, LAMBDA=LAMBDA)

        # train with real
        D_real1_1, D_real1_2 = netD(real_data)
        D_real1 = D_real1_1.mean()

        # train with CT penalty: a second pass over the same inputs differs
        # only through dropout; penalise the disagreement above Factor_M.
        D_real2_1, D_real2_2 = netD(real_data)

        ct_penalty = LAMBDA_2 * ((D_real1_1 - D_real2_1) ** 2)
        ct_penalty = ct_penalty + LAMBDA_2 * 0.1 * ((D_real1_2 - D_real2_2) ** 2).mean(dim=1)
        ct_penalty = torch.clamp(ct_penalty - Factor_M, min=0).mean()

        # A single backward over the summed loss accumulates the same
        # gradients as the four separate backward calls used previously.
        D_cost = -D_real1 + D_fake1 + gradient_penalty + ct_penalty
        D_cost.backward()
        Wasserstein_D = D_real1 - D_fake1
        optimizerD.step()

    ############################
    # (2) Update G network
    ###########################
    netD.eval()
    for p in netD.parameters():
        p.requires_grad = False  # avoid computing critic gradients in the G step
    netG.zero_grad()

    noise = torch.randn(BATCH_SIZE, NOISE_SZ, device=device)
    fake = netG(noise)
    G, _ = netD(fake)
    G = G.mean()
    G_cost = -G
    # Equivalent to the old G.backward(mone), without passing a shape-[1]
    # gradient for a 0-dim loss.
    G_cost.backward()
    optimizerG.step()
    print((iteration, D_cost.item(), Wasserstein_D.item()))
    if iteration % 100 == 0:
        with torch.no_grad():
            # Generator output is channels-last; save_image expects (N, C, H, W).
            fake = netG(fixed_noise).permute(0, 3, 1, 2)
        vutils.save_image(fake,
                          'fake_samples_epoch_%03d.png' % iteration,
                          normalize=True)

torch.save(netG.state_dict(), 'netG_mnist.pth')
torch.save(netD.state_dict(), 'netD_mnist.pth')