In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
import copy
import seaborn
from tqdm import tqdm_notebook

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from mixture_gaussian import data_generator
plt.style.use('ggplot')

ImportError: No module named seaborn

In [None]:
# `cuda` is the notebook-wide switch read by later cells to decide whether
# tensors are moved to the GPU.
cuda = bool(torch.cuda.is_available())
if cuda:
    # Expose only device index 0 to this process.
    os.environ['CUDA_VISIBLE_DEVICES'] = "0"

In [None]:
# Build the data source provided by the local `mixture_gaussian` module.
dset = data_generator()
# Presumably resets the component weights in `dset.p` to a uniform prior --
# TODO confirm against mixture_gaussian.
dset.uniform_distribution()

# Show the per-component weights held in `dset.p`.
fig, ax = plt.subplots()
ax.plot(dset.p)
ax.set_title('Weight of each gaussian')
plt.show()
plt.close(fig)

In [None]:
def plot(points, title):
    """Scatter `points` together with the mixture centers and show the figure.

    Args:
        points: 2-D array-like indexed as `points[:, 0]` (x) and `points[:, 1]` (y).
        title: text used as the figure title.

    Reads the module-level `dset` for `dset.centers`. Both axes are fixed to
    the range [-5, 5]. The figure is closed after being shown.
    """
    fig, ax = plt.subplots()
    # Small blue dots: the supplied points. Large green dots: the component centers.
    ax.scatter(points[:, 0], points[:, 1], s=10, c='b', alpha=0.5)
    ax.scatter(dset.centers[:, 0], dset.centers[:, 1], s=100, c='g', alpha=0.5)
    ax.set_title(title)
    ax.set_ylim(-5, 5)
    ax.set_xlim(-5, 5)
    plt.show()
    plt.close(fig)
    
# Draw a batch from the data source and visualise it as a sanity check.
n_preview_points = 5000
sample_points = dset.sample(n_preview_points)
plot(sample_points, 'Sampled data points')

In [None]:
# Model params (most of hyper-params follow the original paper: https://arxiv.org/abs/1611.02163)

# --- Generator sizes: noise vector -> hidden -> output ---
z_dim = 512            # width of the noise vector produced by noise_sampler
g_inp = z_dim
g_hid = 256
g_out = dset.size      # presumably the dimensionality of one data point -- TODO confirm

# --- Discriminator sizes: generator output -> hidden -> single score ---
d_inp = g_out
d_hid = 256
d_out = 1

# --- Optimisation settings ---
minibatch_size = 512
d_learning_rate = g_learning_rate = 1e-4
optim_betas = (0.5, 0.999)          # passed as `betas` to Adam
d_steps = g_steps = 1               # discriminator / generator updates per iteration

# --- Bookkeeping ---
unrolled_steps = 10                 # used for the file prefix and a plot label
num_iterations = 1000
log_interval = 100                  # used by plot_samples for its subplot titles

prefix = "unrolled_steps-{}-prior_std-{:.2f}".format(unrolled_steps, np.std(dset.p))
print("Save file with prefix", prefix)

In [None]:
def noise_sampler(N, z_dim):
    """Return an (N, z_dim) float32 tensor of standard-normal noise.

    The values are drawn with NumPy's global random state, so
    `np.random.seed` controls reproducibility.
    """
    gaussian_noise = np.random.normal(size=[N, z_dim])
    return torch.from_numpy(gaussian_noise.astype('float32'))

In [None]:
class Generator(nn.Module):
    """Fully connected network: two ReLU hidden layers followed by a linear output.

    Args:
        input_size: number of features of the input.
        hidden_size: width of both hidden layers.
        output_size: number of features of the output.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(Generator, self).__init__()
        self.activation_fn = F.relu
        # Layers are registered in map1 -> map2 -> map3 order.
        self.map1 = nn.Linear(input_size, hidden_size)
        self.map2 = nn.Linear(hidden_size, hidden_size)
        self.map3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Apply the two activated hidden layers, then the un-activated output layer."""
        hidden = x
        for layer in (self.map1, self.map2):
            hidden = self.activation_fn(layer(hidden))
        return self.map3(hidden)

class Discriminator(nn.Module):
    """Fully connected network: two ReLU hidden layers and a sigmoid output.

    Args:
        input_size: number of features of the input.
        hidden_size: width of both hidden layers.
        output_size: number of output units; each is squashed into (0, 1).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(Discriminator, self).__init__()
        self.activation_fn = F.relu
        # Layers are registered in map1 -> map2 -> map3 order.
        self.map1 = nn.Linear(input_size, hidden_size)
        self.map2 = nn.Linear(hidden_size, hidden_size)
        self.map3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Apply the two activated hidden layers, then a sigmoid over the output layer."""
        hidden = x
        for layer in (self.map1, self.map2):
            hidden = self.activation_fn(layer(hidden))
        logits = self.map3(hidden)
        return torch.sigmoid(logits)

In [None]:
# Build a pool of independent discriminator/generator pairs, each with its own
# Adam optimizer. G[k], D[k], g_optimizer[k] and d_optimizer[k] belong together.
num_models = 10

G = []
D = []
d_optimizer = []
g_optimizer = []
loss_grid = []
criterion = nn.BCELoss()

for model_idx in range(num_models):
    discriminator = Discriminator(input_size=d_inp, hidden_size=d_hid, output_size=d_out)
    generator = Generator(input_size=g_inp, hidden_size=g_hid, output_size=g_out)
    if cuda:
        # The training helpers move every input batch to the GPU when `cuda`
        # is set, so the networks have to live there too. This is done before
        # the optimizers are created so they track the GPU parameters.
        discriminator = discriminator.cuda()
        generator = generator.cuda()
    D.append(discriminator)
    G.append(generator)
    d_optimizer.append(optim.Adam(discriminator.parameters(), lr=d_learning_rate, betas=optim_betas))
    g_optimizer.append(optim.Adam(generator.parameters(), lr=g_learning_rate, betas=optim_betas))

In [None]:
def d_loop(i, loss_grid=None):
    """Run one optimisation step of discriminator `D[i]` on a real and a fake batch.

    Args:
        i: index into `D` / `d_optimizer` of the discriminator to update.
        loss_grid: optional 2-D array; entry `[i][j]` receives the combined
            discriminator loss (as a Python float) for generator `j`.
            Pass `None` to skip the bookkeeping.

    Returns:
        Tuple `(real_loss, fake_loss)` of Python floats.

    Relies on the notebook globals `D`, `G`, `d_optimizer`, `criterion`,
    `dset`, `minibatch_size`, `g_inp`, `cuda` and `noise_sampler`.
    """
    d_optimizer[i].zero_grad()

    # Real batch, labelled 1.
    d_real_data = torch.from_numpy(dset.sample(minibatch_size))
    if cuda:
        d_real_data = d_real_data.cuda()
    d_real_decision = D[i](d_real_data)
    # ones_like / zeros_like already create the target on the decision's device.
    d_real_error = criterion(d_real_decision, torch.ones_like(d_real_decision))

    # Fake batch, labelled 0.
    d_gen_input = noise_sampler(minibatch_size, g_inp)
    if cuda:
        d_gen_input = d_gen_input.cuda()

    # Only generator 0 is used as the opponent. NOTE(review): widening this
    # range would require recomputing the real loss for every generator,
    # because the optimizer step modifies D's weights in place.
    for j in range(1):
        # The generator is not updated here, so skip building its graph.
        with torch.no_grad():
            d_fake_data = G[j](d_gen_input)
        d_fake_decision = D[i](d_fake_data)
        d_fake_error = criterion(d_fake_decision, torch.zeros_like(d_fake_decision))

        d_loss = d_real_error + d_fake_error
        d_loss.backward()
        d_optimizer[i].step()  # only D[i]'s parameters are registered with this optimizer

        if loss_grid is not None:
            # Store a plain float; a grad-tracking tensor does not belong in a numpy array.
            loss_grid[i][j] = d_loss.item()

    return d_real_error.item(), d_fake_error.item()

In [None]:
def g_loop(i, loss_grid=None):
    """Run one optimisation step of generator `G[i]` against discriminator 0.

    The generator is trained to make the discriminator output 1 on generated
    samples; the discriminator itself is not stepped here.

    Args:
        i: index into `G` / `g_optimizer` of the generator to update.
        loss_grid: optional 2-D array; entry `[j][i]` receives the generator
            loss (as a Python float) against discriminator `j`.
            Pass `None` to skip the bookkeeping.

    Returns:
        The generator loss as a Python float.

    Relies on the notebook globals `D`, `G`, `g_optimizer`, `criterion`,
    `minibatch_size`, `g_inp`, `cuda` and `noise_sampler`.
    """
    g_optimizer[i].zero_grad()

    gen_input = noise_sampler(minibatch_size, g_inp)
    if cuda:
        gen_input = gen_input.cuda()
    g_fake_data = G[i](gen_input)

    # Only discriminator 0 is used as the critic. NOTE(review): widening this
    # range would require regenerating the fake batch for every discriminator,
    # because the optimizer step modifies G's weights in place.
    for j in range(1):
        dg_fake_decision = D[j](g_fake_data)
        # We want to fool the discriminator, so pretend the batch is all genuine.
        target = torch.ones_like(dg_fake_decision)
        g_error = criterion(dg_fake_decision, target)
        g_error.backward()
        g_optimizer[i].step()  # only G[i]'s parameters are registered with this optimizer

        if loss_grid is not None:
            # Store a plain float; a grad-tracking tensor does not belong in a numpy array.
            loss_grid[j][i] = g_error.item()

    return g_error.item()

In [None]:
def g_sample(i):
    """Generate one minibatch from generator `G[i]` and return it as a NumPy array.

    No gradients are recorded. Relies on the notebook globals `G`,
    `minibatch_size`, `g_inp`, `cuda` and `noise_sampler`.
    """
    noise = noise_sampler(minibatch_size, g_inp)
    if cuda:
        noise = noise.cuda()
    with torch.no_grad():
        generated = G[i](noise)
    return generated.cpu().numpy()

In [None]:
# Adversarial training: alternate discriminator and generator updates, and
# periodically print the loss grids and plot samples from several generators.
report_every = 10        # iterations between reports
n_trained_pairs = 1      # only the first discriminator/generator pair is updated
n_plotted = 5            # number of generators sampled at each report
grid_shape = (5, 5)      # shape of the per-iteration loss grids

# Created once so samples accumulate over the whole run; creating the list
# inside the loop would discard everything collected so far on every iteration.
samples = []

for it in tqdm_notebook(range(num_iterations)):
    # Discriminator phase: d_steps updates per pair, losses averaged.
    d_loss_grid = np.zeros(grid_shape)
    for i in range(n_trained_pairs):
        d_infos = []
        for d_index in range(d_steps):
            d_infos.append(d_loop(i, d_loss_grid))
        d_real_loss, d_fake_loss = np.mean(d_infos, 0)

    # Generator phase: g_steps updates per pair, losses averaged.
    g_loss_grid = np.zeros(grid_shape)
    for i in range(n_trained_pairs):
        g_infos = []
        for g_index in range(g_steps):
            g_infos.append(g_loop(i, g_loss_grid))
        g_loss = np.mean(g_infos)

    if it % report_every == 0:
        print("G-Loss Grid:")
        print(g_loss_grid)
        print("D-Loss Grid:")
        print(d_loss_grid)

        for i in range(n_plotted):
            g_fake_data = g_sample(i)
            samples.append(g_fake_data)
            plot(g_fake_data, title='[{}] Iteration {}'.format(prefix, it))
        # The losses do not depend on which generator was plotted, so print them once.
        print(d_real_loss, d_fake_loss, g_loss)

# Persist the first three generator/discriminator pairs as G1..G3 / D1..D3.
# Each file gets the model that matches its number; indexing with a loop
# variable left over from an earlier loop would write the same pair under
# all three names.
for model_num in range(3):
    torch.save(G[model_num], "G" + str(model_num + 1))
    torch.save(D[model_num], "D" + str(model_num + 1))

In [None]:
# Cross-training: for every ordered pair (i, j) with i != j, update
# discriminator i and generator j, then save generator i under "G_mixed<i+1>".
mixed_iterations = 1          # updates per (i, j) pair
mixed_report_every = 100      # report cadence; never reached while mixed_iterations < 100
mixed_grid_shape = (5, 5)

for i in range(5):
    for j in range(5):
        if j == i:
            continue
        for it in tqdm_notebook(range(mixed_iterations)):
            # d_loop / g_loop record their losses into a grid, so give them one.
            d_loss_grid = np.zeros(mixed_grid_shape)
            g_loss_grid = np.zeros(mixed_grid_shape)

            d_infos = []
            for d_index in range(d_steps):
                d_infos.append(d_loop(i, d_loss_grid))
            d_real_loss, d_fake_loss = np.mean(d_infos, 0)

            g_infos = []
            for g_index in range(g_steps):
                g_infos.append(g_loop(j, g_loss_grid))
            g_loss = np.mean(g_infos)

            if (it + 1) % mixed_report_every == 0:
                g_fake_data = g_sample(i)
                samples.append(g_fake_data)
                plot(g_fake_data, title='[{}] Iteration {}'.format(prefix, it))
                print(d_real_loss, d_fake_loss, g_loss)
    # Save the single generator for this index rather than the whole list of
    # generators. NOTE(review): confirm that G[i] is the model meant here.
    torch.save(G[i], "G_mixed" + str(i + 1))

# Visualize the samples through iterations

In [None]:
# plot the samples through iterations
def plot_samples(samples):
    """Draw one density panel per entry of `samples`, save the figure and show it.

    Args:
        samples: non-empty sequence of 2-D arrays indexed as `s[:, 0]` / `s[:, 1]`.

    Raises:
        ValueError: if `samples` is empty (there would be no panel to draw).

    Panel `k` is titled with step `k * log_interval`. The figure is written to
    `prefix + '.png'`. Relies on the notebook globals `log_interval`,
    `unrolled_steps` and `prefix`.
    """
    cols = len(samples)
    if cols == 0:
        raise ValueError("plot_samples needs at least one batch of samples")

    xmax = 5
    plt.figure(figsize=(2*cols, 2))
    first_ax = None
    for i, samps in enumerate(samples):
        if first_ax is None:
            first_ax = plt.subplot(1, cols, 1)
        else:
            # Every later panel shares its axes with the first one.
            plt.subplot(1, cols, i+1, sharex=first_ax, sharey=first_ax)
        # NOTE(review): `shaded` may not be a recognised kdeplot keyword (seaborn
        # documents `shade` / `fill`) -- confirm against the installed version.
        seaborn.kdeplot(samps[:, 0], samps[:, 1], shaded=True, cmap='Greens', n_levels=20, clip=[[-xmax,xmax]]*2)
        plt.xticks([])
        plt.yticks([])
        plt.title('step %d'%(i*log_interval))

    first_ax.set_ylabel('%d unrolling steps'% unrolled_steps)
    plt.gcf().tight_layout()
    plt.savefig(prefix + '.png')
    plt.show()
    plt.close()

In [None]:
# Save only the parameters of `model`. The method is `state_dict()`; written
# with a hyphen it is parsed as the subtraction `model.state - dict()`.
# NOTE(review): `model` is not defined in the visible cells -- bind it to the
# network you want to persist (e.g. one entry of `G` or `D`) before running this.
torch.save(model.state_dict(), "Model1")