# InfoGAN

## Background: GAN

$$
\min_G \max_D V(D,G) = \mathbb{E}_{x\sim P_\text{data}}[\log D(x)] + \mathbb{E}_{z\sim \text{noise}} [\log(1-D(G(z)))]
$$

## InfoGAN

### Main Idea: Mutual Information

$$
\min_G \max_D V_I(D,G) = V(D,G) - \lambda I(c; G(z,c))
$$

### Variational Mutual Information Maximization

One mathmatics:
Mutual information term $I(c; G(z,c))$ is hard to maximize directly as it requires access to the posterior $P(c|x)$. So, we approximate posterior $P(c|x)$ to auxilliary distribution $Q(c|x)$ using variational lower bounds techniques.

## References

* https://github.com/wiseodd/generative-models/blob/master/GAN/infogan/infogan_tensorflow.py
* https://gist.github.com/awjuliani/c9ecd8b37d33d6855cd4ed9aa16ce89f
* https://github.com/openai/InfoGAN (Official codes but hard to understand)

더이상은 수식 쓰기 귀찮으니 생략한다. 논문 참고.

## Develop steps

1. Only categorical condition [ing]
2. One categorical + two continuous conditions
3. Multiple + Multiple ... ?

### Network architecture for MNIST

![network_architecutre](infogan_network_architecture.png)

In [3]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

In [9]:
slim = tf.contrib.slim

In [4]:
mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)

Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz


In [5]:
# z_dim: 62 noise + 2 continuous + 10 categorical = 74

In [19]:
def lrelu(x, leak=0.2, name="lrelu"):
    with tf.variable_scope(name):
        f1 = 0.5 * (1 + leak)
        f2 = 0.5 * (1 - leak)
        return f1 * x + f2 * abs(x)

In [26]:
def dense(inputs, units, activ_fn=tf.nn.relu, use_bn=False, name="dense"):
    with tf.variable_scope(name):
        net = tf.layers.dense(inputs, units, activation=None)
        if use_bn:
            net = tf.layers.batch_normalization(net, training=training)
        net = activ_fn(net)
        
        return net

In [28]:
def conv(inputs, n_filters, kernel_size, strides=1, activ_fn=tf.nn.relu, use_bn=False, name="conv"):
    with tf.variable_scope(name):
        net = tf.layers.conv2d(inputs, n_filters, kernel_size, strides, padding='same')
        if use_bn:
            net = tf.layers.batch_normalization(net, training=training)
        net = activ_fn(net)
        
        return net

In [29]:
def upconv(inputs, n_filters, kernel_size, strides=2, activ_fn=tf.nn.relu, use_bn=False, name="upconv"):
    with tf.variable_scope(name):
        net = tf.layers.conv2d_transpose(inputs, n_filters, kernel_size, strides=strides, padding='same')
        if use_bn:
            net = tf.layers.batch_normalization(net, training=training)
        net = activ_fn(net)
        
        return net

In [30]:
# c1: categorical ~ Cat(10)
# c2, c3: continuous ~ Uniform(-1, 1)
def generator(z, c1, c2, c3, reuse=False):
    with tf.variable_scope("generator", reuse=reuse):
        z_c = tf.concat([z, c1, c2, c3], axis=1)
        net = dense(z_c, 1024, use_bn=True, name="dense1")
        net = dense(net, 7*7*128, use_bn=True, name="dense2") # 6272
        net = tf.reshape(net, [-1, 7, 7, 128], name="reshape") # 7x7x128
        net = upconv(net, 64, [4,4], use_bn=True, name="upconv1") # 14x14x64
        net = upconv(net, 1, [4,4], activ_fn=tf.nn.sigmoid, name="upconv2") # 28x28x1
        
        return net

In [35]:
# discfriminator D and recognition network Q share most of the network
def D_Q_shared_nets(x, reuse=False):
    with tf.variable_scope("D_Q_shared_nets", reuse=reuse):
        net = tf.reshape(x, [-1, 28, 28, 1])
        net = conv(net, 64, [4,4], strides=2, activ_fn=lrelu, name="conv1") # 14x14x64
        net = conv(net, 128, [4,4], strides=2, activ_fn=lrelu, use_bn=True, name="conv2") # 7x7x128
        net = slim.flatten(net)
        net = dense(net, 1024, activ_fn=lrelu, use_bn=True, name="dense1")
        
        return net

In [38]:
def discriminator(x, shared_net, reuse=False):
    with tf.variable_scope("discriminator", reuse=reuse):
        prob = dense(shared_net, 1, activ_fn=tf.nn.sigmoid, name="D_output")
    
        return prob

In [39]:
# auxilliary distribution Q(c|x)
# softmax for categorical, factored Gaussian for continuous
# Q. what is factored gaussian?
def Q(x, shared_net, reuse=False):
    with tf.variable_scope("Q", reuse=reuse):
        net = dense(shared_net, 128, activ_fn=lrelu, use_bn=True, name="Q_dense")
        c1_prob = dense(net, 10, activ_fn=tf.nn.softmax, name="c1_prob")
        # 사실 이 c2, c3 은 dense(net, 2) 로 합쳐도 됨
        c2_prob = dense(net, 1, activ_fn=tf.nn.tanh, name="c2_prob") 
        c3_prob = dense(net, 1, activ_fn=tf.nn.tanh, name="c3_prob")
        
        return c1_prob, c2_prob, c3_prob

In [90]:
# build nets
tf.reset_default_graph()

X = tf.placeholder(tf.float32, [None, 784])
training = tf.placeholder(tf.bool)
z = tf.placeholder(tf.float32, [None, 62])
c1 = tf.placeholder(tf.float32, [None, 10]) # tensor 는 타입이 동일해야 함. 전체 latent vector type 이 float 이므로 얘도 float.
c2 = tf.placeholder(tf.float32, [None, 1])
c3 = tf.placeholder(tf.float32, [None, 1])

G_sample = generator(z, c1, c2, c3)
D_real = discriminator(X, D_Q_shared_nets(X))
D_fake = discriminator(X, D_Q_shared_nets(G_sample, reuse=True), reuse=True)
Q_c1, Q_c2, Q_c3 = Q(G_sample, D_Q_shared_nets(G_sample, reuse=True))

D_loss = -tf.reduce_mean(tf.log(D_real + 1e-8) + tf.log(1 - D_fake + 1e-8))
G_loss = -tf.reduce_mean(tf.log(D_fake + 1e-8)) # heuristic non-saturation loss
# categorical loss
Q_c1_loss = tf.reduce_sum(tf.log(Q_c1 + 1e-8) * c1, axis=1)
# continuous loss - 사실 c2, c3 은 각각 1개라 reduce_sum 안해도 됨
Q_c2_loss = tf.reduce_sum(0.5 * (Q_c2 - c2)**2, axis=1)
Q_c3_loss = tf.reduce_sum(0.5 * (Q_c3 - c3)**2, axis=1)
Q_loss = -tf.reduce_mean(Q_c1_loss + Q_c2_loss + Q_c3_loss)

var_D_Q_shared = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="D_Q_shared_nets")
var_D = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="discriminator")
var_G = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="generator")
var_Q = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="Q")

# lambda = 1
D_train_op = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5).minimize(D_loss, var_list=var_D_Q_shared + var_D)
G_train_op = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.5).minimize(G_loss, var_list=var_G)
Q_train_op = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5).minimize(Q_loss, var_list=var_D_Q_shared + var_Q + var_G)


ValueError: Variable D_Q_shared_nets/conv1/conv2d/kernel already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:

  File "<ipython-input-28-96b4ebc49ea3>", line 3, in conv
    net = tf.layers.conv2d(inputs, n_filters, kernel_size, strides, padding='same')
  File "<ipython-input-35-189354405110>", line 5, in D_Q_shared_nets
    net = conv(net, 64, [4,4], strides=2, activ_fn=lrelu, name="conv1") # 14x14x64
  File "<ipython-input-90-e03f02ad23d5>", line 12, in <module>
    D_real = discriminator(X, D_Q_shared_nets(X))
