In [1]:
% pylab inline
from numpy import linalg as LA
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import glob
from tqdm import tqdm
import os
import sklearn.preprocessing as prep

def min_max_scale(X):
    preprocessor = prep.MinMaxScaler().fit(X)
    X_scaled = preprocessor.transform(X)
    return X_scaled

Populating the interactive namespace from numpy and matplotlib


In [2]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

In [3]:
normal_patches_dir = '/Z/personal-folders/interns/saket/histopath_data/CAMELYON16_patches/normal_patches_test/level_0/'
tumor_patches_dir = '/Z/personal-folders/interns/saket/histopath_data/CAMELYON16_patches/tumor_patches_test/level_0/'

np.random.seed(42)
master_matrix = []
label_matrix = []
y = []
list_of_tumor_files = list(glob.glob('{}*.png'.format(tumor_patches_dir)))
list_of_tumor_files = np.random.choice(list_of_tumor_files, 10000)
for f in tqdm(list_of_tumor_files):
    #standardized = (tf.clip_by_value(imread(f),0, 255)/127.5-1).reshape(-1, 256*256*3)
    #standardized = (np.clip(imread(f),0, 255)/127.5-1).reshape(-1, 256*256*3)
    standardized = (imread(f)/255.0).reshape(-1, 256*256*3)
    master_matrix.append(standardized)
    label_matrix.append('tumor')
    y.append(1)

tumor_count = len(label_matrix)

list_of_normal_files = list(glob.glob('{}*.png'.format(normal_patches_dir)))
list_of_normal_files = np.random.choice(list_of_normal_files, 10000)
#np.random.shuffle(list_of_normal_files)

for f in tqdm(list_of_normal_files):   
    #standardized = (tf.clip_by_value(imread(f),0, 255)/127.5-1).reshape(-1, 256*256*3)
    #standardized = (np.clip(imread(f),0, 255)/127.5-1).reshape(-1, 256*256*3)
    standardized = (imread(f)/255.0).reshape(-1, 256*256*3)
    master_matrix.append(standardized)
    label_matrix.append('normal')
    y.append(0)
    
master_matrix = np.array(master_matrix)
y=np.array(y)

100%|██████████| 10000/10000 [06:30<00:00, 25.63it/s]
100%|██████████| 10000/10000 [05:51<00:00, 28.48it/s]


In [4]:
standardized

array([[0.00379854, 0.00313725, 0.00344483, ..., 0.00118416, 0.00075356,
        0.0013687 ]], dtype=float32)

In [5]:
master_matrix.shape

(20000, 1, 196608)

In [6]:
#train_data = np.asarray(np.array(master_matrix, axis=np.newaxis), dtype=np.float32)
train_data = master_matrix
input_dim = train_data[0].shape

In [7]:
config = tf.ConfigProto(
    device_count = {'GPU': 2}
)
config.gpu_options.allocator_type = 'BFC'
#config

IMAGE_WIDTH = 256
IMAGE_HEIGHT = 256
IMAGE_CHANNELS = 3

class VAE(object):
    def __init__(self, input_dim, 
                 learning_rate=0.001, 
                 n_latent=8, batch_size=50):
        self.learning_rate = learning_rate
        self.n_latent = n_latent
        self.batch_size = batch_size
        self.input_dim = input_dim
        
        self._build_network()
        self._create_loss_optimizer()

        
        init = tf.global_variables_initializer()        
        #init = tf.initialize_all_variables()
        # Launch the session
        self.session = tf.InteractiveSession(config=config)
        self.session.run(init)
        self.saver = tf.train.Saver(tf.all_variables())
        
    
    def _build_network(self):        
        self.x = tf.placeholder(tf.float32, [None, self.input_dim])
        dense1 = tf.layers.dense(activation=tf.nn.elu, inputs=self.x, units=256)
        dense2 = tf.layers.dense(activation=tf.nn.elu, inputs=dense1, units=256)
        dense3 = tf.layers.dense(activation=tf.nn.elu, inputs=dense2, units=256)
        dense4 = tf.layers.dense(activation=None, inputs=dense3, units=self.n_latent * 2)
        self.mu = dense4[:, :self.n_latent]
        self.sigma = tf.nn.softplus(dense4[:, self.n_latent:])
        eps = tf.random_normal(shape=tf.shape(self.sigma),
                               mean=0, stddev=1, dtype=tf.float32)
        self.z = self.mu + self.sigma * eps
        
        ddense1 = tf.layers.dense(activation=tf.nn.elu, inputs=self.z, units=256)
        ddense2 = tf.layers.dense(activation=tf.nn.elu, inputs=ddense1, units=256)
        ddense3 = tf.layers.dense(activation=tf.nn.elu, inputs=ddense2, units=256)

        self.reconstructed = tf.layers.dense(activation=tf.nn.sigmoid, inputs=ddense3,
                                            units=self.input_dim)
    
    def _create_loss_optimizer(self):
        #self.reconstruction_loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(labels=self.x,
        #                                                                  logits=self.reconstructed))
        epsilon = 1e-10
        reconstruction_loss = -tf.reduce_sum(
            self.x * tf.log(epsilon+self.reconstructed) + (1-self.x) * tf.log(epsilon+1-self.reconstructed), 
            axis=1
        )
        
        self.reconstruction_loss = tf.reduce_mean(reconstruction_loss) / self.batch_size
        
        latent_loss = -0.5 * tf.reduce_sum(1 + tf.log(epsilon+self.sigma) - tf.square(self.mu) - tf.square(self.sigma),
                                           axis=1)
        latent_loss = tf.reduce_mean(latent_loss) / self.batch_size
        self.latent_loss = latent_loss
        self.cost = tf.reduce_mean(self.reconstruction_loss + self.latent_loss)
        # ADAM optimizer
        self.optimizer = \
            tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)         
    
    
    def fit_minibatch(self, batch):
        _, cost, reconstruction_loss, latent_loss = self.session.run([self.optimizer,
                                                                            self.cost,
                                                                            self.reconstruction_loss,
                                                                            self.latent_loss], 
                                                                           feed_dict = {self.x: batch})
        return  cost, reconstruction_loss, latent_loss
    
    def reconstruct(self, x):
        return self.session.run([self.reconstructed], feed_dict={self.x: x})
    
    def decoder(self, z):
        return self.session.run([self.reconstructed], feed_dict={self.z: z})
    
    def encoder(self, x):
        return self.session.run([self.z], feed_dict={self.x: x})

    def save_model(self, checkpoint_path, epoch):
        self.saver.save(self.session, checkpoint_path, global_step = epoch)

    def load_model(self, checkpoint_path):
        ckpt = tf.train.get_checkpoint_state(checkpoint_path)
        print('loading model: {}'.format(ckpt.model_checkpoint_path))
        self.saver.restore(self.session, checkpoint_path+'/'+ckpt.model_checkpoint_path)
    

In [8]:
def trainer(data, input_dim,
            learning_rate=1e-3, batch_size=100,
            num_epoch=50, n_latent=10, checkpoint_dir='/tmp/vae_checkpoint'):
    os.makedirs(checkpoint_dir, exist_ok=True)
    model = VAE(input_dim=input_dim,
                learning_rate=learning_rate,
                n_latent=n_latent,
                batch_size=batch_size)
    total_losses = []
    reconstruction_losses = []
    latent_losses = []

    for epoch in range(num_epoch):
        #for (batch, labels) in iter.get_next():            
        #    print(batch)
        for iter in range(num_sample // batch_size):
            batch = data[iter*batch_size: min((iter+1)*batch_size, data.shape[0]),]
            input_batch = batch[0]
            #input_batch = tf.reshape(input_batch, (-1, 256*256*3))
            #input_batch = np.asarray(input_batch, dtype=np.float32).reshape(-1, 256*256*3)
            #print(input_batch.shape)
            total_loss, reconstruction_loss, latent_loss = model.fit_minibatch(input_batch)
        latent_losses.append(latent_loss)
        reconstruction_losses.append(reconstruction_loss)
        total_losses.append(total_loss)
        
        if epoch % 5 == 0:
            print('[Epoch {}] Loss: {}, Recon loss: {}, Latent loss: {}'.format(
                epoch, total_loss, reconstruction_loss, latent_loss))
            checkpoint_path = os.path.join(checkpoint_dir, 'model.ckpt')
            model.save_model(checkpoint_path, epoch)
            print ("model saved to {}".format(checkpoint_path))
            
    print('Done!')
    return model, reconstruction_losses, latent_losses,  total_losses

In [9]:
input_dim

(1, 196608)

In [10]:
train_data.shape

(20000, 1, 196608)

In [11]:
train_data[0].dtype

dtype('float32')

In [12]:
input_dim

(1, 196608)

In [None]:



#input_dims = input_dim[0]*input_dim[1]*input_dim[2]
input_dims = input_dim[1]
num_sample = train_data.shape[0]
model, reconstruction_losses, latent_losses,  total_losses = trainer(train_data, input_dims,
                learning_rate=1e-4,  batch_size=32,
                num_epoch=1000, n_latent=10, 
                checkpoint_dir='/Z/personal-folders/interns/saket/vae_checkpoint_histoapath_2000')
    

Instructions for updating:
Please use tf.global_variables instead.
[Epoch 0] Loss: 144.99713134765625, Recon loss: 141.99961853027344, Latent loss: 2.9975156784057617
model saved to /Z/personal-folders/interns/saket/vae_checkpoint_histoapath_2000/model.ckpt


In [None]:
# Test the trained model: generation
%pylab inline
# Sample noise vectors from N(0, 1)
z = np.random.normal(size=[model.batch_size, model.n_latent])
x_generated = model.decoder(z)[0]

w = h = 256
n = np.sqrt(model.batch_size).astype(np.int32)
I_generated = np.empty((h*n, w*n, 3))

for i in range(n):
    for j in range(n):
        I_generated[i*h:(i+1)*h, j*w:(j+1)*w, :] = x_generated[i*n+j, :].reshape(256, 256, 3)

plt.figure(figsize=(8, 8))
plt.imshow(I_generated)# cmap='gray')


In [None]:
I_generated

In [None]:
x_sample = mnist.test.next_batch(100)[0]
x_reconstruct = model.reconstruct(x_sample)

plt.figure(figsize=(8, 12))
for i in range(7):

    plt.subplot(7, 2, 2*i + 1)
    plt.imshow(x_sample[i].reshape(28, 28), vmin=0, vmax=1, cmap="gray")
    plt.title("Test input")
    plt.colorbar()
    plt.subplot(7, 2, 2*i + 2)
    plt.imshow(x_reconstruct[0][i].reshape(28, 28), vmin=0, vmax=1, cmap="gray")
    plt.title("Reconstruction")
    plt.colorbar()
plt.tight_layout()

In [None]:
z_mu = vae.encoder(x_sample)[0]
plt.figure(figsize=(8, 6)) 
plt.scatter(z_mu[:, 0], z_mu[:, 1], c=np.argmax(y_sample, 1))
plt.colorbar()
plt.grid()


In [None]:
import tensorflow as tf
config = tf.ConfigProto(
    device_count = {'GPU': 0}
)
const_init_node = tf.constant_initializer(0.)
count_variable = tf.get_variable("count", [], initializer=const_init_node)

init = tf.global_variables_initializer()
sess = tf.Session(config=config)
sess.run(init)

print(sess.run([count_variable]))