In [2]:
import sys
assert sys.version_info >= (3, 5)

In [3]:
import tensorflow as tf
from tensorflow import keras
# Compare the version numerically: a plain string comparison is lexicographic,
# so e.g. "10.0" >= "2.0" would evaluate to False.
assert tuple(int(part) for part in tf.__version__.split(".")[:2]) >= (2, 0)

import numpy as np

In [4]:
import os

# Restrict this process to a single GPU. Both variables are set before the
# first TensorFlow device query in this notebook.
# NOTE(review): presumably PCI_BUS_ID makes index "0" match nvidia-smi's numbering — confirm.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
os.environ["CUDA_VISIBLE_DEVICES"]="0"

# Troubleshooting: if there are issues, check that os.environ's LD_LIBRARY_PATH here
# matches the value seen when running Python from a terminal.

# If the GPU still cannot be selected, try:
# https://forums.fast.ai/t/tip-limiting-tensorflow-to-one-gpu/1995

In [5]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [6]:
tf.__version__


'2.1.0'

In [5]:
# Short alias for the Keras backend module, used below for tensor math
# (random_normal, exp, square, sum, mean).
K = keras.backend


In [6]:
# Load MNIST and rescale the images by 1/255.
mnist = keras.datasets.mnist
(X_train_full, y_train_full), (X_test, y_test) = mnist.load_data()

# Hold out the first 5000 training images as a validation set.
X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

X_test = X_test / 255.0

# Semi-supervised split: only the first 1000 training points are treated as
# labelled in this example; the rest are treated as unlabelled.
X_train_la = X_train[:1000]
X_train_un = X_train[1000:]

y_train_la = y_train[:1000]
# Placeholder targets for the unlabelled points. It needs one slot per
# *unlabelled* sample, so it is sized from X_train_un (not X_train_la).
# np.empty leaves the values uninitialised: never read them as real labels.
y_train_un = np.empty((X_train_un.shape[0]))



# Encoder #

In [7]:
# Seed NumPy's and TensorFlow's global random generators.
np.random.seed(42)
tf.random.set_seed(42)

class Sampling(keras.layers.Layer):
    """Sample a coding from a Gaussian given its mean and log-variance."""

    def call(self, inputs):
        """Return ``mean + sigma * epsilon`` for ``inputs == (mean, log_var)``.

        ``epsilon`` is random normal noise with the same shape as ``log_var``
        and ``sigma`` is ``exp(log_var / 2)``.
        """
        mu, log_variance = inputs
        epsilon = K.random_normal(tf.shape(log_variance))
        sigma = K.exp(log_variance / 2)
        return epsilon * sigma + mu

# Dimensionality of the latent coding produced by the encoder.
codings_size = 50

# Encoder: 28x28 input -> flatten -> Dense(600) -> Dense(300), both softplus,
# followed by two linear heads (mean and log-variance) and a sampling layer.
inputs = keras.layers.Input(shape=[28, 28])
flat_pixels = keras.layers.Flatten()(inputs)
hidden_600 = keras.layers.Dense(600, activation="softplus")(flat_pixels)
z = keras.layers.Dense(300, activation="softplus")(hidden_600)

codings_mean = keras.layers.Dense(codings_size)(z)
codings_log_var = keras.layers.Dense(codings_size)(z)
codings = Sampling()([codings_mean, codings_log_var])

# The model has FOUR outputs: mean, log-variance, sampled coding, and the last
# hidden activation `z`. Callers must unpack all four.
encoder_outputs = [codings_mean, codings_log_var, codings, z]
variational_encoder = keras.models.Model(inputs=[inputs], outputs=encoder_outputs)

# Decoder #

In [8]:
# Decoder: coding -> Dense(300) -> Dense(600), both softplus, then 28*28
# sigmoid units reshaped back to a 28x28 output.
decoder_inputs = keras.layers.Input(shape=[codings_size])
dec_hidden_300 = keras.layers.Dense(300, activation="softplus")(decoder_inputs)
dec_hidden_600 = keras.layers.Dense(600, activation="softplus")(dec_hidden_300)
x = keras.layers.Dense(28 * 28, activation="sigmoid")(dec_hidden_600)
outputs = keras.layers.Reshape([28, 28])(x)
variational_decoder = keras.models.Model(
    inputs=[decoder_inputs],
    outputs=[outputs],
)

# VAE # 

In [9]:
# Chain encoder and decoder: only the sampled coding is fed to the decoder.
_, _, codings, last_hidden_layer = variational_encoder(inputs)
reconstructions = variational_decoder(codings)
variational_ae = keras.models.Model(inputs=[inputs], outputs=[reconstructions])

# Latent (KL) loss per instance, summed over the coding dimensions.
kl_terms = 1 + codings_log_var - K.exp(codings_log_var) - K.square(codings_mean)
latent_loss = -0.5 * K.sum(kl_terms, axis=-1)

# The batch-mean latent loss is divided by 784 (= 28 * 28 pixels) before being added.
# NOTE(review): presumably this puts it on the same per-pixel scale as the Keras
# binary cross-entropy reconstruction loss, which is a mean over pixels — confirm.
variational_ae.add_loss(K.mean(latent_loss) / 784)
variational_ae.compile(loss="binary_crossentropy", optimizer="rmsprop")

In [10]:


# Early stopping with a patience of 10 epochs; the best weights are restored
# when training stops.
early_stopping_cb = keras.callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True,
)

# Autoencoder training: the images are both the inputs and the targets.
history = variational_ae.fit(
    X_train,
    X_train,
    epochs=200,
    batch_size=32,
    validation_data=(X_valid, X_valid),
    callbacks=[early_stopping_cb],
)

Train on 55000 samples, validate on 5000 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200


# SVM Classifier #

The VAE is trained on the entire training set (no labels are needed), and then an SVM is trained on the codings of the 1000 labelled examples only.

In [10]:
# The encoder returns four arrays (mean, log-variance, sampled coding, last
# hidden layer), so all four must be unpacked; only the coding means are kept
# as SVM features here.
codings_mean, _, _, _ = variational_encoder.predict(X_train_la)

from sklearn.svm import SVC

svc_class = SVC(kernel="rbf", gamma='scale', random_state=42, C=1000)
svc_class.fit(codings_mean, y_train_la)
# Accuracy on the same labelled points the SVM was fitted on.
svc_class.score(codings_mean, y_train_la)


1.0

In [11]:
# Encode the test images. The encoder has four outputs; only the coding means
# are needed here, so the other three are discarded.
codings_mean_2, _, _, _ = variational_encoder.predict(X_test)



In [12]:
# Test-set accuracy of the SVM using the coding means as features.
svc_class.score(codings_mean_2, y_test)


0.9118

Ah, far better results when I use the mean of the codings :)

How about when I use both the mean and the (log-)variance of the codings as features?

In [13]:
# Unpack all four encoder outputs (the last two are not needed here). Note that
# the second output is the *log*-variance of the codings, despite the name.
codings_mean, codings_var, _, _ = variational_encoder.predict(X_train_la)


In [28]:
# Stack the coding means and (log-)variances side by side into one feature matrix.
train = np.hstack([codings_mean,codings_var])

In [29]:
# RBF-kernel SVM fitted on the combined features; fit() returns the estimator.
svc_class = SVC(C=1000, kernel="rbf", gamma="scale", random_state=42).fit(
    train, y_train_la
)
svc_class.score(train, y_train_la)

1.0

In [30]:
# Build the test features the same way as `train`. The encoder returns four
# arrays, so the two unused outputs must be unpacked (and discarded) as well.
codings_mean2, codings_var2, _, _ = variational_encoder.predict(X_test)
test = np.hstack([codings_mean2, codings_var2])

In [1]:
# Re-check of the training accuracy. Requires `svc_class` and `train` from the
# cells above to exist in the current kernel session.
svc_class.score(train, y_train_la)

NameError: name 'svc_class' is not defined

In [None]:
# Build the test features (coding means and log-variances side by side). The
# encoder returns four arrays, so the two unused outputs are unpacked and dropped.
codings_mean2, codings_var2, _, _ = variational_encoder.predict(X_test)
test = np.hstack([codings_mean2, codings_var2])

# Test-set accuracy of the SVM fitted on the combined features.
svc_class.score(test, y_test)

Score of 0.8968 — a bit worse than using the coding means alone (0.9118).

How about if we use the last hidden layer?

In [11]:
# Use the encoder's fourth output (its last hidden layer) as the SVM features.
_, _, _, last_hidden_layer = variational_encoder.predict(X_train_la)

from sklearn.svm import SVC

# fit() returns the fitted estimator, so construction and fitting are chained.
svc_class = SVC(C=1000, kernel="rbf", gamma="scale", random_state=42).fit(
    last_hidden_layer, y_train_la
)
svc_class.score(last_hidden_layer, y_train_la)


1.0

In [14]:
# Encode the test images, keep the encoder's fourth output (last hidden layer),
# and report the SVM's test-set accuracy on those features.
_,_,_,last_hidden_layer2 = variational_encoder.predict(X_test)
svc_class.score(last_hidden_layer2, y_test)

0.9082

Good (0.9082), but still not quite as good as using the coding means alone (0.9118).