# Importing Libraries and Packages

In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.model_selection import train_test_split
from keras.datasets import mnist

from scipy.stats import norm

import tensorflow as tf
import keras 
from keras import layers
from keras.models import Model
from keras import metrics
from keras import backend as K 

# Seed NumPy's global random number generator so NumPy-based randomness is repeatable.
# NOTE(review): only NumPy is seeded here; TensorFlow's own RNG is not — confirm whether that is intended.
np.random.seed(237)
# Reset Keras' global state so the model below is built in a fresh session.
K.clear_session()

from sklearn.metrics import accuracy_score, log_loss, confusion_matrix, f1_score


from tensorflow.python.framework.ops import disable_eager_execution
from collections import defaultdict
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go

# Switch TensorFlow 2 out of eager mode so the Keras model below is built and run as a graph.
# NOTE(review): presumably required because the custom loss layer reads symbolic tensors
# (z_mu, z_log_sigma) that are not among its inputs — see "Reference 2" at the end; confirm.
disable_eager_execution()

# Importing Dataset

In [None]:
# Read the training and test CSVs of the digit-recognizer dataset in one pass.
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/digit-recognizer/train.csv',
                     '../input/digit-recognizer/test.csv')
)

# Preview the first rows of the training frame.
df_train.head()

In [None]:
# A notebook cell only displays its LAST expression, so a bare `df_train.shape`
# followed by `df_test.shape` silently drops the first value.
# Returning one tuple shows both: (train shape, test shape).
df_train.shape, df_test.shape

# Data Pre-Processing

In [None]:
# Separate the target column from the pixel columns:
# y_train keeps the 'label' column, df_train keeps everything else.
y_train = df_train['label']
df_train = df_train.drop(columns=['label'])

In [None]:
def _as_unit_images(frame):
    """Turn a frame of flat pixel rows into an array of 28x28x1 images divided by 255.

    Each row of ``frame`` becomes one (28, 28, 1) image; the number of images is
    inferred from the data (the -1 in the reshape).
    """
    return frame.values.reshape(-1, 28, 28, 1) / 255.0


# Normalizing and reshaping: from here on both names hold NumPy arrays, not DataFrames.
df_train = _as_unit_images(df_train)
df_test = _as_unit_images(df_test)

In [None]:
# Hold out every image labelled 9 as the "anomaly" group; the model is trained
# only on the remaining digits.
anom_mask = (y_train == 9)
anomaly_test = df_train[anom_mask]
df_train, y_train = df_train[~anom_mask], y_train[~anom_mask]

# Carve a test split out of the remaining digits, sized to match the anomaly group.
# NOTE: this rebinds df_test, so the test CSV loaded earlier is no longer
# reachable under that name after this cell.
df_train, df_test, y_train, y_test = train_test_split(
    df_train,
    y_train,
    test_size=anomaly_test.shape[0],
    random_state=42,
)
print('Training Images', df_train.shape, 'Testing Images', df_test.shape, 'Anomaly Images', anomaly_test.shape)

# Visualizing the dataset

In [None]:
# Show four training images on a 2x2 grid (subplot positions 1..4).
plt.figure(1)
for position, sample_idx in enumerate((13, 690, 2375, 4213), start=1):
    plt.subplot(2, 2, position)
    # Drop the trailing channel axis so imshow receives a 2-D array.
    plt.imshow(df_train[sample_idx][:, :, 0])
plt.show()

# Model
In VAEs, **instead of encoding an input as a single point, we encode it as a distribution over the latent space**. The model is then trained as follows:
- The input is encoded as a distribution over the latent space.
- A point in the latent space is sampled from that distribution.
- The sampled point is decoded and the reconstruction error is computed.
- Finally, the reconstruction error is backpropagated through the network.
<br> <p>
    
    
The training loss of a VAE is the sum of two terms: the reconstruction loss and the similarity loss. The reconstruction loss measures how far the reconstructed output is from the input; in this notebook it is the binary cross-entropy between the two. The similarity loss is the KL divergence between the latent space distribution and a standard Gaussian (zero mean and unit variance); in the code below it is scaled by a small weight before being added to the reconstruction loss.


The encoder has two output layers:
- the mean of the latent distribution
- the log-variance of the latent distribution

In [None]:
# Shape of a single input image; passed to keras.Input when the encoder is built.
img_shape = (28, 28, 1)
# Batch size used for both vae.fit and encoder.predict.
batch_size = 16
# Number of latent dimensions: the width of the z_mu / z_log_sigma Dense layers.
latent_dim = 6

## Encoder Architecture

In [None]:
encoder_inputs = keras.Input(shape=img_shape)

# Convolutional feature extractor. Only the second layer is strided (2x2),
# the other three keep the spatial size ('same' padding, default stride).
features = layers.Conv2D(32, 3, padding='same', activation='relu')(encoder_inputs)
features = layers.Conv2D(64, 3, padding='same', activation='relu', strides=(2, 2))(features)
features = layers.Conv2D(64, 3, padding='same', activation='relu')(features)
features = layers.Conv2D(64, 3, padding='same', activation='relu')(features)

# Remember the feature-map shape: the decoder needs it to undo the Flatten.
shape_before_flattening = K.int_shape(features)

features = layers.Flatten()(features)
features = layers.Dense(16, activation="relu")(features)

# Two linear heads of width latent_dim: the latent mean and the latent (log-)variance.
# The KL term of the loss reads z_log_sigma as a log-variance.
z_mu = layers.Dense(latent_dim)(features)
z_log_sigma = layers.Dense(latent_dim)(features)

## Creating a sampling layer

In [None]:
def sampling(args):
    """Draw one latent sample per input with the reparameterization trick.

    Parameters
    ----------
    args : sequence of two tensors ``(z_mu, z_log_sigma)``
        ``z_mu`` is the latent mean. ``z_log_sigma`` is the latent
        log-variance: the KL term of the loss uses ``exp(z_log_sigma)`` as the
        variance, so the same interpretation must be used here.

    Returns
    -------
    Tensor
        ``z_mu + sigma * epsilon`` with ``epsilon ~ N(0, 1)`` and
        ``sigma = exp(0.5 * z_log_sigma)``.
    """
    z_mu, z_log_sigma = args
    # One standard-normal noise vector of width latent_dim per batch element.
    epsilon = tf.keras.backend.random_normal(shape=(tf.keras.backend.shape(z_mu)[0], latent_dim),
                              mean=0., stddev=1.)
    # std = sqrt(variance) = exp(0.5 * log-variance). Using exp(z_log_sigma)
    # directly would scale the noise by the variance instead of the standard
    # deviation, which contradicts the KL term of the loss.
    return z_mu + tf.keras.backend.exp(0.5 * z_log_sigma) * epsilon

# sample vector from the latent distribution
z = layers.Lambda(sampling)([z_mu, z_log_sigma])

## Decoder Architecture

In [None]:
# The decoder is its own model; its input has the shape of one latent sample.
decoder_input = layers.Input(K.int_shape(z)[1:])

# Project the latent vector up to the element count of the encoder's
# pre-flatten feature map, then restore that feature-map shape.
flat_units = np.prod(shape_before_flattening[1:])
upsampled = layers.Dense(flat_units, activation='relu')(decoder_input)
upsampled = layers.Reshape(shape_before_flattening[1:])(upsampled)

# The stride-2 transposed convolution mirrors the encoder's stride-2 convolution;
# the final 1-filter sigmoid convolution emits a single-channel image.
upsampled = layers.Conv2DTranspose(32, 3, padding='same', activation='relu', strides=(2, 2))(upsampled)
reconstruction = layers.Conv2D(1, 3, padding='same', activation='sigmoid')(upsampled)
decoder_z = keras.Model(decoder_input, reconstruction)

# Decode the sample drawn from the latent distribution.
decoder = decoder_z(z)

## Defining the loss

In [None]:
class CustomVariationalLayer(keras.layers.Layer):
    """Pass-through layer whose purpose is to attach the VAE loss to the model.

    It receives ``[original_images, reconstructed_images]``, registers the loss
    through ``add_loss`` and returns the original images unchanged.
    """

    def vae_loss(self, x, decoder):
        """Return the scalar VAE loss for inputs ``x`` and reconstructions ``decoder``.

        The loss is the mean of (binary cross-entropy + weighted KL term).
        ``z_mu`` and ``z_log_sigma`` are read from the enclosing notebook scope,
        not from the arguments.
        """
        flat_input = tf.keras.backend.flatten(x)
        flat_reconstruction = tf.keras.backend.flatten(decoder)

        # Reconstruction term: binary cross-entropy between input and output pixels.
        reconstruction_term = keras.metrics.binary_crossentropy(flat_input, flat_reconstruction)

        # KL term, scaled by 5e-4 and averaged over the latent axis.
        kl_term = -5e-4 * K.mean(
            1 + z_log_sigma - K.square(z_mu) - K.exp(z_log_sigma),
            axis=-1,
        )
        return K.mean(reconstruction_term + kl_term)

    def call(self, inputs):
        """Register the loss for ``inputs = [x, reconstruction]`` and return ``x``."""
        x, decoder = inputs[0], inputs[1]
        self.add_loss(self.vae_loss(x, decoder), inputs=inputs)
        return x

# Attach the loss to the graph: input images in, decoded latent sample as reconstruction.
y = CustomVariationalLayer()([encoder_inputs, decoder])

In [None]:
# End-to-end model: image input -> output of the loss-carrying layer.
vae = Model(inputs=encoder_inputs, outputs=y)

# loss=None: the objective is the one registered via add_loss in CustomVariationalLayer.
vae.compile(
    optimizer='rmsprop',
    loss=None,
    metrics=["accuracy"],
    experimental_run_tf_function=False,
)
vae.summary()

In [None]:
# No targets are passed (y=None): the training loss is the one added inside the model.
vae.fit(
    x=df_train,
    y=None,
    shuffle=True,
    epochs=10,
    batch_size=batch_size,
)

In [None]:
# Encoder-only model: maps an image to the mean of its latent distribution (z_mu).
encoder = Model(inputs=encoder_inputs, outputs=z_mu)

# Latent means of the held-out split.
X_test_encoded = encoder.predict(df_test, batch_size=batch_size)

In [None]:
# Encode the held-out split into latent means.
encoder = Model(inputs=encoder_inputs, outputs=z_mu)
x_valid_noTest_encoded = encoder.predict(df_test, batch_size=batch_size)

# Scatter the first two latent coordinates, coloured by the true digit label.
fig, ax = plt.subplots(figsize=(10, 10))
latent_points = ax.scatter(
    x_valid_noTest_encoded[:, 0],
    x_valid_noTest_encoded[:, 1],
    c=y_test,
    cmap='icefire',
)
fig.colorbar(latent_points, ax=ax)

plt.show()

# Dimensionality Reduction

In [None]:
from sklearn.decomposition import PCA
from matplotlib.cm import get_cmap
from sklearn.manifold import TSNE
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [None]:
# Flatten every held-out image into a single row vector.
X = df_test.reshape(len(df_test), -1)

# Reduce the latent means to two principal components.
# Note: `pca` holds the projected coordinates, not the PCA estimator.
pca = PCA(n_components=2).fit_transform(x_valid_noTest_encoded)

In [None]:
# PCA projection of the latent means, coloured by the true digit label.
pc_first, pc_second = pca[:, 0], pca[:, 1]
plt.scatter(pc_first, pc_second, c=y_test, cmap='icefire')
plt.colorbar()

plt.show()

In [None]:
# Supervised 2-D projection: LDA is fitted on the latent means together with
# the labels y_test, then applied to the same latent means.
lda = LDA(n_components=2)
x_lda = lda.fit(x_valid_noTest_encoded, y_test).transform(x_valid_noTest_encoded)

In [None]:
# LDA projection of the latent means, coloured by the true digit label.
ld_first, ld_second = x_lda[:, 0], x_lda[:, 1]
plt.scatter(ld_first, ld_second, c=y_test, cmap='icefire')
plt.colorbar()

plt.show()

In [None]:
# tsne = TSNE(n_components = 2)
# x_tsne = tsne.fit_transform(x_valid_noTest_encoded) 

In [None]:
from sklearn.cluster import KMeans

# Fit k-means on the 2-D PCA coordinates and keep each point's cluster index.
kmeans = KMeans(n_clusters=10)
kmeans_pca = kmeans.fit_predict(pca)

# Draw the same points twice: first coloured by k-means cluster, then by true label.
for point_colors in (kmeans_pca, y_test):
    plt.scatter(pca[:, 0], pca[:, 1], c=point_colors, cmap='icefire')
    plt.show()

In [None]:
# Fit k-means on the 2-D LDA coordinates and keep each point's cluster index.
kmeans = KMeans(n_clusters=10)
kmeans_9 = kmeans.fit_predict(x_lda)

# Draw the same points twice: first coloured by k-means cluster, then by true label.
lda_x, lda_y = x_lda[:, 0], x_lda[:, 1]
for point_colors in (kmeans_9, y_test):
    plt.scatter(lda_x, lda_y, c=point_colors, cmap='icefire')
    plt.show()

In [None]:
from sklearn.cluster import DBSCAN

# Density-based clustering on the full latent means (not on a 2-D projection).
# fit() returns the fitted estimator, which is what DBSCAN_cluster holds.
dbscan_model = DBSCAN(eps=10, min_samples=5)
DBSCAN_cluster = dbscan_model.fit(x_valid_noTest_encoded)

In [None]:
# Embed the latent means of the held-out split into 2-D with t-SNE.
# verbose = 1 makes scikit-learn print progress while fitting.
tsne = TSNE(n_components=2, verbose = 1)
# x_tsne holds the 2-D coordinates, one row per held-out image.
x_tsne = tsne.fit_transform(x_valid_noTest_encoded) 

In the label-colored plot below, each colored cluster corresponds to one type of digit. Clusters that lie close together are digits that are structurally similar (i.e. digits that share information in the latent space).

In [None]:
# Cluster the t-SNE embedding itself. The previous version fitted k-means on
# x_lda (a copy-paste from the LDA cell), so the colours drawn on the t-SNE
# scatter were clusters from a different projection.
kmeans = KMeans(n_clusters=10)
# Compute cluster centers and predict the cluster index of every t-SNE point.
kmeans_tsne = kmeans.fit_predict(x_tsne)

# t-SNE points coloured by k-means cluster.
plt.scatter(x_tsne[:, 0], x_tsne[:, 1], c=kmeans_tsne, cmap='icefire')
plt.title('t-SNE embedding: k-means clusters')
plt.show()

# The same points coloured by the true digit label, for comparison.
plt.scatter(x_tsne[:, 0], x_tsne[:, 1], c=y_test, cmap='icefire')
plt.title('t-SNE embedding: true labels')
plt.show()

[Reference 1](https://blog.keras.io/building-autoencoders-in-keras.html)

[Reference 2](https://stackoverflow.com/questions/65366442/cannot-convert-a-symbolic-keras-input-output-to-a-numpy-array-typeerror-when-usi)