In [None]:
# Import libraries
# NOTE(review): `tf` is not referenced in any of the cells visible here; it may
# be needed only as the keras backend -- confirm before removing.

import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import seaborn as sns

# Apply seaborn's default plot styling to all figures created below.
sns.set()
from sklearn import mixture
from keras.datasets import mnist
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from probml_utils import savefig, latexify

# Seed NumPy's global RNG so that code relying on it is repeatable when the
# notebook is run top to bottom.
np.random.seed(0)

In [None]:
# Set the figure dimensions through the probml_utils helper.
# NOTE(review): presumably this adjusts matplotlib rcParams for publication-size
# figures and the units are inches -- confirm against probml_utils.
latexify(fig_width=3, fig_height=2)

In [None]:
# Load MNIST through keras; the call returns a (train, test) pair of
# (images, labels) tuples, unpacked here into four arrays.
(x_train, y_train), (x_test, y_test) = mnist.load_data()  # Loading the MNIST dataset

In [None]:
# Keep a small, label-stratified 3% subset of the keras training images as the
# working set (X_train / Y_train); the remaining 97% lands in X_test / Y_test.
# random_state is pinned so that re-running this cell (or running cells out of
# order) always yields the same subset instead of depending on how much of
# NumPy's global RNG stream has already been consumed.
# NOTE(review): on a fresh top-to-bottom run this should give the same subset
# as before, since the global RNG was seeded with 0 -- confirm if exact
# reproduction of earlier figures matters.
X_train, X_test, Y_train, Y_test = train_test_split(
    x_train,
    y_train,
    test_size=0.97,
    stratify=y_train,
    random_state=0,
)
print(X_train.shape)

In [None]:
# Function to plot up to 100 digits on a 10x10 grid


def plot_digits(data, clim=(0, 255)):
    """Draw the first 100 images of ``data`` on a 10x10 grid of axes.

    Parameters
    ----------
    data : sequence of arrays
        ``data[i]`` must contain 28 * 28 values; each entry is reshaped to
        (28, 28) before being shown. Only the first 100 entries are drawn.
    clim : tuple (low, high), optional
        Color limits applied to every image. The default covers the 0-255
        range of MNIST pixel intensities. The previous hard-coded limits of
        (0, 16) saturated almost every non-background pixel; pass
        ``clim=(0, 16)`` to reproduce the old rendering.

    Returns
    -------
    None
        The figure is created as a side effect on the current matplotlib
        backend.
    """
    fig, ax = plt.subplots(10, 10, subplot_kw=dict(xticks=[], yticks=[]))
    fig.subplots_adjust(hspace=0.05, wspace=0.05)
    low, high = clim
    n_images = len(data)
    for i, axi in enumerate(ax.flat):
        if i >= n_images:
            # Fewer than 100 images were supplied: hide the unused grid cell
            # instead of failing with an IndexError.
            axi.axis("off")
            continue
        axi.imshow(data[i].reshape(28, 28), cmap="binary", vmin=low, vmax=high)


plot_digits(X_train)

In [None]:
# Flatten every 28x28 image into a 784-vector and project it onto the whitened
# principal components that together retain 87% of the variance.
pca = PCA(n_components=0.87, whiten=True)
data = pca.fit_transform(X_train.reshape(len(X_train), 28 * 28))
data.shape

In [None]:
# Model selection: fit one full-covariance GMM per candidate component count
# (50, 60, ..., 200) and record its AIC on the PCA-projected data; the lowest
# point of the curve indicates the preferred number of components.
n_components = np.arange(50, 210, 10)
models = []
aics = []
for n in n_components:
    candidate = mixture.GaussianMixture(n, covariance_type="full", random_state=0)
    candidate.fit(data)
    models.append(candidate)
    aics.append(candidate.aic(data))
plt.plot(n_components, aics);

In [None]:
# Final density model: a 100-component, full-covariance GMM fitted on the
# PCA-projected training images. Print whether EM reported convergence.
gmm = mixture.GaussianMixture(
    n_components=100,
    covariance_type="full",
    random_state=0,
).fit(data)
print(gmm.converged_)

In [None]:
# Draw 100 new points from the fitted mixture. sample() returns a tuple, and
# element 0 holds the sampled points in PCA space (used by the next cell).
data_new = gmm.sample(n_samples=100)
data_new[0].shape

In [None]:
# Map the sampled PCA-space points back to pixel space, show them on the digit
# grid, and save the resulting figure under the name "generated_samples".
latent_samples = data_new[0]
digits_new = pca.inverse_transform(latent_samples)
plot_digits(digits_new)
savefig("generated_samples")