In [9]:
import os
import sys

import pandas as pd
import numpy as np
import pickle
import unsupervised_learning_util as utl
import clustering_utl as cl_utl
import matplotlib as mpl
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA, FastICA, KernelPCA
from sklearn.manifold import TSNE, LocallyLinearEmbedding, MDS, Isomap, SpectralEmbedding
from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import silhouette_score, silhouette_samples, homogeneity_completeness_v_measure
from sklearn.metrics import homogeneity_score, calinski_harabasz_score, davies_bouldin_score
from yellowbrick.cluster import KElbowVisualizer, SilhouetteVisualizer
from warnings import simplefilter
from sklearn.mixture import GaussianMixture

plt.tight_layout()
plt.style.use("ggplot")
mpl.rcParams['figure.figsize'] = [8, 6]
mpl.rcParams['figure.dpi'] = 200
mpl.rcParams['savefig.dpi'] = 500


NJOBS = 32
VERBOSE = 0

%matplotlib inline

utl.check_folder("Part4")

C:\Users\joshu\OneDrive - Georgia Institute of Technology\Georgia-Tech\CS 7641 - Machine Learning\Assignments\Unsupervised Learning and Dimensionality Reduction\Part4 folder already exists.


In [2]:
def _split_into_dict(X, y):
    """Split one dataset with ``utl.split_data`` and label the six returned pieces.

    Parameters
    ----------
    X, y
        Feature and target objects forwarded unchanged to ``utl.split_data``.

    Returns
    -------
    dict
        Keys ``train_X``, ``train_y``, ``valid_X``, ``valid_y``, ``test_X``,
        ``test_y`` mapped, in that order, to the six values returned by
        ``utl.split_data(X, y, minMax=True)``.

    Raises
    ------
    ValueError
        If ``utl.split_data`` does not return exactly six values.
    """
    # minMax=True is passed through as-is (presumably enables min-max scaling
    # inside the helper - confirm in unsupervised_learning_util).
    train_X, train_y, valid_X, valid_y, test_X, test_y = utl.split_data(X, y, minMax=True)
    return {
        "train_X": train_X, "train_y": train_y,
        "valid_X": valid_X, "valid_y": valid_y,
        "test_X": test_X, "test_y": test_y,
    }


gathered_data = utl.setup(["MNIST"])
gathered_data_fashion = utl.setup(["Fashion-MNIST"])

# NOTE: an unscaled variant of both datasets (split_data(..., scale=False)) used to
# be built here as mnist_not_scaled / fashion_mnist_not_scaled; it was disabled and
# nothing in the visible cells uses it.
mnist = _split_into_dict(gathered_data["MNIST"]["X"],
                         gathered_data["MNIST"]["y"])
fashion_mnist = _split_into_dict(gathered_data_fashion["Fashion-MNIST"]["X"],
                                 gathered_data_fashion["Fashion-MNIST"]["y"])

dataset folder already exists.
MNIST dataset found:
	Loading MNIST.feather
	Finished loading MNIST dataset
dataset folder already exists.
Fashion-MNIST dataset found:
	Loading Fashion-MNIST.feather
	Finished loading Fashion-MNIST dataset


# Neural Network on Dimensionality Reduced Datasets

## Fashion-MNIST Dataset

In [None]:
def _load_reduced_dataset(method):
    """Load one pickled, dimensionality-reduced Fashion-MNIST dataset.

    Parameters
    ----------
    method : str
        File-name prefix of the reduction method ("PCA", "ICA", "RP" or "RF").

    Returns
    -------
    object
        Whatever was pickled in
        ``<cwd>/DimensionalityReduction/<method>_Fashion_Reduced_Dataset.pkl``
        (the cells below use it as an object with ``.shape`` / ``.iloc``).

    Raises
    ------
    FileNotFoundError
        If the pickle has not been produced yet.
    """
    path = f"{os.getcwd()}/DimensionalityReduction/{method}_Fashion_Reduced_Dataset.pkl"
    # SECURITY: pickle.load can execute arbitrary code - only load files written
    # by this project's own notebooks.
    # The `with` block closes the file; no explicit close() is needed.
    with open(path, "rb") as input_file:
        return pickle.load(input_file)


pca_fashion_reduced_data = _load_reduced_dataset("PCA")
print(pca_fashion_reduced_data.shape)

ica_fashion_reduced_data = _load_reduced_dataset("ICA")
print(ica_fashion_reduced_data.shape)

rp_fashion_reduced_data = _load_reduced_dataset("RP")
print(rp_fashion_reduced_data.shape)

rf_fashion_reduced_data = _load_reduced_dataset("RF")
print(rf_fashion_reduced_data.shape)

# Run Clustering on these datasets

In [None]:
# Run the project's K-Means helper on the PCA-reduced Fashion-MNIST frame.
# The frame is split positionally: every column except the last goes in first,
# the last column second (presumably features and labels - confirm against
# cl_utl.run_kmeans).
pca_all_but_last = pca_fashion_reduced_data.iloc[:, :-1]
pca_last_column = pca_fashion_reduced_data.iloc[:, -1]
cl_utl.run_kmeans(
    pca_all_but_last,
    pca_last_column,
    max_clusters=30,
    dataset_name="PCA_Reduced_To_KMeans",
    save_name="PCA_Reduced_Then_Clustered",
)

In [None]:
# Locate the K-Means elbow on the first 10,000 scaled Fashion-MNIST training rows.
plt.close("all")
fig, ax1 = plt.subplots()

# Seeded so the detected elbow (and every dataset derived from it) is
# reproducible across Restart & Run All.
model = KMeans(random_state=42)

# Draw on the axes created above explicitly instead of relying on the implicit
# "current axes". Per the Yellowbrick docs a (2, 30) tuple scans k = 2..29.
visualizer = KElbowVisualizer(model, k=(2, 30), ax=ax1, timings=False)
visualizer.fit(fashion_mnist["train_X"].iloc[:10000, :]).finalize()

# elbow_value_ is None when Yellowbrick cannot locate an elbow.
elbow = visualizer.elbow_value_

In [None]:
# Build the final K-Means model with the k chosen by the elbow search.
# KElbowVisualizer leaves elbow_value_ as None when no elbow is found; fail here
# with a clear message instead of an opaque parameter error inside scikit-learn.
if elbow is None:
    raise ValueError(
        "KElbowVisualizer did not locate an elbow; set n_clusters manually."
    )
# Seeded so the cluster-distance features saved later are reproducible.
cluster_model = KMeans(n_clusters=elbow, random_state=42)

In [None]:
# Fit K-Means on the first 10,000 training rows and keep the transformed output
# (for scikit-learn's KMeans, fit_transform yields the cluster-distance space).
kmeans_fit_rows = fashion_mnist["train_X"].iloc[:10000, :]
clustered_data = cluster_model.fit_transform(kmeans_fit_rows)

In [None]:
# Wrap the K-Means transform output in a DataFrame (default integer column names
# and a fresh 0..n-1 row index).
kmeans_reduced_df = pd.DataFrame(data=clustered_data)

In [None]:
# Attach the true labels for the same first 10,000 training rows.
# Assigning a Series to a column aligns on index labels, not on position. The
# rows of kmeans_reduced_df are positional (0..n-1), so the label index is reset
# first; otherwise a shuffled or offset train_y index would silently produce
# NaNs or mismatched labels.
kmeans_reduced_df["Label"] = fashion_mnist["train_y"].iloc[:10000].reset_index(drop=True)

In [None]:
# Persist the K-Means cluster-distance dataset (features + "Label") for later cells/notebooks.
clustering_dir = f"{os.getcwd()}/Clustering"
# Create the output folder on a fresh checkout instead of failing in open().
os.makedirs(clustering_dir, exist_ok=True)
# The `with` block closes the file; no explicit close() is needed.
with open(f"{clustering_dir}/KMeans_Clustered_Reduced_Dataset.pkl", "wb") as output_file:
    pickle.dump(kmeans_reduced_df, output_file)

In [3]:
# Expectation-Maximization model: 15 full-covariance Gaussian components,
# 10 initialisations, up to 500 EM iterations. With warm_start=True a repeated
# fit() call continues from the previous solution rather than starting over.
gmm_settings = {
    "n_components": 15,
    "n_init": 10,
    "covariance_type": "full",
    "warm_start": True,
    "max_iter": 500,
}
temp_gmm = GaussianMixture(**gmm_settings)

In [4]:
# Fit the mixture on the first 10,000 scaled Fashion-MNIST training rows.
# The fitted estimator is the cell's last expression, so its repr is displayed.
gmm_fit_rows = fashion_mnist["train_X"].iloc[:10000, :]
temp_gmm.fit(gmm_fit_rows)

GaussianMixture(max_iter=500, n_components=15, n_init=10, warm_start=True)

In [27]:
# Hard-assign each of the first 10,000 training rows to a mixture component.
gmm_predict_rows = fashion_mnist["train_X"].iloc[:10000, :]
reduced_data = temp_gmm.predict(gmm_predict_rows)

In [29]:
# Single-column frame holding the predicted mixture-component index per row.
temp_df = pd.DataFrame({"Label": reduced_data})

In [31]:
# One-hot encode the predicted component index: one indicator column per
# component that actually occurs in the predictions.
temp = pd.get_dummies(temp_df["Label"])

In [33]:
# Append the true labels for the same first 10,000 training rows.
# Column assignment from a Series aligns on index labels, while the one-hot rows
# are positional (0..n-1). Resetting the label index makes the pairing positional,
# so a shuffled or offset train_y index cannot silently produce NaNs or
# mismatched labels.
temp["Label"] = fashion_mnist["train_y"].iloc[:10000].reset_index(drop=True)

In [35]:
# Display the one-hot component indicators together with the appended "Label" column
# (pandas truncates the rendering to the first and last rows).
temp

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,Label
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,8
1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,6
3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,3
4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2
9996,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,4
9997,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,4
9998,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,8


In [36]:
# Persist the EM (Gaussian-mixture) one-hot dataset (indicators + "Label").
clustering_dir = f"{os.getcwd()}/Clustering"
# Create the output folder on a fresh checkout instead of failing in open().
os.makedirs(clustering_dir, exist_ok=True)
# NOTE: "Expecation" is misspelled in the file name; it is kept as-is because
# other notebooks may load the dataset by this exact name.
# The `with` block closes the file; no explicit close() is needed.
with open(f"{clustering_dir}/Expecation_Maximization_Clustered_Reduced_Dataset.pkl", "wb") as output_file:
    pickle.dump(temp, output_file)