In [1]:
import os
import sys

import os
import pickle

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
color = sns.color_palette()

import unsupervised_learning_util as utl
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.random_projection import GaussianRandomProjection
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA, FastICA
from sklearn.manifold import TSNE

plt.tight_layout()
plt.style.use("ggplot")
mpl.rcParams['figure.figsize'] = [8, 6]
mpl.rcParams['figure.dpi'] = 200
mpl.rcParams['savefig.dpi'] = 500


NJOBS = 32
VERBOSE = 0
limit = 5000

folder = "DimensionalityReduction/"
utl.check_folder(folder)

%matplotlib inline

C:\Users\joshu\OneDrive - Georgia Institute of Technology\Georgia-Tech\CS 7641 - Machine Learning\Assignments\Unsupervised Learning and Dimensionality Reduction\DimensionalityReduction/ folder already exists.


# Load Data

In [2]:
gathered_data = utl.setup(["MNIST"])
gathered_data_fashion = utl.setup(["Fashion-MNIST"])

mnist = {}
fashion_mnist = {}
mnist_not_scaled = {}
fashion_mnist_not_scaled = {}

mnist['train_X'], mnist['train_y'], \
mnist['valid_X'], mnist['valid_y'], \
mnist['test_X'], mnist['test_y'] = utl.split_data(gathered_data["MNIST"]["X"],
                                                  gathered_data["MNIST"]["y"], minMax=True)
mnist_not_scaled['train_X'], mnist_not_scaled['train_y'], \
mnist_not_scaled['valid_X'], mnist_not_scaled['valid_y'], \
mnist_not_scaled['test_X'], mnist_not_scaled['test_y'] = utl.split_data(
    gathered_data["MNIST"]["X"], gathered_data["MNIST"]["y"], scale=False)

fashion_mnist['train_X'], fashion_mnist['train_y'], \
fashion_mnist['valid_X'], fashion_mnist['valid_y'], \
fashion_mnist['test_X'], fashion_mnist['test_y'] = utl.split_data(gathered_data_fashion["Fashion-MNIST"]["X"],
                                                                  gathered_data_fashion["Fashion-MNIST"]["y"],
                                                                  minMax=True)

fashion_mnist_not_scaled['train_X'], fashion_mnist_not_scaled['train_y'], \
fashion_mnist_not_scaled['valid_X'], fashion_mnist_not_scaled['valid_y'], \
fashion_mnist_not_scaled['test_X'], fashion_mnist_not_scaled['test_y'] = utl.split_data(
    gathered_data_fashion["Fashion-MNIST"]["X"], gathered_data_fashion["Fashion-MNIST"]["y"], scale=False)

dataset folder already exists.
MNIST dataset found:
	Loading MNIST.feather
	Finished loading MNIST dataset
dataset folder already exists.
Fashion-MNIST dataset found:
	Loading Fashion-MNIST.feather
	Finished loading Fashion-MNIST dataset


# Only Run if you need to recreate the reduced datasets without going through the optimization processes

In [3]:
def combine_and_save(X, y, dataset_name, algorithm_name):
    X.reset_index(inplace=True, drop=True)
    X["Label"] = y
    with open(f"{os.getcwd()}/DimensionalityReduction/{algorithm_name}_{dataset_name}_Reduced_Dataset.pkl", "wb") as output_file:
        pickle.dump(X, output_file)
        output_file.close()

In [4]:
limit=10000
# PCA
pca_mnist = PCA(n_components=326).fit_transform(mnist['train_X'].iloc[:limit, :])
pca_fashion = PCA(n_components=445).fit_transform(fashion_mnist['train_X'].iloc[:limit, :])

In [5]:
combine_and_save(X=pd.DataFrame(pca_mnist), y=mnist['train_y'].iloc[:limit], dataset_name="MNIST", algorithm_name="PCA")
combine_and_save(X=pd.DataFrame(pca_fashion), y=fashion_mnist['train_y'].iloc[:limit], dataset_name="Fashion", algorithm_name="PCA")

In [6]:
# ICA
ica_mnist = FastICA(n_components=284, whiten=True, max_iter=400).fit_transform(mnist['train_X'].iloc[:limit, :])
ica_fashion = FastICA(n_components=412, whiten=True, max_iter=400).fit_transform(fashion_mnist['train_X'].iloc[:limit, :])



In [7]:
combine_and_save(X=pd.DataFrame(ica_mnist), y=mnist['train_y'].iloc[:limit], dataset_name="MNIST", algorithm_name="ICA")
combine_and_save(X=pd.DataFrame(ica_fashion), y=fashion_mnist['train_y'].iloc[:limit], dataset_name="Fashion", algorithm_name="ICA")

In [8]:
# RP
rp_mnist = GaussianRandomProjection(n_components=715).fit_transform(mnist['train_X'].iloc[:limit, :])
rp_fashion = GaussianRandomProjection(n_components=746).fit_transform(fashion_mnist['train_X'].iloc[:limit, :])

In [9]:
combine_and_save(X=pd.DataFrame(rp_mnist), y=mnist['train_y'].iloc[:limit], dataset_name="MNIST", algorithm_name="RP")
combine_and_save(X=pd.DataFrame(rp_fashion), y=fashion_mnist['train_y'].iloc[:limit], dataset_name="Fashion", algorithm_name="RP")

In [10]:
# RF
rf_mnist = RandomForestClassifier(n_estimators=1000, n_jobs=-1, verbose=10, warm_start=True)
X = mnist['train_X'].iloc[:limit, :]
y = mnist['train_y'].iloc[:limit]
rf_mnist.fit(X, y)
rf_mnist_df = pd.DataFrame(columns=["Feature_Importance"], data=rf_mnist.feature_importances_)
rf_mnist_df.sort_values(by=["Feature_Importance"], ascending=False, inplace=True)
rf_mnist_feature_index = rf_mnist_df.copy().index
mnist_idx = rf_mnist_feature_index.to_numpy()
mnist_idx = mnist_idx[:152]
mnist_df = X.iloc[:limit, mnist_idx]
result_X = pd.DataFrame(mnist_df)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 36 concurrent workers.


building tree 1 of 1000building tree 2 of 1000
building tree 3 of 1000
building tree 4 of 1000

building tree 5 of 1000
building tree 6 of 1000building tree 7 of 1000

building tree 8 of 1000
building tree 9 of 1000
building tree 10 of 1000
building tree 11 of 1000
building tree 12 of 1000
building tree 13 of 1000
building tree 14 of 1000
building tree 15 of 1000
building tree 16 of 1000
building tree 17 of 1000
building tree 18 of 1000building tree 19 of 1000

building tree 20 of 1000
building tree 21 of 1000
building tree 22 of 1000
building tree 23 of 1000
building tree 24 of 1000building tree 25 of 1000

building tree 26 of 1000building tree 27 of 1000
building tree 28 of 1000building tree 29 of 1000
building tree 30 of 1000
building tree 31 of 1000

building tree 32 of 1000

building tree 33 of 1000
building tree 34 of 1000
building tree 35 of 1000
building tree 36 of 1000
building tree 37 of 1000
building tree 38 of 1000
building tree 39 of 1000
building tree 40 of 1000
building 

[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  73 tasks      | elapsed:    0.4s



building tree 81 of 1000building tree 82 of 1000building tree 83 of 1000


building tree 84 of 1000building tree 85 of 1000building tree 86 of 1000building tree 87 of 1000
building tree 88 of 1000

building tree 89 of 1000building tree 90 of 1000building tree 91 of 1000


building tree 92 of 1000building tree 93 of 1000
building tree 94 of 1000building tree 95 of 1000building tree 96 of 1000

building tree 97 of 1000building tree 98 of 1000



building tree 99 of 1000

building tree 100 of 1000
building tree 101 of 1000building tree 102 of 1000
building tree 103 of 1000
building tree 104 of 1000building tree 105 of 1000


building tree 106 of 1000building tree 107 of 1000


building tree 108 of 1000
building tree 109 of 1000
building tree 110 of 1000
building tree 111 of 1000
building tree 112 of 1000
building tree 113 of 1000
building tree 114 of 1000building tree 115 of 1000

building tree 116 of 1000
building tree 117 of 1000
building tree 118 of 1000
building tree 119 of 1000build

[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 109 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:    0.6s


building tree 146 of 1000
building tree 147 of 1000
building tree 148 of 1000
building tree 149 of 1000
building tree 150 of 1000
building tree 151 of 1000
building tree 152 of 1000building tree 153 of 1000

building tree 154 of 1000building tree 155 of 1000

building tree 156 of 1000building tree 157 of 1000

building tree 158 of 1000building tree 159 of 1000building tree 160 of 1000
building tree 161 of 1000building tree 162 of 1000



building tree 163 of 1000building tree 164 of 1000building tree 165 of 1000building tree 166 of 1000


building tree 167 of 1000building tree 168 of 1000
building tree 169 of 1000building tree 170 of 1000


building tree 171 of 1000building tree 172 of 1000
building tree 173 of 1000

building tree 174 of 1000building tree 175 of 1000
building tree 176 of 1000

building tree 177 of 1000

building tree 178 of 1000
building tree 179 of 1000
building tree 180 of 1000
building tree 181 of 1000
building tree 182 of 1000
building tree 183 of 1000
building tre

[Parallel(n_jobs=-1)]: Done 149 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 170 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 193 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 216 tasks      | elapsed:    0.8s



building tree 221 of 1000
building tree 222 of 1000
building tree 223 of 1000building tree 224 of 1000

building tree 225 of 1000building tree 226 of 1000

building tree 227 of 1000
building tree 228 of 1000
building tree 229 of 1000building tree 230 of 1000
building tree 231 of 1000
building tree 232 of 1000building tree 233 of 1000building tree 234 of 1000building tree 235 of 1000
building tree 236 of 1000
building tree 237 of 1000
building tree 238 of 1000

building tree 239 of 1000building tree 240 of 1000
building tree 241 of 1000

building tree 242 of 1000
building tree 243 of 1000building tree 244 of 1000
building tree 245 of 1000




building tree 246 of 1000building tree 247 of 1000building tree 248 of 1000


building tree 249 of 1000building tree 250 of 1000building tree 251 of 1000


building tree 252 of 1000building tree 253 of 1000
building tree 254 of 1000
building tree 255 of 1000

building tree 256 of 1000
building tree 257 of 1000
building tree 258 of 1000
building tr

[Parallel(n_jobs=-1)]: Done 241 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 266 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 293 tasks      | elapsed:    1.0s


building tree 294 of 1000
building tree 295 of 1000
building tree 296 of 1000
building tree 297 of 1000
building tree 298 of 1000
building tree 299 of 1000building tree 300 of 1000
building tree 301 of 1000

building tree 302 of 1000building tree 303 of 1000

building tree 304 of 1000
building tree 305 of 1000building tree 306 of 1000

building tree 307 of 1000
building tree 308 of 1000building tree 309 of 1000
building tree 310 of 1000

building tree 311 of 1000building tree 312 of 1000

building tree 313 of 1000building tree 314 of 1000building tree 315 of 1000building tree 316 of 1000building tree 317 of 1000building tree 318 of 1000

building tree 319 of 1000building tree 320 of 1000building tree 321 of 1000building tree 322 of 1000
building tree 323 of 1000





building tree 324 of 1000

building tree 325 of 1000building tree 326 of 1000building tree 327 of 1000

building tree 328 of 1000


building tree 329 of 1000
building tree 330 of 1000
building tree 331 of 1000
building tre

[Parallel(n_jobs=-1)]: Done 320 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:    1.2s



building tree 368 of 1000
building tree 369 of 1000
building tree 370 of 1000
building tree 371 of 1000
building tree 372 of 1000
building tree 373 of 1000building tree 374 of 1000
building tree 375 of 1000

building tree 376 of 1000building tree 377 of 1000
building tree 378 of 1000
building tree 379 of 1000building tree 380 of 1000

building tree 381 of 1000

building tree 382 of 1000building tree 383 of 1000

building tree 384 of 1000building tree 385 of 1000building tree 386 of 1000


building tree 387 of 1000building tree 388 of 1000building tree 389 of 1000

building tree 390 of 1000building tree 391 of 1000

building tree 392 of 1000building tree 393 of 1000

building tree 394 of 1000building tree 395 of 1000
building tree 396 of 1000
building tree 397 of 1000building tree 398 of 1000



building tree 399 of 1000
building tree 400 of 1000
building tree 401 of 1000building tree 402 of 1000

building tree 403 of 1000
building tree 404 of 1000
building tree 405 of 1000
building tr

[Parallel(n_jobs=-1)]: Done 378 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 409 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done 440 tasks      | elapsed:    1.4s



building tree 442 of 1000
building tree 443 of 1000
building tree 444 of 1000
building tree 445 of 1000
building tree 446 of 1000
building tree 447 of 1000
building tree 448 of 1000building tree 449 of 1000building tree 450 of 1000building tree 451 of 1000



building tree 452 of 1000
building tree 453 of 1000building tree 454 of 1000building tree 455 of 1000
building tree 456 of 1000building tree 457 of 1000

building tree 458 of 1000building tree 459 of 1000

building tree 460 of 1000building tree 461 of 1000


building tree 462 of 1000

building tree 463 of 1000building tree 464 of 1000building tree 465 of 1000building tree 466 of 1000


building tree 467 of 1000building tree 468 of 1000building tree 469 of 1000building tree 470 of 1000building tree 471 of 1000




building tree 472 of 1000building tree 473 of 1000


building tree 474 of 1000
building tree 475 of 1000
building tree 476 of 1000
building tree 477 of 1000
building tree 478 of 1000
building tree 479 of 1000
building tr

[Parallel(n_jobs=-1)]: Done 473 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done 506 tasks      | elapsed:    1.6s


building tree 513 of 1000
building tree 514 of 1000
building tree 515 of 1000
building tree 516 of 1000building tree 517 of 1000

building tree 518 of 1000building tree 519 of 1000

building tree 520 of 1000building tree 521 of 1000building tree 522 of 1000
building tree 523 of 1000

building tree 524 of 1000

building tree 525 of 1000building tree 526 of 1000building tree 527 of 1000
building tree 528 of 1000


building tree 529 of 1000building tree 530 of 1000building tree 531 of 1000building tree 532 of 1000

building tree 533 of 1000


building tree 534 of 1000building tree 535 of 1000building tree 536 of 1000
building tree 537 of 1000building tree 538 of 1000
building tree 539 of 1000building tree 540 of 1000
building tree 541 of 1000

building tree 542 of 1000

building tree 543 of 1000building tree 544 of 1000
building tree 545 of 1000


building tree 546 of 1000building tree 547 of 1000

building tree 548 of 1000building tree 549 of 1000
building tree 550 of 1000


building tre

[Parallel(n_jobs=-1)]: Done 541 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done 576 tasks      | elapsed:    1.8s



building tree 627 of 1000
building tree 628 of 1000
building tree 629 of 1000building tree 630 of 1000
building tree 631 of 1000
building tree 632 of 1000
building tree 633 of 1000building tree 634 of 1000


building tree 635 of 1000
building tree 636 of 1000building tree 637 of 1000building tree 638 of 1000building tree 639 of 1000


building tree 640 of 1000

building tree 641 of 1000
building tree 642 of 1000
building tree 643 of 1000building tree 644 of 1000building tree 645 of 1000


building tree 646 of 1000building tree 647 of 1000building tree 648 of 1000building tree 649 of 1000building tree 650 of 1000



building tree 651 of 1000
building tree 652 of 1000
building tree 653 of 1000
building tree 654 of 1000
building tree 655 of 1000

building tree 656 of 1000
building tree 657 of 1000building tree 658 of 1000

building tree 659 of 1000
building tree 660 of 1000
building tree 661 of 1000
building tree 662 of 1000
building tree 663 of 1000
building tree 664 of 1000
building tr

[Parallel(n_jobs=-1)]: Done 613 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done 650 tasks      | elapsed:    2.0s


building tree 698 of 1000
building tree 699 of 1000
building tree 700 of 1000
building tree 701 of 1000
building tree 702 of 1000
building tree 703 of 1000building tree 704 of 1000building tree 705 of 1000building tree 706 of 1000



building tree 707 of 1000
building tree 708 of 1000building tree 709 of 1000building tree 710 of 1000building tree 711 of 1000



building tree 712 of 1000building tree 713 of 1000
building tree 714 of 1000
building tree 715 of 1000building tree 716 of 1000building tree 717 of 1000



building tree 718 of 1000building tree 719 of 1000
building tree 720 of 1000building tree 721 of 1000
building tree 722 of 1000
building tree 723 of 1000building tree 724 of 1000
building tree 725 of 1000building tree 726 of 1000building tree 727 of 1000



building tree 728 of 1000

building tree 729 of 1000
building tree 730 of 1000building tree 731 of 1000building tree 732 of 1000



building tree 733 of 1000
building tree 734 of 1000
building tree 735 of 1000
building tre

[Parallel(n_jobs=-1)]: Done 689 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 728 tasks      | elapsed:    2.3s


building tree 772 of 1000
building tree 773 of 1000
building tree 774 of 1000
building tree 775 of 1000building tree 776 of 1000building tree 777 of 1000
building tree 778 of 1000

building tree 779 of 1000building tree 780 of 1000building tree 781 of 1000building tree 782 of 1000building tree 783 of 1000





building tree 784 of 1000building tree 785 of 1000
building tree 786 of 1000building tree 787 of 1000


building tree 788 of 1000building tree 789 of 1000building tree 790 of 1000
building tree 791 of 1000

building tree 792 of 1000building tree 793 of 1000building tree 794 of 1000
building tree 795 of 1000building tree 796 of 1000


building tree 797 of 1000
building tree 798 of 1000
building tree 799 of 1000

building tree 800 of 1000building tree 801 of 1000

building tree 802 of 1000
building tree 803 of 1000building tree 804 of 1000


building tree 805 of 1000
building tree 806 of 1000
building tree 807 of 1000
building tree 808 of 1000building tree 809 of 1000

building tre

[Parallel(n_jobs=-1)]: Done 769 tasks      | elapsed:    2.4s


building tree 844 of 1000
building tree 845 of 1000building tree 846 of 1000

building tree 847 of 1000
building tree 848 of 1000
building tree 849 of 1000
building tree 850 of 1000building tree 851 of 1000building tree 852 of 1000
building tree 853 of 1000

building tree 854 of 1000
building tree 855 of 1000building tree 856 of 1000building tree 857 of 1000building tree 858 of 1000


building tree 859 of 1000building tree 860 of 1000


building tree 861 of 1000building tree 862 of 1000building tree 863 of 1000



building tree 864 of 1000building tree 865 of 1000building tree 866 of 1000
building tree 867 of 1000building tree 868 of 1000
building tree 869 of 1000

building tree 870 of 1000building tree 871 of 1000
building tree 872 of 1000building tree 873 of 1000


building tree 874 of 1000building tree 875 of 1000



building tree 876 of 1000
building tree 877 of 1000building tree 878 of 1000

building tree 879 of 1000
building tree 880 of 1000
building tree 881 of 1000
building tre

[Parallel(n_jobs=-1)]: Done 810 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done 853 tasks      | elapsed:    2.7s


building tree 916 of 1000
building tree 917 of 1000
building tree 918 of 1000building tree 919 of 1000
building tree 920 of 1000building tree 921 of 1000building tree 922 of 1000

building tree 923 of 1000

building tree 924 of 1000
building tree 925 of 1000
building tree 926 of 1000

building tree 927 of 1000
building tree 928 of 1000building tree 929 of 1000
building tree 930 of 1000building tree 931 of 1000
building tree 932 of 1000
building tree 933 of 1000building tree 934 of 1000building tree 935 of 1000building tree 936 of 1000
building tree 937 of 1000building tree 938 of 1000
building tree 939 of 1000





building tree 940 of 1000building tree 941 of 1000
building tree 942 of 1000building tree 943 of 1000building tree 944 of 1000



building tree 945 of 1000building tree 946 of 1000building tree 947 of 1000

building tree 948 of 1000
building tree 949 of 1000


building tree 950 of 1000
building tree 951 of 1000
building tree 952 of 1000
building tree 953 of 1000
building tre

[Parallel(n_jobs=-1)]: Done 896 tasks      | elapsed:    2.8s


building tree 988 of 1000
building tree 989 of 1000
building tree 990 of 1000building tree 991 of 1000building tree 992 of 1000


building tree 993 of 1000building tree 994 of 1000building tree 995 of 1000building tree 996 of 1000building tree 997 of 1000
building tree 998 of 1000
building tree 999 of 1000

building tree 1000 of 1000





[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:    3.1s finished


In [11]:
combine_and_save(X=result_X, y=y, dataset_name="MNIST", algorithm_name="RF")

In [12]:
rf_fashion = RandomForestClassifier(n_estimators=1000, n_jobs=-1, verbose=10, warm_start=True)
X = fashion_mnist['train_X'].iloc[:limit, :]
y = fashion_mnist['train_y'].iloc[:limit]
rf_fashion.fit(X, y)
rf_fashion_df = pd.DataFrame(columns=["Feature_Importance"], data=rf_fashion.feature_importances_)
rf_fashion_df.sort_values(by=["Feature_Importance"], ascending=False, inplace=True)
rf_fashion_feature_index = rf_fashion_df.copy().index
fashion_idx = rf_fashion_feature_index.to_numpy()
fashion_idx = fashion_idx[:250]
fashion_df = X.iloc[:limit, fashion_idx]
result_X = pd.DataFrame(fashion_df)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 36 concurrent workers.


building tree 1 of 1000
building tree 2 of 1000building tree 3 of 1000
building tree 4 of 1000
building tree 5 of 1000
building tree 6 of 1000
building tree 7 of 1000

building tree 8 of 1000
building tree 9 of 1000building tree 10 of 1000
building tree 11 of 1000

building tree 12 of 1000building tree 13 of 1000
building tree 14 of 1000
building tree 15 of 1000

building tree 16 of 1000
building tree 17 of 1000building tree 18 of 1000building tree 19 of 1000


building tree 20 of 1000
building tree 21 of 1000
building tree 22 of 1000
building tree 23 of 1000building tree 24 of 1000
building tree 25 of 1000
building tree 26 of 1000
building tree 27 of 1000
building tree 28 of 1000

building tree 29 of 1000building tree 30 of 1000

building tree 31 of 1000building tree 32 of 1000
building tree 33 of 1000building tree 34 of 1000building tree 35 of 1000


building tree 36 of 1000

building tree 37 of 1000
building tree 38 of 1000
building tree 39 of 1000
building tree 40 of 1000
building 

[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.5s


building tree 80 of 1000

building tree 81 of 1000building tree 82 of 1000

building tree 83 of 1000building tree 84 of 1000

building tree 85 of 1000
building tree 86 of 1000building tree 87 of 1000building tree 88 of 1000


building tree 89 of 1000
building tree 90 of 1000
building tree 91 of 1000building tree 92 of 1000building tree 93 of 1000

building tree 94 of 1000
building tree 95 of 1000
building tree 96 of 1000
building tree 97 of 1000
building tree 98 of 1000building tree 99 of 1000
building tree 100 of 1000building tree 101 of 1000building tree 102 of 1000




building tree 103 of 1000building tree 104 of 1000

building tree 105 of 1000
building tree 106 of 1000building tree 107 of 1000building tree 108 of 1000


building tree 109 of 1000
building tree 110 of 1000
building tree 111 of 1000
building tree 112 of 1000
building tree 113 of 1000building tree 114 of 1000
building tree 115 of 1000building tree 116 of 1000
building tree 117 of 1000building tree 118 of 1000building 

[Parallel(n_jobs=-1)]: Done  73 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 109 tasks      | elapsed:    0.8s




building tree 136 of 1000building tree 137 of 1000
building tree 138 of 1000

building tree 139 of 1000

building tree 140 of 1000
building tree 141 of 1000building tree 142 of 1000
building tree 143 of 1000

building tree 144 of 1000
building tree 145 of 1000
building tree 146 of 1000
building tree 147 of 1000
building tree 148 of 1000
building tree 149 of 1000
building tree 150 of 1000building tree 151 of 1000

building tree 152 of 1000
building tree 153 of 1000building tree 154 of 1000

building tree 155 of 1000building tree 156 of 1000

building tree 157 of 1000
building tree 158 of 1000building tree 159 of 1000
building tree 160 of 1000

building tree 161 of 1000building tree 162 of 1000building tree 163 of 1000

building tree 164 of 1000building tree 165 of 1000
building tree 166 of 1000
building tree 167 of 1000
building tree 168 of 1000
building tree 169 of 1000

building tree 170 of 1000building tree 171 of 1000
building tree 172 of 1000
building tree 173 of 1000

building t

[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 149 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 170 tasks      | elapsed:    1.0s


building tree 181 of 1000
building tree 182 of 1000
building tree 183 of 1000
building tree 184 of 1000
building tree 185 of 1000
building tree 186 of 1000
building tree 187 of 1000
building tree 188 of 1000
building tree 189 of 1000
building tree 190 of 1000building tree 191 of 1000building tree 192 of 1000

building tree 193 of 1000

building tree 194 of 1000building tree 195 of 1000

building tree 196 of 1000
building tree 197 of 1000building tree 198 of 1000
building tree 199 of 1000

building tree 200 of 1000building tree 201 of 1000
building tree 202 of 1000building tree 203 of 1000building tree 204 of 1000
building tree 205 of 1000
building tree 206 of 1000
building tree 207 of 1000



building tree 208 of 1000building tree 209 of 1000building tree 210 of 1000

building tree 211 of 1000building tree 212 of 1000
building tree 213 of 1000

building tree 214 of 1000building tree 215 of 1000


building tree 216 of 1000
building tree 217 of 1000
building tree 218 of 1000


[Parallel(n_jobs=-1)]: Done 193 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 216 tasks      | elapsed:    1.2s


building tree 219 of 1000
building tree 220 of 1000
building tree 221 of 1000
building tree 222 of 1000
building tree 223 of 1000building tree 224 of 1000

building tree 225 of 1000
building tree 226 of 1000
building tree 227 of 1000building tree 228 of 1000

building tree 229 of 1000
building tree 230 of 1000building tree 231 of 1000
building tree 232 of 1000

building tree 233 of 1000building tree 234 of 1000
building tree 235 of 1000

building tree 236 of 1000building tree 237 of 1000
building tree 238 of 1000
building tree 239 of 1000
building tree 240 of 1000
building tree 241 of 1000
building tree 242 of 1000
building tree 243 of 1000building tree 244 of 1000
building tree 245 of 1000building tree 246 of 1000
building tree 247 of 1000

building tree 248 of 1000
building tree 249 of 1000building tree 250 of 1000



building tree 251 of 1000
building tree 252 of 1000
building tree 253 of 1000
building tree 254 of 1000
building tree 255 of 1000
building tree 256 of 1000
building tre

[Parallel(n_jobs=-1)]: Done 241 tasks      | elapsed:    1.3s


building tree 265 of 1000
building tree 266 of 1000

building tree 267 of 1000building tree 268 of 1000building tree 269 of 1000


building tree 270 of 1000building tree 271 of 1000building tree 272 of 1000building tree 273 of 1000building tree 274 of 1000
building tree 275 of 1000
building tree 276 of 1000building tree 277 of 1000building tree 278 of 1000
building tree 279 of 1000






building tree 280 of 1000
building tree 281 of 1000building tree 282 of 1000building tree 283 of 1000


building tree 284 of 1000building tree 285 of 1000
building tree 286 of 1000

building tree 287 of 1000
building tree 288 of 1000
building tree 289 of 1000
building tree 290 of 1000
building tree 291 of 1000
building tree 292 of 1000
building tree 293 of 1000
building tree 294 of 1000building tree 295 of 1000

building tree 296 of 1000
building tree 297 of 1000building tree 298 of 1000

building tree 299 of 1000building tree 300 of 1000

building tree 301 of 1000building tree 302 of 1000

building tr

[Parallel(n_jobs=-1)]: Done 266 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done 293 tasks      | elapsed:    1.7s



building tree 323 of 1000building tree 324 of 1000

building tree 325 of 1000
building tree 326 of 1000
building tree 327 of 1000
building tree 328 of 1000
building tree 329 of 1000
building tree 330 of 1000
building tree 331 of 1000
building tree 332 of 1000
building tree 333 of 1000
building tree 334 of 1000
building tree 335 of 1000building tree 336 of 1000

building tree 337 of 1000building tree 338 of 1000

building tree 339 of 1000
building tree 340 of 1000
building tree 341 of 1000
building tree 342 of 1000building tree 343 of 1000

building tree 344 of 1000
building tree 345 of 1000
building tree 346 of 1000
building tree 347 of 1000building tree 348 of 1000building tree 349 of 1000


building tree 350 of 1000
building tree 351 of 1000building tree 352 of 1000

building tree 353 of 1000
building tree 354 of 1000building tree 355 of 1000
building tree 356 of 1000

building tree 357 of 1000
building tree 358 of 1000
building tree 359 of 1000building tree 360 of 1000

building tr

[Parallel(n_jobs=-1)]: Done 320 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:    1.9s



building tree 398 of 1000
building tree 399 of 1000
building tree 400 of 1000
building tree 401 of 1000
building tree 402 of 1000
building tree 403 of 1000
building tree 404 of 1000
building tree 405 of 1000
building tree 406 of 1000
building tree 407 of 1000building tree 408 of 1000

building tree 409 of 1000
building tree 410 of 1000
building tree 411 of 1000
building tree 412 of 1000
building tree 413 of 1000
building tree 414 of 1000
building tree 415 of 1000
building tree 416 of 1000
building tree 417 of 1000building tree 418 of 1000
building tree 419 of 1000building tree 420 of 1000

building tree 421 of 1000building tree 422 of 1000
building tree 423 of 1000building tree 424 of 1000


building tree 425 of 1000building tree 426 of 1000building tree 427 of 1000

building tree 428 of 1000

building tree 429 of 1000building tree 430 of 1000

building tree 431 of 1000
building tree 432 of 1000

building tree 433 of 1000
building tree 434 of 1000
building tree 435 of 1000
building tr

[Parallel(n_jobs=-1)]: Done 378 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 409 tasks      | elapsed:    2.2s



building tree 440 of 1000
building tree 441 of 1000
building tree 442 of 1000
building tree 443 of 1000
building tree 444 of 1000
building tree 445 of 1000
building tree 446 of 1000
building tree 447 of 1000building tree 448 of 1000
building tree 449 of 1000

building tree 450 of 1000building tree 451 of 1000
building tree 452 of 1000building tree 453 of 1000


building tree 454 of 1000building tree 455 of 1000building tree 456 of 1000building tree 457 of 1000building tree 458 of 1000

building tree 459 of 1000

building tree 460 of 1000
building tree 461 of 1000building tree 462 of 1000
building tree 463 of 1000
building tree 464 of 1000

building tree 465 of 1000


building tree 466 of 1000building tree 467 of 1000
building tree 468 of 1000

building tree 469 of 1000building tree 470 of 1000
building tree 471 of 1000building tree 472 of 1000


building tree 473 of 1000
building tree 474 of 1000
building tree 475 of 1000
building tree 476 of 1000
building tree 477 of 1000
building tr

[Parallel(n_jobs=-1)]: Done 440 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done 473 tasks      | elapsed:    2.4s


building tree 515 of 1000
building tree 516 of 1000
building tree 517 of 1000
building tree 518 of 1000
building tree 519 of 1000
building tree 520 of 1000
building tree 521 of 1000building tree 522 of 1000

building tree 523 of 1000building tree 524 of 1000
building tree 525 of 1000
building tree 526 of 1000building tree 527 of 1000
building tree 528 of 1000


building tree 529 of 1000building tree 530 of 1000

building tree 531 of 1000
building tree 532 of 1000building tree 533 of 1000

building tree 534 of 1000
building tree 535 of 1000building tree 536 of 1000
building tree 537 of 1000building tree 538 of 1000


building tree 539 of 1000building tree 540 of 1000

building tree 541 of 1000building tree 542 of 1000

building tree 543 of 1000building tree 544 of 1000

building tree 545 of 1000
building tree 546 of 1000
building tree 547 of 1000
building tree 548 of 1000
building tree 549 of 1000
building tree 550 of 1000
building tree 551 of 1000
building tree 552 of 1000
building tre

[Parallel(n_jobs=-1)]: Done 506 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done 541 tasks      | elapsed:    2.7s



building tree 555 of 1000
building tree 556 of 1000
building tree 557 of 1000
building tree 558 of 1000
building tree 559 of 1000
building tree 560 of 1000
building tree 561 of 1000
building tree 562 of 1000
building tree 563 of 1000
building tree 564 of 1000building tree 565 of 1000
building tree 566 of 1000building tree 567 of 1000building tree 568 of 1000
building tree 569 of 1000building tree 570 of 1000building tree 571 of 1000





building tree 572 of 1000building tree 573 of 1000building tree 574 of 1000building tree 575 of 1000
building tree 576 of 1000
building tree 577 of 1000


building tree 578 of 1000
building tree 579 of 1000
building tree 580 of 1000
building tree 581 of 1000

building tree 582 of 1000
building tree 583 of 1000
building tree 584 of 1000
building tree 585 of 1000
building tree 586 of 1000
building tree 587 of 1000
building tree 588 of 1000
building tree 589 of 1000
building tree 590 of 1000
building tree 591 of 1000
building tree 592 of 1000
building tr

[Parallel(n_jobs=-1)]: Done 576 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done 613 tasks      | elapsed:    3.1s


building tree 655 of 1000
building tree 656 of 1000building tree 657 of 1000

building tree 658 of 1000
building tree 659 of 1000
building tree 660 of 1000
building tree 661 of 1000
building tree 662 of 1000
building tree 663 of 1000
building tree 664 of 1000
building tree 665 of 1000
building tree 666 of 1000
building tree 667 of 1000
building tree 668 of 1000
building tree 669 of 1000
building tree 670 of 1000
building tree 671 of 1000building tree 672 of 1000building tree 673 of 1000


building tree 674 of 1000building tree 675 of 1000

building tree 676 of 1000
building tree 677 of 1000building tree 678 of 1000

building tree 679 of 1000
building tree 680 of 1000building tree 681 of 1000

building tree 682 of 1000building tree 683 of 1000building tree 684 of 1000


building tree 685 of 1000
building tree 686 of 1000
building tree 687 of 1000building tree 688 of 1000building tree 689 of 1000


building tree 690 of 1000
building tree 691 of 1000
building tree 692 of 1000
building tre

[Parallel(n_jobs=-1)]: Done 650 tasks      | elapsed:    3.2s
[Parallel(n_jobs=-1)]: Done 689 tasks      | elapsed:    3.4s


building tree 694 of 1000
building tree 695 of 1000
building tree 696 of 1000
building tree 697 of 1000
building tree 698 of 1000
building tree 699 of 1000
building tree 700 of 1000
building tree 701 of 1000building tree 702 of 1000

building tree 703 of 1000
building tree 704 of 1000
building tree 705 of 1000
building tree 706 of 1000
building tree 707 of 1000
building tree 708 of 1000building tree 709 of 1000

building tree 710 of 1000building tree 711 of 1000building tree 712 of 1000
building tree 713 of 1000
building tree 714 of 1000


building tree 715 of 1000
building tree 716 of 1000building tree 717 of 1000building tree 718 of 1000building tree 719 of 1000building tree 720 of 1000building tree 721 of 1000building tree 722 of 1000






building tree 723 of 1000building tree 724 of 1000building tree 725 of 1000building tree 726 of 1000
building tree 727 of 1000

building tree 728 of 1000


building tree 729 of 1000building tree 730 of 1000building tree 731 of 1000

building tree

[Parallel(n_jobs=-1)]: Done 728 tasks      | elapsed:    3.6s
[Parallel(n_jobs=-1)]: Done 769 tasks      | elapsed:    3.8s


building tree 772 of 1000
building tree 773 of 1000
building tree 774 of 1000
building tree 775 of 1000
building tree 776 of 1000
building tree 777 of 1000
building tree 778 of 1000
building tree 779 of 1000
building tree 780 of 1000
building tree 781 of 1000
building tree 782 of 1000
building tree 783 of 1000building tree 784 of 1000

building tree 785 of 1000building tree 786 of 1000building tree 787 of 1000


building tree 788 of 1000building tree 789 of 1000
building tree 790 of 1000building tree 791 of 1000


building tree 792 of 1000
building tree 793 of 1000building tree 794 of 1000

building tree 795 of 1000building tree 796 of 1000building tree 797 of 1000building tree 798 of 1000

building tree 799 of 1000

building tree 800 of 1000building tree 801 of 1000
building tree 802 of 1000
building tree 803 of 1000building tree 804 of 1000

building tree 805 of 1000

building tree 806 of 1000

building tree 807 of 1000
building tree 808 of 1000
building tree 809 of 1000
building tre

[Parallel(n_jobs=-1)]: Done 810 tasks      | elapsed:    4.0s


building tree 848 of 1000
building tree 849 of 1000
building tree 850 of 1000building tree 851 of 1000

building tree 852 of 1000
building tree 853 of 1000
building tree 854 of 1000
building tree 855 of 1000building tree 856 of 1000

building tree 857 of 1000
building tree 858 of 1000
building tree 859 of 1000
building tree 860 of 1000building tree 861 of 1000

building tree 862 of 1000
building tree 863 of 1000building tree 864 of 1000
building tree 865 of 1000
building tree 866 of 1000
building tree 867 of 1000
building tree 868 of 1000

building tree 869 of 1000building tree 870 of 1000

building tree 871 of 1000
building tree 872 of 1000
building tree 873 of 1000
building tree 874 of 1000building tree 875 of 1000

building tree 876 of 1000
building tree 877 of 1000
building tree 878 of 1000building tree 879 of 1000building tree 880 of 1000


building tree 881 of 1000
building tree 882 of 1000
building tree 883 of 1000
building tree 884 of 1000
building tree 885 of 1000
building tre

[Parallel(n_jobs=-1)]: Done 853 tasks      | elapsed:    4.3s


building tree 927 of 1000
building tree 928 of 1000
building tree 929 of 1000
building tree 930 of 1000building tree 931 of 1000

building tree 932 of 1000
building tree 933 of 1000building tree 934 of 1000
building tree 935 of 1000
building tree 936 of 1000

building tree 937 of 1000building tree 938 of 1000building tree 939 of 1000building tree 940 of 1000



building tree 941 of 1000building tree 942 of 1000

building tree 943 of 1000
building tree 944 of 1000building tree 945 of 1000
building tree 946 of 1000building tree 947 of 1000

building tree 948 of 1000
building tree 949 of 1000
building tree 950 of 1000building tree 951 of 1000


building tree 952 of 1000building tree 953 of 1000

building tree 954 of 1000
building tree 955 of 1000
building tree 956 of 1000
building tree 957 of 1000
building tree 958 of 1000
building tree 959 of 1000
building tree 960 of 1000
building tree 961 of 1000
building tree 962 of 1000
building tree 963 of 1000
building tree 964 of 1000
building tre

[Parallel(n_jobs=-1)]: Done 896 tasks      | elapsed:    4.5s



building tree 966 of 1000
building tree 967 of 1000
building tree 968 of 1000
building tree 969 of 1000building tree 970 of 1000

building tree 971 of 1000building tree 972 of 1000

building tree 973 of 1000building tree 974 of 1000
building tree 975 of 1000building tree 976 of 1000
building tree 977 of 1000
building tree 978 of 1000building tree 979 of 1000



building tree 980 of 1000building tree 981 of 1000building tree 982 of 1000
building tree 983 of 1000

building tree 984 of 1000building tree 985 of 1000
building tree 986 of 1000building tree 987 of 1000

building tree 988 of 1000

building tree 989 of 1000
building tree 990 of 1000
building tree 991 of 1000

building tree 992 of 1000
building tree 993 of 1000
building tree 994 of 1000
building tree 995 of 1000
building tree 996 of 1000
building tree 997 of 1000
building tree 998 of 1000
building tree 999 of 1000
building tree 1000 of 1000


[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:    4.9s finished


In [13]:
combine_and_save(X=result_X, y=y, dataset_name="Fashion", algorithm_name="RF")

# Load Reduced Datasets

In [None]:
with open(f"{os.getcwd()}/DimensionalityReduction/PCA_MNIST_Reduced_Dataset.pkl", "rb") as input_file:
    pca_mnist_reduced_data = pickle.load(input_file)
    input_file.close()
with open(f"{os.getcwd()}/DimensionalityReduction/PCA_Fashion_Reduced_Dataset.pkl", "rb") as input_file:
    pca_fashion_reduced_data = pickle.load(input_file)
    input_file.close()
print(f"PCA MNIST: {pca_mnist_reduced_data.shape}")
print(f"PCA Fashion: {pca_fashion_reduced_data.shape}")

In [None]:
with open(f"{os.getcwd()}/DimensionalityReduction/ICA_MNIST_Reduced_Dataset.pkl", "rb") as input_file:
    ica_mnist_reduced_data = pickle.load(input_file)
    input_file.close()
with open(f"{os.getcwd()}/DimensionalityReduction/ICA_Fashion_Reduced_Dataset.pkl", "rb") as input_file:
    ica_fashion_reduced_data = pickle.load(input_file)
    input_file.close()
print(f"ICA MNIST: {ica_mnist_reduced_data.shape}")
print(f"ICA Fashion: {ica_fashion_reduced_data.shape}")

In [None]:
with open(f"{os.getcwd()}/DimensionalityReduction/RF_MNIST_Reduced_Dataset.pkl", "rb") as input_file:
    rf_mnist_reduced_data = pickle.load(input_file)
    input_file.close()
with open(f"{os.getcwd()}/DimensionalityReduction/RF_Fashion_Reduced_Dataset.pkl", "rb") as input_file:
    rf_fashion_reduced_data = pickle.load(input_file)
    input_file.close()
print(f"RF MNIST: {rf_mnist_reduced_data.shape}")
print(f"RF Fashion: {rf_fashion_reduced_data.shape}")

In [None]:
with open(f"{os.getcwd()}/DimensionalityReduction/RP_MNIST_Reduced_Dataset.pkl", "rb") as input_file:
    rp_mnist_reduced_data = pickle.load(input_file)
    input_file.close()
with open(f"{os.getcwd()}/DimensionalityReduction/RP_Fashion_Reduced_Dataset.pkl", "rb") as input_file:
    rp_fashion_reduced_data = pickle.load(input_file)
    input_file.close()
print(f"RP MNIST: {rp_mnist_reduced_data.shape}")
print(f"RP Fashion: {rp_fashion_reduced_data.shape}")

# K-Means Clustering

In [None]:
temp_folder = "Clustering/" + "Reduced/"
utl.check_folder(temp_folder)
save_dir = os.getcwd() + temp_folder
limit = 5000
idx = [i for i in range(2, 31, 1)]
cols = ["Inertia", "Silhouette", "Homogeneity", "Completeness", "Harmonic_Mean", "Calinski_Harabasz", "Davies_Bouldin"]

In [None]:
mnist_results = pd.DataFrame(columns=cols, index=idx,
                               data=np.zeros(shape=(len(idx), len(cols))))

fashion_results = pd.DataFrame(columns=cols, index=idx,
                                          data=np.zeros(shape=(len(idx), len(cols))))
algorithm_names = ["PCA", "RP", "RF", "ICA"]
print("Starting K-Means Clustering")
for _df in ["MNIST", "Fashion-MNIST"]:
    for alg in algorithm_names:
        for k in idx:
            if _df == "MNIST":
                with open(f"{os.getcwd()}/DimensionalityReduction/{alg}_{_df}_Reduced_Dataset.pkl", "rb") as input_file:
                    temp_reduced = pickle.load(input_file)
                    input_file.close()
                data = utl.extract_data_and_labels(temp_reduced)
                temp_train_X = data["train_X"]
                temp_train_y = data["train_y"]
                k_means = KMeans(n_clusters=k, verbose=VERBOSE).fit(temp_train_X)
                inertia = k_means.inertia_
                silhouette_average = silhouette_score(temp_train_X, k_means.labels_, sample_size=limit)
                homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(temp_train_y, k_means.labels_)
                mnist_results.loc[k, "Inertia"] = inertia
                mnist_results.loc[k, "Silhouette"] = silhouette_average
                mnist_results.loc[k, "Calinski_Harabasz"] = calinski_harabasz_score(temp_train_X, k_means.labels_)
                mnist_results.loc[k, "Davies_Bouldin"] = davies_bouldin_score(temp_train_X, k_means.labels_)
                mnist_results.loc[k, "Homogeneity"] = homogeneity
                mnist_results.loc[k, "Completeness"] = completeness
                mnist_results.loc[k, "Harmonic_Mean"] = v_measure
                print(f"\n\t{_df} - k={k} \n{mnist_results.loc[k]}")

            elif _df == "Fashion-MNIST":
                with open(f"{os.getcwd()}/DimensionalityReduction/{alg}_Fashion_Reduced_Dataset.pkl", "rb") as input_file:
                    temp_reduced = pickle.load(input_file)
                    input_file.close()
                data = utl.extract_data_and_labels(temp_reduced)
                temp_train_X = data["train_X"]
                temp_train_y = data["train_y"]
                k_means = KMeans(n_clusters=k, verbose=VERBOSE).fit(temp_train_X)
                inertia = k_means.inertia_
                silhouette_average = silhouette_score(temp_train_X, k_means.labels_, sample_size=limit)
                homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(temp_train_y, k_means.labels_)
                fashion_results.loc[k, "Inertia"] = inertia
                fashion_results.loc[k, "Silhouette"] = silhouette_average
                fashion_results.loc[k, "Calinski_Harabasz"] = calinski_harabasz_score(temp_train_X, k_means.labels_)
                fashion_results.loc[k, "Davies_Bouldin"] = davies_bouldin_score(temp_train_X, k_means.labels_)
                fashion_results.loc[k, "Homogeneity"] = homogeneity
                fashion_results.loc[k, "Completeness"] = completeness
                fashion_results.loc[k, "Harmonic_Mean"] = v_measure
                print(f"\n\t{_df} - k={k} \n{fashion_results.loc[k]}")


In [None]:
with open(f"{os.getcwd()}/Clustering/Clustered_Reduced_MNIST.pkl", "wb") as output_file:
    pickle.dump(mnist_results, output_file)
    output_file.close()
with open(f"{os.getcwd()}/Clustering/Clustered_Reduced_Fashion.pkl", "wb") as output_file:
    pickle.dump(fashion_results, output_file)
    output_file.close()

In [None]:
with open(f"{os.getcwd()}/{temp_folder}/MNIST_Results.pkl", "rb") as in_file:
    mnist_results = pickle.load(in_file)
    in_file.close()

In [None]:
with open(f"{os.getcwd()}/{temp_folder}/Fashion_Results.pkl", "rb") as in_file:
    fashion_results = pickle.load(in_file)
    in_file.close()

In [None]:
with open(f"{os.getcwd()}/{temp_folder}/MNIST_Results.pkl", "wb") as out_file:
    pickle.dump(mnist_results, out_file)
    out_file.close()

In [None]:
with open(f"{os.getcwd()}/{temp_folder}/Fashion_Results.pkl", "wb") as out_file:
    pickle.dump(fashion_results, out_file)
    out_file.close()

## Elbow Method

In [None]:
limit=5000

In [None]:
plt.close("all")
end = 31
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
# ax1_secondary = ax1.twinx()
# ax2_secondary = ax2.twinx()

mnist_model = KMeans()
mnist_visualizer = KElbowVisualizer(mnist_model, k=(2, end), ax=ax1, timings=False)
mnist_visualizer.fit(mnist["train_X"].iloc[:limit, :])

fashion_model = KMeans()
fashion_visualizer = KElbowVisualizer(fashion_model, k=(2, end), ax=ax2, timings=False)
fashion_visualizer.fit(fashion_mnist["train_X"].iloc[:limit, :])

# mnist_results[["Silhouette", "Homogeneity", "Completeness", "Harmonic_Mean"]].iloc[:end].plot(ax=ax1_secondary, linestyle="--")
ax1.set_title(f"K Means Clustering\nDistortion MNIST", fontsize=15, weight='bold')
# ax1.grid(which='major', linestyle='-', linewidth='0.5', color='white')
ax1.set_xlabel("K Clusters", fontsize=15, weight='heavy')
ax1.set_ylabel("Distortion", fontsize=15, weight='heavy')
ax1.legend(loc="best", markerscale=1.1, frameon=True,
                   edgecolor="black", fancybox=True, shadow=True)

# fashion_results[["Silhouette", "Homogeneity", "Completeness", "Harmonic_Mean"]].iloc[:end].plot(ax=ax2_secondary, linestyle="--")
ax2.set_title(f"K Means Clustering\nDistortion Fashion MNIST", fontsize=15, weight='bold')
# ax2.grid(which='major', linestyle='-', linewidth='0.5', color='white')
ax2.set_xlabel("K Clusters", fontsize=15, weight='heavy')
ax2.set_ylabel("Distortion", fontsize=15, weight='heavy')
ax2.legend(loc="best", markerscale=1.1, frameon=True,
                   edgecolor="black", fancybox=True, shadow=True)

plt.savefig(f"{os.getcwd()}/{temp_folder}KMEans_Elbow_Method_Combined.png", bbox_inches='tight')

## Silhouette Method

In [None]:
plt.close("all")
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
mnist_cluster_count = 10
fashion_cluster_count = 9

mnist_model = KMeans(n_clusters=mnist_cluster_count, random_state=42)
fashion_model = KMeans(n_clusters=fashion_cluster_count, random_state=42)

mnist_vis = SilhouetteVisualizer(mnist_model, ax=ax1, 
                                 colors='yellowbrick').fit(mnist["train_X"].iloc[:limit, :]).finalize()
fashion_mnist_vis = SilhouetteVisualizer(fashion_model, ax=ax2, 
                                         colors='yellowbrick').fit(fashion_mnist["train_X"].iloc[:limit, :]).finalize()

ax1.set_title(f"Silhouette Plot of KMEans Clustering\non MNIST with {mnist_cluster_count} Clusters", fontsize=15, weight='bold')
ax1.legend(loc="best", markerscale=1.1, frameon=True,
                   edgecolor="black", fancybox=True, shadow=True)
ax1.set_xlabel("Silhouette Coefficient Values", fontsize=15, weight='heavy')
ax1.set_ylabel("Cluster Label", fontsize=15, weight='heavy')

ax2.set_title(f"Silhouette Plot of KMEans Clustering\non Fashion MNIST with {fashion_cluster_count} Clusters", fontsize=15, weight='bold')
ax2.legend(loc="best", markerscale=1.1, frameon=True,
                   edgecolor="black", fancybox=True, shadow=True)
ax2.set_xlabel("Silhouette Coefficient Values", fontsize=15, weight='heavy')
ax2.set_ylabel("Cluster Label", fontsize=15, weight='heavy')

plt.tight_layout()

plt.savefig(f"{os.getcwd()}/{temp_folder}KMEans_Silhouette_Combined.png", bbox_inches='tight')

# Expectation Maximization 

In [None]:
index = np.arange(1, 10, 1).astype(np.int)
types = ["Full"]
columns = ["AIC_Full", "BIC_Full"]
em_mnist_results = pd.DataFrame(columns=columns, index=index, data=np.zeros(shape=(index.shape[0], len(columns))))
em_fashion_results = pd.DataFrame(columns=columns, index=index, data=np.zeros(shape=(index.shape[0], len(columns))))

In [None]:
X = mnist["train_X"].iloc[:limit, :]
for idx in index:
    print(f"N_Components: {idx}")
    for _type in types:
        temp_gmm = GaussianMixture(n_components=idx, n_init=10, covariance_type=_type.lower(), warm_start=True, max_iter=500).fit(X)
        em_mnist_results.loc[idx, f"AIC_{_type}"] = temp_gmm.aic(X)
        em_mnist_results.loc[idx, f"BIC_{_type}"] = temp_gmm.bic(X)

In [None]:
with open(f"{os.getcwd()}/{temp_folder}/MNIST_EM_Results.pkl", "wb") as out_file:
    pickle.dump(em_mnist_results, out_file)
    out_file.close()

In [None]:
X = fashion_mnist["train_X"].iloc[:limit, :]
for idx in index:
    print(f"N_Components: {idx}")
    for _type in types:
        temp_gmm = GaussianMixture(n_components=idx, n_init=10, covariance_type=_type.lower(), warm_start=True, max_iter=500).fit(X)
        em_fashion_results.loc[idx, f"AIC_{_type}"] = temp_gmm.aic(X)
        em_fashion_results.loc[idx, f"BIC_{_type}"] = temp_gmm.bic(X)


In [None]:
with open(f"{os.getcwd()}/{temp_folder}/Fashion_EM_Results.pkl", "wb") as out_file:
    pickle.dump(em_fashion_results, out_file)
    out_file.close()

In [None]:
with open(f"{os.getcwd()}/{temp_folder}/MNIST_EM_Results.pkl", "rb") as in_file:
    em_mnist_results = pickle.load(in_file)
    in_file.close()

In [None]:
with open(f"{os.getcwd()}/{temp_folder}/Fashion_EM_Results.pkl", "rb") as in_file:
    em_fashion_results = pickle.load(in_file)
    in_file.close()

In [None]:
plt.close("all")
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

em_mnist_results[["AIC_Full", "BIC_Full"]].plot(ax=ax1)
ax1.set_title(f"AIC / BIC Comparison\n MNIST", fontsize=15, weight='bold')
ax1.legend(loc="best", markerscale=1.1, frameon=True,
                   edgecolor="black", fancybox=True, shadow=True)
ax1.set_xlabel("N Components", fontsize=15, weight='heavy')
ax1.set_ylabel("Information Criterion", fontsize=15, weight='heavy')

em_fashion_results[["AIC_Full", "BIC_Full"]].plot(ax=ax2)
ax2.set_title(f"AIC / BIC Comparison\n Fashion-MNIST", fontsize=15, weight='bold')
ax2.legend(loc="best", markerscale=1.1, frameon=True,
                   edgecolor="black", fancybox=True, shadow=True)
ax2.set_xlabel("N Components", fontsize=15, weight='heavy')
ax2.set_ylabel("Information Criterion", fontsize=15, weight='heavy')

plt.tight_layout()

plt.savefig(f"{os.getcwd()}/{temp_folder}EM_AicBic_Combined.png", bbox_inches='tight')

In [None]:
em_mnist_results[["AIC_Full", "BIC_Full"]].plot()

In [None]:
X = fashion_mnist["train_X"].iloc[:2000, :]
for idx in index:
    print(f"N_Components: {idx}")
    for _type in types:
        temp_gmm = GaussianMixture(n_components=idx, n_init=10, covariance_type=_type.lower()).fit(X)
        em_fashion_results.loc[idx, f"AIC_{_type}"] = temp_gmm.aic(X)
        em_fashion_results.loc[idx, f"BIC_{_type}"] = temp_gmm.bic(X)


In [None]:
with open(f"{os.getcwd()}/DimensionalityReduction/PrincipleComponentAnalysis/"
          f"MNIST_PCA_Results.pkl", "rb") as input_file:
    pca_mnist_results = pickle.load(input_file)
    input_file.close()

with open(f"{os.getcwd()}/DimensionalityReduction/PrincipleComponentAnalysis/"
          f"Fashion_PCA_Results.pkl", "rb") as input_file:
    pca_fashion_results = pickle.load(input_file)
    input_file.close()
with open(f"{os.getcwd()}/DimensionalityReduction/"
          f"IndependentComponentAnalysis/MNIST_ICA_Results.pkl", "rb") as input_file:
    ica_results_mnist = pickle.load(input_file)
    input_file.close()
with open(f"{os.getcwd()}/DimensionalityReduction/"
          f"IndependentComponentAnalysis/Fashion_MNIST_ICA_Results.pkl", "rb") as input_file:
    ica_results_fashion = pickle.load(input_file)
    input_file.close()
with open(f"{os.getcwd()}/DimensionalityReduction/"
          f"RandomProjections/MNIST_RP_results.pkl", "rb") as input_file:
    rp_results_mnist = pickle.load(input_file)
    input_file.close()

with open(f"{os.getcwd()}/DimensionalityReduction/"
          f"RandomProjections/Fashion_RP_results.pkl", "rb") as input_file:
    rp_results_fashion = pickle.load(input_file)
    input_file.close()

with open(f"{os.getcwd()}/DimensionalityReduction/RF_MNIST_Reduced_Dataset.pkl", "rb") as input_file:
    rf_mnist_reduced_data = pickle.load(input_file)
    input_file.close()
with open(f"{os.getcwd()}/DimensionalityReduction/RF_Fashion_Reduced_Dataset.pkl", "rb") as input_file:
    rf_fashion_reduced_data = pickle.load(input_file)
    input_file.close()
print(f"RF MNIST: {rf_mnist_reduced_data.shape}")
print(f"RF Fashion: {rf_fashion_reduced_data.shape}")
with open(f"{os.getcwd()}/DimensionalityReduction/RP_MNIST_Reduced_Dataset.pkl", "rb") as input_file:
    rp_mnist_reduced_data = pickle.load(input_file)
    input_file.close()
with open(f"{os.getcwd()}/DimensionalityReduction/RP_Fashion_Reduced_Dataset.pkl", "rb") as input_file:
    rp_fashion_reduced_data = pickle.load(input_file)
    input_file.close()
print(f"RP MNIST: {rp_mnist_reduced_data.shape}")
print(f"RP Fashion: {rp_fashion_reduced_data.shape}")
with open(f"{os.getcwd()}/DimensionalityReduction/ICA_MNIST_Reduced_Dataset.pkl", "rb") as input_file:
    ica_mnist_reduced_data = pickle.load(input_file)
    input_file.close()
with open(f"{os.getcwd()}/DimensionalityReduction/ICA_Fashion_Reduced_Dataset.pkl", "rb") as input_file:
    ica_fashion_reduced_data = pickle.load(input_file)
    input_file.close()
print(f"ICA MNIST: {ica_mnist_reduced_data.shape}")
print(f"ICA Fashion: {ica_fashion_reduced_data.shape}")
with open(f"{os.getcwd()}/DimensionalityReduction/PCA_MNIST_Reduced_Dataset.pkl", "rb") as input_file:
    pca_mnist_reduced_data = pickle.load(input_file)
    input_file.close()
with open(f"{os.getcwd()}/DimensionalityReduction/PCA_Fashion_Reduced_Dataset.pkl", "rb") as input_file:
    pca_fashion_reduced_data = pickle.load(input_file)
    input_file.close()
print(f"PCA MNIST: {pca_mnist_reduced_data.shape}")
print(f"PCA Fashion: {pca_fashion_reduced_data.shape}")
with open(f"{os.getcwd()}/DimensionalityReduction/RandomForest/RandomForest_MNIST_Results.pkl", "rb") as input_file:
    rf_results_mnist = pickle.load(input_file)
    input_file.close()
with open(f"{os.getcwd()}/DimensionalityReduction/RandomForest/RandomForest_Fashion_Results.pkl",
          "rb") as input_file:
    rf_results_fashion = pickle.load(input_file)
    input_file.close()
utl.plot_better_results(data_X=rp_mnist_reduced_data.iloc[:lim, :-1], data_y=rp_mnist_reduced_data.iloc[:lim, -1],
                    data_results=rp_results_mnist, dataset_name="MNIST",
                    algorithm_name="Randomized_Projections",
                    vline_idx=715, pixel_importance=None, model=None,
                    original_data_X=mnist["train_X"].iloc[:lim, :],
                    original_data_y=mnist["train_y"].iloc[:lim],
                    results=rp_results_mnist, is_rand_proj=True, is_fashion=False, font_size=14, n_clusters=12)

utl.plot_better_results(data_X=rp_fashion_reduced_data.iloc[:lim, :-1], data_y=rp_fashion_reduced_data.iloc[:lim, -1],
                    data_results=rp_results_fashion, dataset_name="Fashion_MNIST",
                    algorithm_name="Randomized_Projections",
                    vline_idx=746, pixel_importance=None, model=None,
                    original_data_X=fashion_mnist["train_X"].iloc[:lim, :],
                    original_data_y=fashion_mnist["train_y"].iloc[:lim],
                    results=rp_results_fashion, is_rand_proj=True, is_fashion=True, font_size=14, n_clusters=10)

# ICA
temp_ica_mnist_results = FastICA(n_components=284, whiten=True, max_iter=400).fit(mnist["train_X"].iloc[:lim, :])
print("Finished training MNIST ICA")
utl.plot_better_results(data_X=ica_mnist_reduced_data.iloc[:lim, :-1], data_y=ica_mnist_reduced_data.iloc[:lim, -1],
                    data_results=ica_results_mnist, dataset_name="MNIST", algorithm_name="ICA",
                    vline_idx=284, pixel_importance=temp_ica_mnist_results.mean_, model=temp_ica_mnist_results,
                    original_data_X=mnist["train_X"].iloc[:lim, :],
                    original_data_y=mnist["train_y"].iloc[:lim],
                    results=ica_results_mnist, is_ica=True, is_fashion=False, font_size=14, n_clusters=35)

temp_ica_fashion_results = FastICA(n_components=412, whiten=True,
                               max_iter=400).fit(fashion_mnist["train_X"].iloc[:lim, :])
print("Finished training Fashion ICA")
utl.plot_better_results(data_X=ica_fashion_reduced_data.iloc[:lim, :-1],
                    data_y=ica_fashion_reduced_data.iloc[:lim, -1],
                    data_results=ica_results_fashion, dataset_name="Fashion_MNIST", algorithm_name="ICA",
                    vline_idx=412, pixel_importance=temp_ica_fashion_results.mean_,
                    model=temp_ica_fashion_results,
                    original_data_X=fashion_mnist["train_X"].iloc[:lim, :],
                    original_data_y=fashion_mnist["train_y"].iloc[:lim],
                    results=ica_results_fashion, is_ica=True, is_fashion=True, font_size=14, n_clusters=15)

PCA
temp_pca_mnist_results = PCA(whiten=True, svd_solver="full").fit(mnist["train_X"].iloc[:lim, :])
mnist_component_df = pd.DataFrame(temp_pca_mnist_results.components_)
mnist_pixel_importance = mnist_component_df.idxmax(axis=1)
utl.plot_better_results(data_X=pca_mnist_reduced_data.iloc[:lim, :-1], data_y=pca_mnist_reduced_data.iloc[:lim, -1],
                    data_results=pca_mnist_results, dataset_name="MNIST", algorithm_name="PCA",
                    vline_idx=326, pixel_importance=mnist_pixel_importance.values, model=temp_pca_mnist_results,
                    original_data_X=mnist["train_X"].iloc[:lim, :],
                    original_data_y=mnist["train_y"].iloc[:lim],
                    results=pca_mnist_results, is_pca=True, is_fashion=False, font_size=14, n_clusters=13)

temp_pca_fashion_results = PCA(whiten=True, svd_solver="full").fit(fashion_mnist["train_X"].iloc[:lim, :])
fashion_component_df = pd.DataFrame(temp_pca_fashion_results.components_)
fashion_pixel_importance = fashion_component_df.idxmax(axis=1)
utl.plot_better_results(data_X=pca_fashion_reduced_data.iloc[:lim, :-1],
                    data_y=pca_fashion_reduced_data.iloc[:lim, -1],
                    data_results=pca_fashion_results, dataset_name="Fashion_MNIST", algorithm_name="PCA",
                    vline_idx=445, pixel_importance=fashion_pixel_importance.values,
                    model=temp_pca_fashion_results,
                    original_data_X=fashion_mnist["train_X"].iloc[:lim, :],
                    original_data_y=fashion_mnist["train_y"].iloc[:lim],
                    results=pca_fashion_results, is_pca=True, is_fashion=True, font_size=14, n_clusters=11)


RF
utl.plot_better_results(data_X=rf_mnist_reduced_data.iloc[:lim, :-1],
                    data_y=rf_mnist_reduced_data.iloc[:lim, -1],
                    data_results=rf_results_mnist, dataset_name="MNIST", algorithm_name="Random Forest",
                    vline_idx=152, pixel_importance=None,
                    model=None,
                    original_data_X=mnist["train_X"].iloc[:lim, :],
                    original_data_y=mnist["train_y"].iloc[:lim],
                    results=rf_results_mnist, is_rand_forest=True, is_fashion=False, font_size=14,
                    n_clusters=11)

# TESTING

In [None]:
temp_folder = "/Clustering/" + "Reduced/"
utl.check_folder(temp_folder)
save_dir = os.getcwd() + temp_folder

In [None]:
with open(f"{save_dir}/MNIST_PCA_Results.pkl", "rb") as input_file:
    mnist_pca = pickle.load(input_file)
    input_file.close()
with open(f"{save_dir}/MNIST_ICA_Results.pkl", "rb") as input_file:
    mnist_ica = pickle.load(input_file)
    input_file.close()
with open(f"{save_dir}/MNIST_RP_Results.pkl", "rb") as input_file:
    mnist_rp = pickle.load(input_file)
    input_file.close()
with open(f"{save_dir}/MNIST_RF_Results.pkl", "rb") as input_file:
    mnist_rf = pickle.load(input_file)
    input_file.close()

In [None]:
with open(f"{save_dir}/Fashion-MNIST_PCA_Results.pkl", "rb") as input_file:
    fashion_pca = pickle.load(input_file)
    input_file.close()
with open(f"{save_dir}/Fashion-MNIST_ICA_Results.pkl", "rb") as input_file:
    fashion_ica = pickle.load(input_file)
    input_file.close()
with open(f"{save_dir}/Fashion-MNIST_RP_Results.pkl", "rb") as input_file:
    fashion_rp = pickle.load(input_file)
    input_file.close()
with open(f"{save_dir}/Fashion-MNIST_RF_Results.pkl", "rb") as input_file:
    fashion_rf = pickle.load(input_file)
    input_file.close()