<h1>
<center>T-SNE + Spectral Clustering</center>
</h1>

## Generals

<font size="3"> 
Packages import and system configurations. 
</font>

In [None]:
from keras.datasets import mnist,fashion_mnist
import numpy as np
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import pandas as pd
from datetime import datetime as dt
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt
import multiprocessing
import seaborn as sns
from sklearn.cluster import SpectralClustering, KMeans
from sklearn.metrics import pairwise_distances
from typing import List, Tuple
import random
from scipy.linalg import eig
# Number of workers handed to the n_jobs arguments below: leave two logical CPUs free,
# but never drop below one worker (on a 2-core machine the raw difference would be 0,
# which is not a valid n_jobs value).
cores = max(1, multiprocessing.cpu_count() - 2)

## Data Loading & Preprocessing 

<font size="3">  
A function that gives us information about data shapes and reshapes the data in order to be suitable for our models.
</font>

In [None]:
def data_reshape(x_train, y_train, x_test, y_test, data_name):
    """Print the split shapes and flatten every sample into a float32 row.

    Args:
        x_train: Training samples; all axes after the first are flattened.
        y_train: Training labels, returned untouched.
        x_test: Test samples; all axes after the first are flattened.
        y_test: Test labels, returned untouched.
        data_name: Accepted for signature compatibility; not used here.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)`` where both sample arrays
        are 2-D ``float32`` arrays of shape ``(n_samples, prod(other axes))``.
    """
    print ('Basic informations:')
    headers = ('X_train: ', 'Y_train: ', 'X_test:  ', 'Y_test:  ')
    for header, arr in zip(headers, (x_train, y_train, x_test, y_test)):
        print(header + str(arr.shape))

    def _flatten(samples):
        # Keep the sample axis and collapse everything else into one axis.
        return samples.reshape(samples.shape[0], np.prod(samples.shape[1:]))

    x_train = _flatten(x_train)
    x_test = _flatten(x_test)

    # Change integers to 32-bit floating point numbers
    x_train, x_test = x_train.astype('float32'), x_test.astype('float32')

    print("\nData shapes after reshaping:")
    print("Training matrix shape", x_train.shape)
    print("Testing matrix shape", x_test.shape)
    return x_train, y_train, x_test, y_test

<font size="3">
A function that provides us with the input data:
<ol>
<li>Load the requested dataset according to the given data name.</li>
<li>Create a subset of each split according to the given subset sizes (if the subset variable is True).</li>
<li>Use the above function and return the reshaped data.</li>
</ol>
</font>

In [None]:
def data_load(subset, data_name, train_subset_size, test_subset_size):
    """Load a Keras image dataset, optionally truncate it, and flatten it.

    Args:
        subset: When truthy, keep only the leading ``train_subset_size`` /
            ``test_subset_size`` samples of each split.
        data_name: ``'Mnist'`` or ``'FashionMnist'``.
        train_subset_size: Number of training samples kept when ``subset`` is truthy.
        test_subset_size: Number of test samples kept when ``subset`` is truthy.

    Returns:
        The ``(x_train, y_train, x_test, y_test)`` tuple produced by ``data_reshape``.

    Raises:
        ValueError: If ``data_name`` is not one of the supported names.
    """
    if data_name == 'Mnist':
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
    elif data_name == 'FashionMnist':
        (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
    else:
        # Fail loudly here instead of hitting an unbound local further down.
        raise ValueError(
            f"Unknown data_name {data_name!r}; expected 'Mnist' or 'FashionMnist'."
        )

    if subset:
        x_train, y_train = x_train[:train_subset_size], y_train[:train_subset_size]
        x_test, y_test = x_test[:test_subset_size], y_test[:test_subset_size]

    # Reshaping is identical whether or not a subset was taken.
    return data_reshape(x_train, y_train, x_test, y_test, data_name)

<font size="3">  
A function that applies standardization on the given data and returns it.
</font>

In [None]:
def scalling(x_train, x_test):
    """Standardise both splits using statistics learned from the training split.

    The scaler is fitted on ``x_train`` only and then applied to ``x_test``, so
    both splits are mapped with the same per-feature mean and standard
    deviation (fitting a second scaler on the test split would put the two
    splits on different scales).

    Args:
        x_train: 2-D training matrix.
        x_test: 2-D test matrix with the same number of columns.

    Returns:
        Tuple ``(x_train, x_test)`` of standardised arrays.
    """
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)
    return x_train, x_test

## T-SNE

<font size="3">  
A function that applies dimensionality reduction using the T-SNE algorithm (n_components=2) on the given data and returns the embedding of the data in low-dimensional space.
</font>

In [None]:
def apply_tsne(data):
    """Embed ``data`` into two dimensions with t-SNE.

    Uses a fixed ``random_state`` of 0 and the module-level ``cores`` value
    as ``n_jobs``.

    Returns:
        The array returned by ``TSNE.fit_transform``.
    """
    reducer = TSNE(n_components=2, random_state=0, n_jobs=cores)
    return reducer.fit_transform(data)

<font size="3">  
A function that applies dimensionality reduction using the PCA algorithm on the given data and returns the transformed values.
</font>

In [None]:
def apply_pca(data, n_comp):
    """Project ``data`` onto its first ``n_comp`` principal components.

    Args:
        data: Matrix to reduce.
        n_comp: Value forwarded to ``PCA(n_components=...)``.

    Returns:
        The array returned by ``PCA.fit_transform``.
    """
    reducer = PCA(n_components=n_comp)
    return reducer.fit_transform(data)

<font size="3">
A function that prints and saves a figure consisting of 3 scatterplots:
<ol>
<li>Scatterplot with the x and y calculated by PCA (n_components = 2).</li>
<li>Scatterplot with the x and y calculated by T-SNE (n_components = 2).</li>
<li>Scatterplot with the x and y calculated by PCA (n_components = helper_pca_n_comp, e.g. 50) + T-SNE (n_components = 2).</li>
</ol>

In all cases, the plotted labels are the real data labels.
</font>

In [None]:
def plot_reduction_results(pca_res, tsne_res, pca_tsne_res, y_train, data_name, helper_pca_n_comp):
    """Plot three 2-D embeddings side by side and save the figure as a PDF.

    Args:
        pca_res: 2-D PCA embedding (columns 0 and 1 are plotted).
        tsne_res: 2-D t-SNE embedding.
        pca_tsne_res: 2-D embedding from PCA followed by t-SNE.
        y_train: Labels used to colour the points in every panel.
        data_name: Used in the figure title and in the output file name.
        helper_pca_n_comp: Number of PCA components, shown in the third panel title.

    Side effects:
        Creates the ``Exports`` directory if needed and writes
        ``Exports/TSNE_<data_name>.pdf``.
    """
    import os  # local import: only needed to make sure the output folder exists

    fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True)
    fig.suptitle('{}: Dimensionality Reduction Approaches'.format(data_name), fontsize=15)
    font_size = {'fontsize': 12}

    panels = (
        (pca_res, 'PCA (n=2)'),
        (tsne_res, 'T-SNE (n=2)'),
        (pca_tsne_res, 'PCA (n={}) + T-SNE (n=2)'.format(helper_pca_n_comp)),
    )
    for ax, (embedding, title) in zip(axes, panels):
        sns.scatterplot(ax=ax, x=embedding[:, 0], y=embedding[:, 1], hue=y_train,
                        palette=sns.hls_palette(10), legend='full')
        ax.set_title(title, fontdict=font_size)

    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs('Exports', exist_ok=True)
    plt.savefig('Exports/TSNE_' + data_name + '.pdf')

 <font size="3">
A function that applies the following steps:
<ol>
<li>Apply PCA (n_components = 2).</li>
<li>Apply T-SNE (n_components = 2).</li>
<li>Apply PCA (n_components = helper_pca_n_comp, e.g. 50) and then apply T-SNE (n_components = 2).</li>
<li>Return the result of PCA + T-SNE, because it is the input for the spectral computations.</li>
</ol>
</font>

In [None]:
def tsne_pipeline(x_train, y_train, helper_pca_n_comp, data_name):
    """Compute the three embeddings, plot them, and return the PCA + t-SNE one.

    Args:
        x_train: Feature matrix to embed.
        y_train: Labels, forwarded to the plotting function.
        helper_pca_n_comp: Number of PCA components used before the second t-SNE run.
        data_name: Dataset name, forwarded to the plotting function.

    Returns:
        The 2-D embedding obtained by running t-SNE on the PCA-reduced data.
    """
    pca_2d = apply_pca(x_train, 2)
    tsne_2d = apply_tsne(x_train)
    pca_tsne_res = apply_tsne(apply_pca(x_train, helper_pca_n_comp))
    plot_reduction_results(pca_2d, tsne_2d, pca_tsne_res, y_train, data_name, helper_pca_n_comp)
    # The intermediate embeddings are locals and are released on return.
    return pca_tsne_res

## Computing Eigenvalues and Eigenvectors Using Spectral Methods

 <font size="3">
A function that applies spectral computations with the following steps:
<ol>
<li>It computes the pairwise distances between the rows of the data matrix using the Euclidean distance metric. This computation is done in parallel using the specified number of cores.</li>
<li>It converts the pairwise distances matrix into a matrix of 0s and 1s, where 1s indicate distances less than the distance threshold and 0s indicate distances greater than or equal to the threshold.</li>
<li>It calculates the Laplacian matrix from the 0/1 matrix using the formula L = D - W, where D is the diagonal matrix of row sums of W and W is the 0/1 matrix.</li>
<li>It computes the eigenvalues and eigenvectors of the Laplacian matrix using the eig() function.</li>
<li>It returns the eigenvalues and eigenvectors.</li>
   
</ol>

Note: this function was designed for the MNIST dataset. In our experiments we found that thresholding the distances gives better results.
</font>

In [None]:
def spectral_computation_mnist(data, cores, distance_threshold):
    """Eigen-decompose the Laplacian of a distance-threshold graph.

    Steps: pairwise Euclidean distances -> 0/1 adjacency matrix (1 where the
    distance is strictly below ``distance_threshold``) -> degree matrix
    ``D`` -> Laplacian ``L = D - W`` -> ``scipy.linalg.eig(L)``.

    Args:
        data: Matrix whose rows are the points to connect.
        cores: ``n_jobs`` value for the pairwise distance computation.
        distance_threshold: Points closer than this are connected.

    Returns:
        Tuple ``(eigval, eigvec)`` exactly as returned by ``eig``.
    """
    start = dt.now()
    W = pairwise_distances(data, metric="euclidean", n_jobs=cores)
    # One vectorised comparison instead of calling a Python lambda per element;
    # yields the same 0/1 integer matrix.
    W = (W < distance_threshold).astype(int)
    print(f"\nW with shape {W.shape} :")
    print(W)
    D = np.diag(np.sum(W, axis=1))
    print(f"\nDegree Matrix with shape {D.shape} :")
    print(D)
    L = D - W
    # Drop the big intermediates before the eigen-decomposition to lower peak memory.
    del(D)
    del(W)
    print(f"\nLaplacian Matrix with shape {L.shape} :")
    print(L)
    # NOTE(review): L is built from a symmetric distance matrix, so scipy.linalg.eigh
    # would likely be faster; kept as eig so the returned dtype/order stay unchanged.
    eigval, eigvec = eig(L)
    del(L)
    print (f"\nEigen Values have been computed at {(dt.now() - start).seconds} seconds")
    print(f"Eigenvalues Matrix with shape {eigval.shape} :")
    print(f"Eigenvectors Matrix with shape {eigvec.shape} :")
    return eigval, eigvec

<font size="3">
A function that applies spectral computations with the following steps:
<ol>
<li>Compute the pairwise distances between the data points using the Euclidean distance metric and the specified number of cores.</li>
<li>Compute the kernel matrix (K) of the pairwise distances using a Gaussian (rbf) kernel.</li>
<li>Compute the diagonal matrix (D) using the kernel matrix.</li>
<li>Compute the Laplacian matrix (L) using the kernel and diagonal matrices.</li>
<li>Compute the eigenvalues and eigenvectors of the Laplacian matrix.</li>
<li>Return the eigenvalues and eigenvectors.</li>
</ol>
</font>

In [None]:
def spectral_computation_fashion_mnist(data, cores, sigma):
    """Eigen-decompose the Laplacian of a Gaussian-kernel affinity graph.

    Steps: pairwise Euclidean distances ``d`` -> kernel
    ``exp(-d**2 / (2 * sigma**2))`` -> degree matrix (row sums on the
    diagonal) -> Laplacian ``degree - kernel`` -> ``scipy.linalg.eig``.

    Args:
        data: Matrix whose rows are the points to connect.
        cores: ``n_jobs`` value for the pairwise distance computation.
        sigma: Bandwidth of the Gaussian kernel.

    Returns:
        Tuple ``(eigval, eigvec)`` exactly as returned by ``eig``.
    """
    started_at = dt.now()

    distances = pairwise_distances(data, metric="euclidean", n_jobs=cores)
    print(distances)

    kernel = np.exp(-(distances ** 2) / (2 * sigma ** 2))
    print(f"\nK with shape {kernel.shape} :")
    print(kernel)

    degree = np.diag(kernel.sum(axis=1))
    print(f"\nDegree Matrix with shape {degree.shape} :")
    print(degree)

    laplacian = degree - kernel
    # Release the intermediates before the eigen-decomposition.
    del degree, kernel
    print(f"\nLaplacian Matrix with shape {laplacian.shape} :")
    print(laplacian)

    eigval, eigvec = eig(laplacian)
    del laplacian
    print (f"\nEigen Values have been computed at {(dt.now() - started_at).seconds} seconds")
    print(f"Eigenvalues Matrix with shape {eigval.shape} :")
    print(f"Eigenvectors Matrix with shape {eigvec.shape} :")
    return eigval, eigvec

<font size="3">
A function that preprocesses the eigenvectors with the following steps:
<ol>
<li>If the keep_real argument is True, it keeps only the real part of the eigenvalues and eigenvectors. If it is False, it keeps only the imaginary part.</li>
<li>It sorts the eigenvalues in ascending order, skips the two smallest ones, and keeps the indices of the next n_comp_egenvec eigenvalues.</li>
<li>It selects only the eigenvector columns corresponding to those indices (the skipped leading eigenvalues are the ones expected to be zero, so their eigenvectors are not included).</li>
<li>It scales the selected eigenvectors using StandardScaler.</li>
<li>It returns the scaled eigenvectors and the indices of the selected eigenvalues.</li>
</ol>
</font>

In [None]:
def reduce_egenvectors(eigval, eigvec, n_comp_egenvec, keep_real, n_skip=2):
    """Select and standardise the eigenvectors of the smallest eigenvalues.

    Args:
        eigval: 1-D array of eigenvalues (may be complex).
        eigvec: 2-D array whose column ``i`` is the eigenvector of ``eigval[i]``.
        n_comp_egenvec: Number of eigenvectors to keep.
        keep_real: If truthy use the real parts of ``eigval``/``eigvec``,
            otherwise use their imaginary parts.
        n_skip: Number of smallest eigenvalues to skip before selecting.
            Defaults to 2, the value that was previously hard-coded.
            NOTE(review): presumably this drops the (near-)zero eigenvalues;
            confirm that 2 is right for the graph at hand.

    Returns:
        Tuple ``(new_eigvec, best_egenval_idx)``: the selected columns after
        ``StandardScaler`` and the list of their column indices.
    """
    if keep_real:
        eigval, eigvec = eigval.real, eigvec.real
    else:
        eigval, eigvec = eigval.imag, eigvec.imag

    # Indices of the eigenvalues in ascending order (sorted() is stable on ties).
    ascending_idx = sorted(range(len(eigval)), key=lambda sub: eigval[sub])
    best_egenval_idx = ascending_idx[n_skip:n_comp_egenvec + n_skip]

    # Plain NumPy column selection; no DataFrame round-trip is needed.
    selected = np.asarray(eigvec)[:, best_egenval_idx]
    new_eigvec = StandardScaler().fit_transform(selected)
    return new_eigvec, best_egenval_idx

## K-Means Algorithm

<font size="3">
A function that applies the K-means algorithm from scratch:
<ol>
<li>The function takes two arguments: k and features. k is the number of clusters to create, and features is a NumPy array of data points.</li>
<li>It initializes the centroids of the clusters using the k-means++ algorithm. This involves randomly selecting one centroid from the data points and then selecting the remaining centroids using a probability distribution based on the distances of the data points to the nearest centroid.</li>
<li>It assigns each data point to the cluster with the nearest centroid.</li>
<li>It computes the mean of the points in each cluster to get the new centroids.</li>
<li>It repeats the process of assigning points to clusters and computing new centroids until convergence, which occurs when the centroids do not change from one iteration to the next.</li>
<li>It returns the final centroids and a list of cluster assignments for each data point.</li>
</ol>
</font>

In [None]:
def kmeans(k: int, features: np.ndarray, max_iter: int = 300) -> Tuple[List[np.ndarray], List[int]]:
    """Cluster ``features`` into ``k`` groups with Lloyd's algorithm.

    Centroids are seeded with k-means++: the first one is a uniformly random
    point, each further one is drawn with probability proportional to the
    squared distance to the nearest centroid chosen so far. Randomness comes
    from ``np.random`` only.

    Args:
        k: Number of clusters (must be >= 1).
        features: Array with one sample per row; trailing axes are flattened.
        max_iter: Upper bound on assignment/update rounds, so the loop
            terminates even if the centroids never become exactly stable.

    Returns:
        Tuple ``(centroids, cluster_assignments)``: a list of ``k`` centroid
        arrays and a list with the cluster index of every sample.

    Raises:
        ValueError: If ``k < 1`` or ``features`` contains no samples.
    """
    points = np.asarray(features, dtype=float)
    if k < 1:
        raise ValueError("k must be a positive integer")
    if points.ndim == 0 or points.shape[0] == 0:
        raise ValueError("features must contain at least one sample")
    points = points.reshape(points.shape[0], -1)
    num_points = points.shape[0]

    def nearest_centroid(centers: np.ndarray) -> np.ndarray:
        # (n_points, k) distance table; argmin keeps the first centroid on ties.
        gaps = np.linalg.norm(points[:, None, :] - centers[None, :, :], axis=2)
        return np.argmin(gaps, axis=1)

    # Step 1: Initialize centroids using k-means++
    first = points[np.random.randint(num_points)]
    seeds = [first]
    closest_sq = np.sum((points - first) ** 2, axis=1)
    for _ in range(k - 1):
        total = closest_sq.sum()
        # If every point coincides with a chosen centroid the weights are all
        # zero; fall back to a uniform draw instead of dividing by zero.
        probabilities = closest_sq / total if total > 0 else None
        pick = points[np.random.choice(num_points, p=probabilities)]
        seeds.append(pick)
        closest_sq = np.minimum(closest_sq, np.sum((points - pick) ** 2, axis=1))
    centroids = np.array(seeds)

    # Steps 2-4: alternate assignment and centroid update until nothing moves.
    for _ in range(max_iter):
        labels = nearest_centroid(centroids)
        updated = centroids.copy()
        for cluster_index in range(k):
            members = points[labels == cluster_index]
            # An empty cluster keeps its previous centroid; averaging nothing
            # would give NaN and the convergence test could never succeed.
            if len(members) > 0:
                updated[cluster_index] = members.mean(axis=0)
        if np.array_equal(updated, centroids):
            break
        centroids = updated

    # Step 5: Assign points to final clusters
    cluster_assignments = nearest_centroid(centroids).tolist()
    return list(centroids), cluster_assignments

## Clustering Pipelines

<font size="3">
A function that merges the above functions to provide us with the final clustering results.
<ol>
<li>It takes as input the eigenvalues and the eigenvectors.</li>
<li>It keeps only the necessary eigenvectors.</li>
<li>It chooses the proposed K-means or the Sklearn K-means according to the given variable (we do this to compare our algorithm with that of Sklearn).</li>
<li>It runs K-means and takes the clustering results.</li>
<li>It computes the silhouette metric.</li>
</ol>
</font>

In [None]:
def proposed_pipeline(eigval, eigvec, cores, n_comp_egenvec, k, kmeans_mode):
    """Cluster the reduced eigenvectors with k-means and score the result.

    Args:
        eigval: Eigenvalues from the spectral computation.
        eigvec: Matching eigenvectors (one per column).
        cores: Accepted for signature compatibility; not used here.
        n_comp_egenvec: Number of eigenvectors kept by ``reduce_egenvectors``.
        k: Number of clusters.
        kmeans_mode: ``'Proposed'`` for the from-scratch ``kmeans`` function,
            ``'Sklearn'`` for ``sklearn.cluster.KMeans``.

    Returns:
        Tuple ``(cluster_assignments, silhouette)``.

    Raises:
        ValueError: If ``kmeans_mode`` is not one of the supported values.
    """
    # Validate up front so a typo fails before any expensive work is done.
    if kmeans_mode not in ('Proposed', 'Sklearn'):
        raise ValueError(
            f"Unknown kmeans_mode {kmeans_mode!r}; expected 'Proposed' or 'Sklearn'."
        )

    start = dt.now()
    eigvec, _ = reduce_egenvectors(eigval, eigvec, n_comp_egenvec, True)
    data_input = eigvec.reshape(eigvec.shape[0], -1)

    if kmeans_mode == 'Proposed':
        _, cluster_assignments = kmeans(k, data_input)
        print (f"\nProposed K-means computations have been finished successfully at {(dt.now() - start).seconds} seconds")
    else:
        k_means = KMeans(init='k-means++', n_clusters=k, max_iter=1000)
        cluster_assignments = k_means.fit(data_input).labels_
        print (f"\nSklearn K-means computations have been finished successfully at {(dt.now() - start).seconds} seconds")

    silhouette = silhouette_score(data_input, cluster_assignments)
    return cluster_assignments, silhouette

<font size="3">  
A function that applies SpectralClustering algorithm from the Sklearn library. We do this to compare our algorithm with that of Sklearn
</font>

In [None]:
def sklearn_pipeline(pca_tsne_res, k):
    """Run scikit-learn's ``SpectralClustering`` as a reference baseline.

    The embedding is standardised first, then clustered with an RBF affinity
    and ``random_state=42``. The silhouette score is computed on the
    standardised embedding.

    Args:
        pca_tsne_res: Embedding to cluster.
        k: Number of clusters.

    Returns:
        Tuple ``(cluster_assignments, silhouette)``.
    """
    started_at = dt.now()
    scaled = StandardScaler().fit_transform(pca_tsne_res)

    model = SpectralClustering(n_clusters=k, affinity='rbf', random_state=42)
    cluster_assignments = model.fit(scaled).labels_

    silhouette = silhouette_score(scaled, cluster_assignments)
    print (f"\nSpectral Clustering of Sklearn has been finished successfully at {(dt.now() - started_at).seconds} seconds")
    return cluster_assignments, silhouette

<font size="3">
A function that prints and saves a figure consisting of 4 scatterplots, to compare our algorithm with that of Sklearn:
<ol>
<li>Scatterplot with the real labels as clusters.</li>
<li>Scatterplot with the cluster results of our implementations of the spectral computations and K-means.</li>
<li>Scatterplot with the cluster results of our implementation of the spectral computations combined with the Sklearn K-means.</li>
<li>Scatterplot with the cluster results of the Sklearn Spectral Clustering.</li>
</ol>
</font>

In [None]:
def plot_cluster_results(pca_tsne_res, y_train, our_cluster_assignments, km_sklearn_cluster_assignments, full_sklearn_cluster_assignments, data_name):
    """Plot the true labels next to three clusterings and save the figure.

    Every panel shows the same 2-D embedding; only the colouring differs.

    Args:
        pca_tsne_res: 2-D embedding (columns 0 and 1 are plotted).
        y_train: True labels (top-left panel).
        our_cluster_assignments: Labels from the proposed method (top-right).
        km_sklearn_cluster_assignments: Labels from the proposed spectral step
            followed by scikit-learn k-means (bottom-left).
        full_sklearn_cluster_assignments: Labels from scikit-learn spectral
            clustering (bottom-right).
        data_name: Used in the figure title and in the output file name.

    Side effects:
        Creates the ``Exports`` directory if needed and writes
        ``Exports/Spectral_Clustering_Aproaches_<data_name>.pdf``.
    """
    import os  # local import: only needed to make sure the output folder exists

    fig, axes = plt.subplots(2, 2, figsize=(20, 8), sharey=True)
    fig.suptitle('{}: Spektral Clustering Approaches (Clusters = 10)'.format(data_name), fontsize=20)
    font_size = {'fontsize': 17}

    # axes.flat walks the grid row by row, matching the order of the panels.
    panels = (
        (y_train, 'Real Clusters'),
        (our_cluster_assignments, 'Proposed Method'),
        (km_sklearn_cluster_assignments, 'Proposed Spectral + Sklearn K-means'),
        (full_sklearn_cluster_assignments, 'Full Sklearn Spectral Clustering'),
    )
    for ax, (labels, title) in zip(axes.flat, panels):
        sns.scatterplot(ax=ax, x=pca_tsne_res[:, 0], y=pca_tsne_res[:, 1], hue=labels,
                        palette=sns.hls_palette(10), legend='full')
        ax.set_title(title, fontdict=font_size)

    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs('Exports', exist_ok=True)
    plt.savefig('Exports/Spectral_Clustering_Aproaches_' + data_name + '.pdf')

<font size="3"> 
This function performs a series of experiments in which spectral clustering is applied using a different number of clusters in each experiment.

    
The function performs a loop over the values in n_clusters. For each value of n, the function performs spectral clustering using the proposed_pipeline function, with the specified number of clusters (n), and the other input arguments. 
    
The output of proposed_pipeline is a tuple containing the cluster assignments and the silhouette score for each experiment.
The function stores the cluster assignments in a list called all_results.
After the loop is finished, the function returns the list all_results, which contains the cluster assignments for all of the experiments.
</font>

In [None]:
def k_experiments(eigval, eigvec, cores, n_clusters, n_comp_egenvec):
    """Run the proposed pipeline once per cluster count.

    Args:
        eigval: Eigenvalues from the spectral computation.
        eigvec: Matching eigenvectors.
        cores: Forwarded to ``proposed_pipeline``.
        n_clusters: Iterable of cluster counts to try.
        n_comp_egenvec: Number of eigenvectors kept in every run.

    Returns:
        List with the cluster assignments of each run, in the order of
        ``n_clusters``; the silhouette scores are discarded.
    """
    return [
        proposed_pipeline(eigval, eigvec, cores, n_comp_egenvec, n, 'Proposed')[0]
        for n in n_clusters
    ]

<font size="3">
A function that prints and saves a figure consisting of 9 scatterplots that show the clustering behavior for different numbers of clusters. Each subplot depicts the clustering results for a specific number of clusters.
</font>

In [None]:
def plot_differen_clusters(k_list, all_results, data_name, embedding=None):
    """Plot up to nine clusterings of the same embedding in a 3x3 grid.

    Args:
        k_list: Cluster counts, one per experiment; used for the palette size
            and the panel titles.
        all_results: Cluster assignments, aligned with ``k_list``.
        data_name: Used in the figure title and in the output file name.
        embedding: 2-D embedding to plot. When omitted, the module-level
            ``pca_tsne_res`` variable is used, as before.

    Side effects:
        Creates the ``Exports`` directory if needed and writes
        ``Exports/K-Experiments_<data_name>.pdf``.
    """
    import os  # local import: only needed to make sure the output folder exists

    if embedding is None:
        # Backward-compatible fallback to the notebook-level embedding.
        embedding = pca_tsne_res

    fig, axes = plt.subplots(3, 3, figsize=(30, 12), sharey=True)
    fig.suptitle('{}: Experiments with different number of K'.format(data_name), fontsize=25)
    font_size = {'fontsize': 20}

    # zip stops at the shortest input, so fewer than nine experiments simply
    # leave the remaining panels empty instead of raising an IndexError.
    for ax, n_clusters, labels in zip(axes.flat, k_list, all_results):
        sns.scatterplot(ax=ax, x=embedding[:, 0], y=embedding[:, 1], hue=labels,
                        palette=sns.hls_palette(n_clusters), legend='full')
        ax.set_title('K = {}'.format(n_clusters), fontdict=font_size)

    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs('Exports', exist_ok=True)
    plt.savefig('Exports/K-Experiments_' + data_name + '.pdf')

## MNIST Dataset

<font size="3">
In the following cells we use the above functions to apply spectral clustering to the Mnist-dataset.
</font>

### Define variables

In [None]:
# --- MNIST experiment configuration ---
data_name = 'Mnist'
train_subset_size, test_subset_size = 15000, 2000
distance_threshold = 5            # points closer than this are connected in the graph
helper_pca_n_comp = 50            # PCA components used before T-SNE
k = 10                            # number of clusters for the main comparison
different_k = np.arange(8, 17)    # cluster counts for the K experiments: 8..16
n_comp_egenvec = k                # number of eigenvectors kept for K-means

### Data Loading, Preprocessing, T-sne, and Eigenvectors computation

In [None]:
# Load a subset of the data, standardise it, embed it, and eigen-decompose the graph Laplacian.
x_train, y_train, x_test, y_test = data_load(
    True, data_name, train_subset_size, test_subset_size
)
x_train, x_test = scalling(x_train, x_test)
pca_tsne_res = tsne_pipeline(x_train, y_train, helper_pca_n_comp, data_name)
eigval, eigvec = spectral_computation_mnist(pca_tsne_res, cores, distance_threshold)

### Spectral Clustering using different models

In [None]:
# Proposed spectral computation + from-scratch K-means.
our_cluster_assignments, our_silhouette = proposed_pipeline(
    eigval, eigvec, cores, n_comp_egenvec, k, kmeans_mode='Proposed'
)

# Proposed spectral computation + Sklearn K-means.
km_sklearn_cluster_assignments, km_sklearn_silhouette = proposed_pipeline(
    eigval, eigvec, cores, n_comp_egenvec, k, kmeans_mode='Sklearn'
)

# Sklearn Spectral Clustering on the embedding, as the reference.
full_sklearn_cluster_assignments, full_sklearn_silhouette = sklearn_pipeline(pca_tsne_res, k)

plot_cluster_results(
    pca_tsne_res,
    y_train,
    our_cluster_assignments,
    km_sklearn_cluster_assignments,
    full_sklearn_cluster_assignments,
    data_name,
)

### Silhouette metric on MNIST

In [None]:
# Report the silhouette score of each of the three approaches.
print('Proposed Method Silhouette: ', our_silhouette)
print('K-Means Sklearn Silhouette: ', km_sklearn_silhouette)
print('Spectral Clustering Sklearn Silhouette: ', full_sklearn_silhouette)

### Spectral Clustering using different number of clusters

In [None]:
# Repeat the proposed clustering for every cluster count and plot the results.
all_results = k_experiments(eigval, eigvec, cores, different_k, n_comp_egenvec)
plot_differen_clusters(different_k, all_results, data_name)

## FashionMnist Dataset

<font size="3">
In the following cells we use the above functions to apply spectral clustering to the FashionMnist dataset.
</font>

### Define variables

In [None]:
# --- FashionMnist experiment configuration ---
data2_name = 'FashionMnist'
train_subset_size, test_subset_size = 15000, 2000
sigma = 1                         # bandwidth of the Gaussian kernel
helper_pca_n_comp = 200           # PCA components used before T-SNE
k = 10                            # number of clusters for the main comparison
different_k = np.arange(4, 13)    # cluster counts for the K experiments: 4..12
n_comp_egenvec = k                # number of eigenvectors kept for K-means

### Data Loading, Preprocessing, T-sne, and Eigenvectors computation

In [None]:
# Load a subset of the data, standardise it, embed it, and eigen-decompose the graph Laplacian.
x_train, y_train, x_test, y_test = data_load(
    True, data2_name, train_subset_size, test_subset_size
)
x_train, x_test = scalling(x_train, x_test)
pca_tsne_res = tsne_pipeline(x_train, y_train, helper_pca_n_comp, data2_name)
eigval, eigvec = spectral_computation_fashion_mnist(pca_tsne_res, cores, sigma)

### Spectral Clustering using different models

In [None]:
# Proposed spectral computation + from-scratch K-means.
our_cluster_assignments, our_silhouette = proposed_pipeline(
    eigval, eigvec, cores, n_comp_egenvec, k, kmeans_mode='Proposed'
)

# Proposed spectral computation + Sklearn K-means.
km_sklearn_cluster_assignments, km_sklearn_silhouette = proposed_pipeline(
    eigval, eigvec, cores, n_comp_egenvec, k, kmeans_mode='Sklearn'
)

# Sklearn Spectral Clustering on the embedding, as the reference.
full_sklearn_cluster_assignments, full_sklearn_silhouette = sklearn_pipeline(pca_tsne_res, k)

plot_cluster_results(
    pca_tsne_res,
    y_train,
    our_cluster_assignments,
    km_sklearn_cluster_assignments,
    full_sklearn_cluster_assignments,
    data2_name,
)

### Silhouette metric on Fashion MNIST

In [None]:
# Report the silhouette score of each of the three approaches.
print('Proposed Method Silhouette: ', our_silhouette)
print('K-Means Sklearn Silhouette: ', km_sklearn_silhouette)
print('Spectral Clustering Sklearn Silhouette: ', full_sklearn_silhouette)

### Spectral Clustering using different number of clusters

In [None]:
# Repeat the proposed clustering for every cluster count and plot the results.
all_results = k_experiments(eigval, eigvec, cores, different_k, n_comp_egenvec)
plot_differen_clusters(different_k, all_results, data2_name)