In [13]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split


# Download Fashion-MNIST from OpenML as a feature table X (784 pixel columns
# per sample) and a label vector y.
X, y = fetch_openml('Fashion-MNIST', version=1, return_X_y=True)

# Normalization: divide the raw pixel values by 255
# (presumably mapping 0-255 intensities into [0, 1]).
X /= 255.0

# The labels come back as digit strings (hence the int(...) cast used when a
# class name is looked up later). Convert them to integers once, so they can be
# compared directly with the integer cluster ids produced by the clusterers;
# scikit-learn metrics reject a mix of string and numeric labels.
y = y.astype(int)

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Linearization: force plain 2-D NumPy arrays of shape (n_samples, n_features).
x_train = np.array(X_train).reshape(X_train.shape[0], -1)
x_test = np.array(X_test).reshape(X_test.shape[0], -1)

# Sanity check: display shapes and dtypes of both splits.
x_train.shape, x_train.dtype, x_test.shape, x_test.dtype

((56000, 784), dtype('float64'), (14000, 784), dtype('float64'))

In [19]:
from sklearn.preprocessing import StandardScaler


# Standardize every feature. The statistics are learned from the training
# split only and then reused unchanged for the test split, so no information
# from the test data leaks into the preprocessing.
scaler = StandardScaler()
scaler.fit(X_train)

x_train = scaler.transform(X_train)
x_test = scaler.transform(X_test)

In [21]:
def replace_labels(predictions, shift):
    """Translate each predicted label through the lookup ``shift``.

    Parameters
    ----------
    predictions : iterable
        Labels to translate; each one is used as an index/key into ``shift``.
    shift : indexable
        Anything supporting ``shift[label]`` (list, tuple, dict, ...).

    Returns
    -------
    list
        ``shift[label]`` for every label, in the original order.
    """
    return [shift[label] for label in predictions]

In [20]:
from sklearn.metrics import f1_score, jaccard_score


def method_rate(predictions, shifted):
    """Print the best macro F1 and macro Jaccard scores over candidate label mappings.

    Cluster ids are arbitrary, so every mapping in ``shifted`` is applied to
    ``predictions`` (via ``replace_labels``) and the relabelled result is scored
    against the notebook-level ``y_test``. The maximum of each metric is
    printed; the two maxima are taken independently and may come from
    different mappings.

    Parameters
    ----------
    predictions : sequence
        Cluster ids, one per sample, aligned with ``y_test``.
    shifted : iterable
        Candidate mappings; each must support ``mapping[cluster_id]``
        (tuple, list, dict, ...). A lazy iterable is fine, it is consumed once.

    Raises
    ------
    ValueError
        If ``shifted`` yields no mapping at all.
    """
    # Score against integer labels: the mapped predictions are integer class
    # ids, and scikit-learn refuses to compare string labels with numeric ones.
    # Assumes y_test holds integers or digit strings.
    y_true = np.asarray(y_test).astype(int)

    f_scores = []
    jaccard_scores = []

    for s in shifted:
        pred = replace_labels(predictions, s)

        f_scores.append(f1_score(y_true, pred, average='macro'))
        jaccard_scores.append(jaccard_score(y_true, pred, average='macro'))

    # Fail with a clear message instead of max()'s generic empty-sequence error.
    if not f_scores:
        raise ValueError("method_rate needs at least one label mapping in `shifted`")

    print("F1 score:", max(f_scores))
    print("Jaccard score:", max(jaccard_scores))

In [22]:
from sklearn.cluster import KMeans

# Learn 10 clusters from the training split, then assign every test sample
# to one of the learned clusters.
k_means_model = KMeans(n_clusters=10, random_state=0)
k_means_model.fit(x_train)
k_means_preds = k_means_model.predict(x_test)

In [None]:
import itertools


# Cluster ids are arbitrary, so try every assignment of the 10 cluster ids to
# the 10 class labels: a permutation p maps cluster i -> class p[i].
# Kept lazy on purpose: there are 10! = 3,628,800 candidates, so materialising
# them would waste memory, and scoring all of them takes a long time.
shifted = itertools.permutations(range(10))
method_rate(k_means_preds, shifted)

In [5]:
from sklearn.cluster import BisectingKMeans


# Bisecting k-means with 10 clusters: fitted on the training split, then
# used to label the test split.
bk_means_model = BisectingKMeans(
    n_clusters=10,
    init='k-means++',
    random_state=0,
    bisecting_strategy='largest_cluster',
)
bk_means_model.fit(x_train)
bk_means_preds = bk_means_model.predict(x_test)

In [None]:
import itertools


# Cluster ids are arbitrary, so try every assignment of the 10 cluster ids to
# the 10 class labels: a permutation p maps cluster i -> class p[i].
# Kept lazy on purpose: there are 10! = 3,628,800 candidates, so materialising
# them would waste memory, and scoring all of them takes a long time.
shifted = itertools.permutations(range(10))
method_rate(bk_means_preds, shifted)

In [7]:
from sklearn.cluster import DBSCAN


# DBSCAN has no separate predict step: fit_predict only labels the samples it
# was fitted on. Cluster the test split directly so the labels line up
# one-to-one with y_test, which is what the scoring cell compares against.
dbscan_preds = DBSCAN(eps=5, min_samples=10, n_jobs=-1).fit_predict(x_test)

In [None]:
import itertools


# DBSCAN ids are not guaranteed to be exactly 0..9: the number of clusters is
# data-driven and noise points are labelled -1. Build each candidate mapping as
# a dict keyed by the ids that actually occur, assigning them distinct class
# labels. Generated lazily because the number of candidates grows factorially.
# If more than 10 distinct ids occur there is no such assignment, the generator
# is empty, and method_rate raises ValueError.
cluster_ids = np.unique(dbscan_preds).tolist()
shifted = (
    dict(zip(cluster_ids, classes))
    for classes in itertools.permutations(range(10), len(cluster_ids))
)
method_rate(dbscan_preds, shifted)

In [None]:
# Human-readable class names, indexed by the integer class label.
class_names = [
    'T-shirt',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot'
]

# Show the class name of the first training sample. The lookup is positional:
# on a pandas Series, y_train[0] would search for the *index label* 0, which
# after the shuffled split may be missing (KeyError) or not be the first row.
# np.asarray makes this work whether y_train is a Series or an array.
class_names[int(np.asarray(y_train)[0])]

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE


# Project the scaled test features down to two dimensions for plotting.
tsne = TSNE(n_components=2, random_state=0)
x_test_tsne = tsne.fit_transform(x_test)

# One point per test sample, coloured by its k-means cluster id.
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(
    x_test_tsne[:, 0],
    x_test_tsne[:, 1],
    c=k_means_preds,
    cmap='viridis',
    s=10,
)
fig.colorbar(points, ax=ax, label='Cluster')

ax.set_title('t-SNE Visualization of Predicted Clusters')
ax.set_xlabel('Component 1')
ax.set_ylabel('Component 2')
plt.show()