In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA, TruncatedSVD, FastICA
from sklearn.manifold import TSNE
from sklearn.metrics import silhouette_score

In [None]:
# Load both Fashion-MNIST CSV splits from the working directory.
train_data = pd.read_csv('fashion-mnist_train.csv')
test_data = pd.read_csv('fashion-mnist_test.csv')

# The 'label' column is the target; every other column is a feature.
y_train = train_data['label'].to_numpy()
X_train = train_data.drop(columns='label').to_numpy()

y_test = test_data['label'].to_numpy()
X_test = test_data.drop(columns='label').to_numpy()

# Image-shaped views of the flat rows, used for plotting.
# Assumes each row holds 28 * 28 = 784 pixel values -- TODO confirm against the CSV.
X_train_images = X_train.reshape((-1, 28, 28))
X_test_images = X_test.reshape((-1, 28, 28))

In [None]:
# Report the size of each split.
print("Shape of training data:", X_train.shape)
print("Shape of test data:", X_test.shape)

# Human-readable class names, indexed by the integer label value.
class_labels = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

# Number of training samples per label value.
class_counts = np.bincount(y_train)
for label_id, n_samples in enumerate(class_counts):
    print(f"Class {label_id}: {n_samples} samples ({class_labels[label_id]})")

# Show the first ten training images in a 2x5 grid, each titled with its class name.
fig, axes = plt.subplots(2, 5, figsize=(10, 5))
for idx, ax in enumerate(axes.flat):
    ax.imshow(X_train_images[idx], cmap='gray')
    ax.set_title(class_labels[y_train[idx]], fontsize=10)
    ax.axis('off')
plt.show()

In [None]:
# Embed the training features into two dimensions with t-SNE (seeded).
tsne = TSNE(random_state=42, n_components=2)
X_tsne = tsne.fit_transform(X_train)

# One scatter call per true class so each class gets its own colour and legend entry.
plt.figure(figsize=(8, 6))
for class_id, class_name in enumerate(class_labels):
    in_class = y_train == class_id
    plt.scatter(X_tsne[in_class, 0], X_tsne[in_class, 1], label=class_name)
plt.title("t-SNE Clustering of Fashion MNIST Data")
plt.legend()
plt.show()

In [None]:
# Reduce the training features to 50 principal components.
pca = PCA(n_components=50, random_state=42)
X_pca = pca.fit_transform(X_train)

# Cluster the 50-dimensional PCA embedding into 10 groups.
kmeans_pca = KMeans(n_clusters=10, random_state=42)
y_kmeans_pca = kmeans_pca.fit_predict(X_pca)

# Plot the clusters on the first two PCA components only.
# KMeans cluster ids are arbitrary: cluster i is NOT Fashion-MNIST class i, so the
# legend names each cluster by its id rather than borrowing a class name.
plt.figure(figsize=(8, 6))
for cluster_id in range(kmeans_pca.n_clusters):
    members = y_kmeans_pca == cluster_id
    plt.scatter(X_pca[members, 0], X_pca[members, 1], label=f"Cluster {cluster_id}")
plt.scatter(kmeans_pca.cluster_centers_[:, 0], kmeans_pca.cluster_centers_[:, 1],
            s=100, c='red', label='Cluster Centers')
plt.title("KMeans Clustering after PCA")
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend()
plt.show()

In [None]:
# Reduce the training features to 50 components with truncated SVD.
svd = TruncatedSVD(n_components=50, random_state=42)
X_svd = svd.fit_transform(X_train)

# Cluster the 50-dimensional SVD embedding into 10 groups.
kmeans_svd = KMeans(n_clusters=10, random_state=42)
y_kmeans_svd = kmeans_svd.fit_predict(X_svd)

# Plot the clusters on the first two SVD components only.
# KMeans cluster ids are arbitrary: cluster i is NOT Fashion-MNIST class i, so the
# legend names each cluster by its id rather than borrowing a class name.
plt.figure(figsize=(8, 6))
for cluster_id in range(kmeans_svd.n_clusters):
    members = y_kmeans_svd == cluster_id
    plt.scatter(X_svd[members, 0], X_svd[members, 1], label=f"Cluster {cluster_id}")
plt.scatter(kmeans_svd.cluster_centers_[:, 0], kmeans_svd.cluster_centers_[:, 1],
            s=100, c='red', label='Cluster Centers')
plt.title("KMeans Clustering after SVD")
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend()
plt.show()

In [None]:
# Reduce the training features to 50 independent components.
ica = FastICA(n_components=50, random_state=42)
X_ica = ica.fit_transform(X_train)

# Apply KMeans clustering on the ICA-transformed data.
kmeans_ica = KMeans(n_clusters=10, random_state=42)
y_kmeans_ica = kmeans_ica.fit_predict(X_ica)

# Plot the KMeans clusters on the first two ICA output columns only.
# KMeans cluster ids are arbitrary: cluster i is NOT Fashion-MNIST class i, so the
# legend names each cluster by its id rather than borrowing a class name.
plt.figure(figsize=(8, 6))
for cluster_id in range(kmeans_ica.n_clusters):
    members = y_kmeans_ica == cluster_id
    plt.scatter(X_ica[members, 0], X_ica[members, 1], label=f"Cluster {cluster_id}")
plt.scatter(kmeans_ica.cluster_centers_[:, 0], kmeans_ica.cluster_centers_[:, 1],
            s=100, c='red', label='Cluster Centers')
plt.title("KMeans Clustering after ICA")
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.legend()
plt.show()

In [None]:
# The silhouette coefficient is built from pairwise distances, so its cost grows
# quadratically with the number of scored samples. Score a seeded random subsample
# instead of every training row; set SILHOUETTE_SAMPLE_SIZE = None to score all rows.
SILHOUETTE_SAMPLE_SIZE = 10_000

# Same sample size and seed for every embedding so the three scores are comparable.
for method_name, embedding, cluster_ids in [
    ("PCA", X_pca, y_kmeans_pca),
    ("SVD", X_svd, y_kmeans_svd),
    ("ICA", X_ica, y_kmeans_ica),
]:
    score = silhouette_score(
        embedding,
        cluster_ids,
        sample_size=SILHOUETTE_SAMPLE_SIZE,
        random_state=42,
    )
    print(f"Silhouette Score after {method_name}:", score)

In [None]:
# Apply PCA for dimensionality reduction
pca_best = PCA(n_components=50, random_state=42)
X_best = pca_best.fit_transform(X_train)

# Apply KMeans