In [None]:
from keras.datasets import mnist
import warnings
# Install a blanket "ignore" filter so no warnings are shown from here on.
warnings.filterwarnings(action="ignore")

# load_data() yields two (images, labels) pairs; the first pair is discarded
# and the second one becomes the working dataset X (images) / y (labels).
_discarded_pair, (X, y) = mnist.load_data()

In [None]:
import matplotlib.pyplot as plt

# Preview the first 24 samples on a 4x6 grid, each captioned with its label.
fig, ax = plt.subplots(nrows=4, ncols=6)
for idx, panel in enumerate(ax.flat):
    panel.imshow(X[idx], cmap='gray')
    # Drop the tick marks so only the image and its caption remain.
    panel.set_xticks([])
    panel.set_yticks([])
    panel.set_xlabel(y[idx], color='black')

In [None]:
# Sanity check: dimensions of the image array and of the label vector.
images_shape, labels_shape = X.shape, y.shape
print(images_shape, labels_shape)

In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.cluster import MiniBatchKMeans
import tensorflow as tf
import numpy as np
# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=29,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Flatten every training image into a 784-element row so that KMeans receives
# one sample per row.  X_train is rebound on purpose: later cells use the
# flattened version.
X_train = X_train.reshape((-1, 784))

# Partition the flattened training images into 10 clusters (fixed seed) and
# keep the per-sample cluster ids.
kmeans = KMeans(n_clusters=10, random_state=29).fit(X_train)
train_labels = kmeans.labels_

In [None]:
# Number of training samples that KMeans assigned to cluster 3.
in_cluster_3 = kmeans.labels_ == 3
print(in_cluster_3.sum())

In [None]:
# View each of the 10 cluster centres as a 28x28 image.
centroids = kmeans.cluster_centers_
centroids_images = centroids.reshape((10, 28, 28))

# Draw the centroid images on a 2x5 grid with the axes decorations hidden.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(8, 3))
for ax, centroid_image in zip(axes.ravel(), centroids_images):
    ax.imshow(centroid_image, cmap='gray')
    ax.axis('off')
plt.show()

In [None]:
from scipy.stats import mode

# Give every KMeans cluster the digit that occurs most often among its
# training members.
#
# np.bincount(...).argmax() replaces scipy.stats.mode(...)[0]: mode() returns
# either a length-1 array or a scalar depending on the SciPy version, whereas
# argmax always yields a single integer.  Ties resolve to the smallest digit,
# which is the same rule mode() applies.  An empty cluster makes argmax raise
# ValueError instead of silently producing a meaningless label.
n_clusters = kmeans.n_clusters
cluster_labels = np.zeros(n_clusters, dtype=int)
for cluster_id in range(n_clusters):
    mask = (train_labels == cluster_id)
    cluster_labels[cluster_id] = np.bincount(y_train[mask]).argmax()

# Translate each training sample's cluster id into that cluster's majority digit.
train_predictions = cluster_labels[train_labels]

print(cluster_labels)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Assign the held-out images (flattened to 784 features) to the fitted
# clusters, then look up the digit stored for each cluster in cluster_labels.
X_test_flat = X_test.reshape((-1, 784))
test_labels = kmeans.predict(X_test_flat)
test_predictions = np.take(cluster_labels, test_labels)

In [None]:
# Confusion matrix on the held-out samples (rows: true digit, columns:
# predicted digit).  Passing labels explicitly keeps the matrix 10x10 with
# row/column k meaning digit k, even if some digit is never predicted or is
# absent from y_test; without it, indexing row 9 below could hit the wrong
# class or go out of bounds.
conf_matrix = confusion_matrix(y_test, test_predictions, labels=np.arange(10))

# Visualise the confusion matrix
plt.figure(figsize=(10, 8))
plt.imshow(conf_matrix, cmap='Blues')
plt.colorbar()
plt.xlabel('predicted digit')
plt.ylabel('true digit')
plt.show()

# Row 9 counts how true 9s were predicted.  Entry [9, 9] holds the correct
# predictions, so it is zeroed on a copy before argmax; otherwise a
# well-recognised 9 would be reported as "confused with" itself.
row_for_9 = conf_matrix[9].copy()
row_for_9[9] = 0
most_confused_with_9 = np.argmax(row_for_9)
print(most_confused_with_9)

In [None]:
# Share of held-out samples whose mapped cluster digit equals the true label.
test_accuracy = accuracy_score(y_true=y_test, y_pred=test_predictions)
print(test_accuracy)

In [None]:
from sklearn.manifold import TSNE
# Embed the flattened training images into 2-D with t-SNE (random init, fixed
# seed).  This cell only computes the embedding.  A KMeans fit on X_train_tsne
# previously followed here, but it was a verbatim copy of the next cell and
# therefore ran twice on every Run All; the next cell is the single place
# where kmeans_tsne and train_labels_tsne are defined.
tsne = TSNE(n_components=2, init='random', random_state=29)
X_train_tsne = tsne.fit_transform(X_train)

In [None]:
# Cluster the t-SNE embedding into 10 groups (fixed seed) and keep the
# per-sample cluster ids.
kmeans_tsne = KMeans(n_clusters=10, random_state=29).fit(X_train_tsne)
train_labels_tsne = kmeans_tsne.labels_

In [None]:
from scipy.stats import mode

# Give every cluster found on the t-SNE embedding the digit that occurs most
# often among its training members.
#
# np.bincount(...).argmax() replaces scipy.stats.mode(...)[0]: mode() returns
# either a length-1 array or a scalar depending on the SciPy version, whereas
# argmax always yields a single integer.  Ties resolve to the smallest digit,
# which is the same rule mode() applies.  An empty cluster makes argmax raise
# ValueError instead of silently producing a meaningless label.
n_clusters_tsne = kmeans_tsne.n_clusters
cluster_labels_tsne = np.zeros(n_clusters_tsne, dtype=int)
for cluster_id in range(n_clusters_tsne):
    mask = (train_labels_tsne == cluster_id)
    cluster_labels_tsne[cluster_id] = np.bincount(y_train[mask]).argmax()

# Translate each training sample's cluster id into that cluster's majority digit.
train_predictions_tsne = cluster_labels_tsne[train_labels_tsne]

print(cluster_labels_tsne)


In [None]:
# Share of training samples whose t-SNE cluster digit equals the true label.
train_accuracy_tsne = accuracy_score(y_true=y_train, y_pred=train_predictions_tsne)
print(train_accuracy_tsne)