In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

# Number of points requested from each make_blobs call below.
n_samples = 1500

# Two blobs with different spreads; their centres are drawn inside center_box.
dataset = datasets.make_blobs(
    n_samples=n_samples,
    centers=2,
    center_box=(-7.0, 7.5),
    cluster_std=[1.4, 1.7],
    random_state=42,
)

# A third blob around (-4, -3), then pushed through a linear map.
transformation = [[1.2, -0.8], [-0.4, 1.7]]
X_2, _ = datasets.make_blobs(
    n_samples=n_samples,
    centers=[[-4, -3]],
    cluster_std=[1.9],
    random_state=170,
)
X_2 = X_2.dot(transformation)

# Stack all points; the transformed blob is given the label 2.
blob_points, blob_labels = dataset
third_blob_labels = np.full(len(X_2), 2)
X = np.concatenate((blob_points, X_2))
y = np.concatenate((blob_labels, third_blob_labels))

In [2]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# First, obtain the predicted clusters from the clustering method
kmeans = KMeans(random_state=42, n_clusters=3).fit(X)
kmeans_pred = kmeans.labels_

# Now compute the silhouette coefficient for that partition
silhouette_score(X, kmeans_pred, metric='euclidean')

0.5131660482634046

## Задание 11.1

In [3]:
from sklearn.mixture import GaussianMixture

# Gaussian mixture with three components; the seed is fixed so reruns match.
gm = GaussianMixture(
    random_state=42,
    n_components=3,
)

In [4]:
# Fit the mixture on X and keep the component assigned to each sample.
y_pred = gm.fit_predict(X=X)

In [6]:
# Silhouette coefficient of the labels currently stored in y_pred.
silhouette_score(X, y_pred, metric='euclidean')

0.3988405457243407

## Задание 11.2

In [9]:
# Build and fit a 3-cluster K-means model; fit() hands back the estimator
# itself, so ending the cell with the name shows the same repr.
k_means = KMeans(random_state=42, n_clusters=3).fit(X)
k_means

KMeans(n_clusters=3, random_state=42)

In [10]:
# Score the partition stored on the fitted K-means estimator.
k_means_labels = k_means.labels_
silhouette_score(X, k_means_labels, metric='euclidean')

0.5131660482634046

In [12]:
from sklearn.cluster import AgglomerativeClustering

# Agglomerative clustering cut at three clusters; every other setting is
# left at the library default.
ac = AgglomerativeClustering(
    n_clusters=3,
)

In [13]:
# Fit the agglomerative model and keep its per-sample labels.
# Note: this rebinds y_pred, which an earlier cell used for the Gaussian
# mixture predictions.
y_pred = ac.fit(X).labels_

In [14]:
# Silhouette coefficient of the labels currently stored in y_pred.
silhouette_score(X, y_pred, metric='euclidean')

0.4811992210663849

In [15]:
from sklearn.cluster import DBSCAN

# DBSCAN with hard-coded eps / min_samples.
# NOTE(review): record how these two values were chosen for this dataset.
dbscan = DBSCAN(
    min_samples=35,
    eps=0.9,
)

In [16]:
# Fit DBSCAN on the full dataset; the returned estimator is the cell output.
dbscan.fit(X=X)

DBSCAN(eps=0.9, min_samples=35)

In [17]:
# Score the DBSCAN partition exactly as stored on the estimator.
# NOTE(review): if DBSCAN marked any samples as noise (label -1), they are
# scored here as one more cluster — confirm that this is intended.
dbscan_labels = dbscan.labels_
silhouette_score(X, dbscan_labels, metric='euclidean')

0.4454335539277996

## Задание 11.3

In [22]:
# Compare the silhouette coefficient of K-means, EM (Gaussian mixture) and
# agglomerative clustering for every cluster count from 2 to 10.
#
# Each entry pairs the label used in the printed report with a factory that
# builds a fresh, unfitted estimator for the requested number of clusters.
# Loop-local names are used on purpose: the `k_means`, `gm` and `ac`
# estimators fitted in earlier cells are no longer overwritten by this sweep,
# so re-running those cells' follow-ups still refers to the 3-cluster models.
model_factories = [
    ('K-means', lambda k: KMeans(n_clusters=k, random_state=42)),
    ('EM', lambda k: GaussianMixture(n_components=k, random_state=42)),
    ('AC', lambda k: AgglomerativeClustering(n_clusters=k)),
]

for n_clusters in range(2, 11):
    for label, make_model in model_factories:
        # fit_predict fits the estimator on X and returns one label per sample.
        cluster_labels = make_model(n_clusters).fit_predict(X)
        score = silhouette_score(X=X, labels=cluster_labels, metric='euclidean')
        print(f'{label}: {score} при i = {n_clusters}')
    # Blank line between the reports for consecutive cluster counts.
    print()

K-means: 0.45525850917271177 при i = 2
EM: 0.45506572865005773 при i = 2
AC: 0.40560374862968174 при i = 2

K-means: 0.5131660482634046 при i = 3
EM: 0.3988405457243407 при i = 3
AC: 0.4811992210663849 при i = 3

K-means: 0.5114118014309094 при i = 4
EM: 0.510655066343949 при i = 4
AC: 0.48470679039805054 при i = 4

K-means: 0.48208278531968773 при i = 5
EM: 0.4813621459449205 при i = 5
AC: 0.4732978942735451 при i = 5

K-means: 0.46004229886156645 при i = 6
EM: 0.4628181210461304 при i = 6
AC: 0.4360547281175275 при i = 6

K-means: 0.39789335922616964 при i = 7
EM: 0.397854908720611 при i = 7
AC: 0.3610107002721555 при i = 7

K-means: 0.33357737485627553 при i = 8
EM: 0.32999199343917063 при i = 8
AC: 0.3470279907723165 при i = 8

K-means: 0.3279588779748603 при i = 9
EM: 0.3130377197653585 при i = 9
AC: 0.27831000812401474 при i = 9

K-means: 0.3215688946087787 при i = 10
EM: 0.2972718044706393 при i = 10
AC: 0.281314682459535 при i = 10

