# C210 - Exemplo de Agrupamento

### Geradores de dados disponíveis no SKL:
https://scikit-learn.org/stable/datasets/index.html#sample-generators

In [None]:
# Build a synthetic dataset: 300 two-feature samples scattered around 3 centers.
from sklearn.datasets import make_blobs
# X holds the sample coordinates; y holds the index of the center each sample
# was generated from. random_state is fixed so every run yields the same points.
X, y = make_blobs(
    n_samples=300,
    n_features=2,
    centers=3,
    cluster_std=0.5,
    shuffle=True,
    random_state=0,
)

In [None]:
# Scatter-plot the freshly generated dataset, without any cluster colouring.
import matplotlib.pyplot as plt
# rcParams is global: this resolution applies to every figure created afterwards.
plt.rcParams['figure.dpi'] = 200
fig, ax = plt.subplots()
# First feature on the horizontal axis, second feature on the vertical axis.
ax.scatter(X[:, 0], X[:, 1], s=50, c='white', marker='o', edgecolor='black')
plt.show()

### Modelos de aprendizado não-supervisionado (ex.: agrupamento) disponíveis no SKL:
https://scikit-learn.org/stable/unsupervised_learning.html

In [None]:
# Import the K-Means estimator, configure it, and fit it on the dataset.
from sklearn.cluster import KMeans
km = KMeans(
    n_clusters=3,     # number of clusters (and centroids) to look for
    init='random',    # start from randomly chosen initial centroids
    n_init=10,        # number of independent initialisations to try
    max_iter=300,     # iteration cap for a single run
    tol=1e-4,         # convergence tolerance
    random_state=0,   # fixed seed so the result is reproducible
)
# fit_predict trains the model on X and returns the cluster index of each sample.
y_km = km.fit_predict(X)

In [None]:
# Plot every cluster with its own colour and marker, then overlay the centroids.
# Each entry: (cluster index in y_km, fill colour, marker, legend label).
cluster_styles = [
    (0, 'lightgreen', 's', 'cluster 1'),
    (1, 'orange', 'o', 'cluster 2'),
    (2, 'lightblue', 'v', 'cluster 3'),
]
for cluster_id, color, marker, label in cluster_styles:
    # Keep only the samples that K-Means assigned to this cluster.
    members = X[y_km == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], s=50, c=color, marker=marker, edgecolor='black', label=label)
# Centroids are drawn last, as large red stars, so they sit on top of the points.
centers = km.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], s=250, marker='*', c='red', edgecolor='black', label='centroids')
plt.legend(scatterpoints=1)
plt.grid()
plt.show()

### Métricas de agrupamento disponíveis no SKL:
https://scikit-learn.org/stable/modules/classes.html#clustering-metrics

In [None]:
# Evaluate the fitted model with two clustering metrics, both computed from the
# samples in X and the labels the model assigned to them (km.labels_).
from sklearn.metrics import calinski_harabasz_score, silhouette_score
ch_score = calinski_harabasz_score(X, km.labels_)
print('Calinski-Harabasz score: %.2f' % ch_score)
sil_score = silhouette_score(X, km.labels_)
print('Silhouette score: %.2f' % sil_score)

##### Referências:
https://towardsdatascience.com/k-means-clustering-with-scikit-learn-6b47a369a83c