In [None]:
# import matplotlib
import matplotlib.pyplot as plt

# default size (width, height) applied to every figure drawn in this notebook
plt.rcParams.update({"figure.figsize": (12, 8)})

#import numpy
import numpy as np

# import make_blobs from sklearn.datasets
from sklearn.datasets import make_blobs

# import DBSCAN
from sklearn.cluster import DBSCAN

## Task: Generate clusters with the `make_blobs` function

* use these settings:
    * n_samples=150,
    * n_features=2,
    * centers=3,
    * cluster_std=0.6,
    * random_state=0

In [None]:
# Draw a reproducible toy dataset: X holds the sample points, y the blob each one came from.
X, y = make_blobs(
    n_samples=150,    # total number of points
    n_features=2,     # two features, so the points can be scatter-plotted directly
    centers=3,        # number of blobs to generate
    cluster_std=0.6,  # spread of each blob
    random_state=0,   # fixed seed: identical points on every run
)

* plot data points

In [None]:
# Unlabelled view of the data: first feature on the x-axis, second on the y-axis.
plt.scatter(x=X[:, 0], y=X[:, 1], c='black', marker='o')
plt.grid()
plt.show()

* define the `plot_clusters` function

In [None]:
def plot_clusters(X, y_res, plt_cluster_centers = False, noise_label = -1):
    """Scatter-plot 2-D points coloured by cluster label and show the figure.

    Parameters
    ----------
    X : array-like
        Sample coordinates; column 0 is drawn on the x-axis and column 1 on
        the y-axis.
    y_res : array-like
        One cluster label per row of ``X``. Lists are accepted as well as
        arrays.
    plt_cluster_centers : bool, default False
        When True, additionally mark the mean position of every cluster with
        a red star.
    noise_label : default -1
        Label that marks unclustered/noise points (DBSCAN uses ``-1``). Those
        points are labelled 'noise' in the legend and get no centroid. Pass
        ``None`` to treat every label as an ordinary cluster.

    Returns
    -------
    None
        The function draws on the current pyplot figure and calls
        ``plt.show()``.
    """
    # Coerce up front: with a plain list, `y_res == cluster` would be a single
    # bool instead of a per-sample mask and every cluster would come out empty.
    X = np.asarray(X)
    y_res = np.asarray(y_res)

    X_centroids = []
    Y_centroids = []

    # np.unique returns the labels sorted, so legend order and colour
    # assignment are the same on every run (set iteration order is not).
    for cluster in np.unique(y_res).tolist():
        members = y_res == cluster
        xs = X[members, 0]
        ys = X[members, 1]

        is_noise = noise_label is not None and cluster == noise_label
        if not is_noise:
            # The mean of scattered noise points is not a cluster centre.
            X_centroids.append(np.mean(xs))
            Y_centroids.append(np.mean(ys))

        plt.scatter(xs,
                    ys,
                    s=50,
                    marker='s',
                    label='noise' if is_noise else f'cluster {cluster}')

    if plt_cluster_centers and X_centroids:
        plt.scatter(X_centroids,
                    Y_centroids,
                    marker='*',
                    c='red',
                    s=250,
                    label='centroids')

    # Skip the legend when nothing was drawn, otherwise matplotlib warns that
    # there are no labelled artists.
    if y_res.size:
        plt.legend()
    plt.grid()
    plt.show()

## Objective: 
Try to find the optimal settings of the `eps` and `min_samples` parameters of DBSCAN. You can iterate over multiple parameter combinations and plot the clustering results.

* define the parameter space

In [None]:
# candidate neighbourhood radii passed to DBSCAN as `eps`
eps_values = [
    0.1,
    0.2,
    0.5,
    0.8,
]
# candidate values passed to DBSCAN as `min_samples`
min_samples_values = [
    1,
    3,
    5,
]

* combinations (hint: use the `product` function from `itertools`)

In [None]:
from itertools import product

# Every (eps, min_samples) pairing of the two grids, materialised as a list.
combinations = [*product(eps_values, min_samples_values)]

In [None]:
# Visual parameter sweep: cluster the data once per (eps, min_samples) pair
# and draw one titled figure for each result.
for eps, min_samples in combinations:
    db = DBSCAN(eps=eps, min_samples=min_samples, metric='euclidean')
    y_db = db.fit_predict(X)

    # The title is set first; plot_clusters then draws on the same figure and shows it.
    plt.title(f'eps: {eps}, min_samples: {min_samples}')
    plot_clusters(X, y_db)