# Imports

In [101]:
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn import cluster, metrics, datasets
import matplotlib.pyplot as plt

# Application of k-means clustering algorithm on HTRU2 dataset

In [113]:
# Load the HTRU2 table (the file has no header row) and use every column
# except the last one as the feature matrix.
# NOTE(review): the last column is presumably the class label - confirm.
hrtu = pd.read_csv('Datasets/HTRU2/HTRU_2.csv', header=None)
x = hrtu.iloc[:, :-1].to_numpy()

# Sweep the number of clusters and remember the k with the lowest
# Davies-Bouldin score (lower is better).
min_dbscore = float('inf')
kmin = None
for k in range(2, 11):
    # A fixed seed makes the centroid initialisation, and therefore the
    # printed scores, reproducible across kernel restarts.
    kmeans = cluster.KMeans(n_clusters=k, random_state=0)
    kmeans.fit(x)
    labels = kmeans.labels_
    dbscore = metrics.davies_bouldin_score(x, labels)
    if dbscore < min_dbscore:
        min_dbscore = dbscore
        kmin = k
    print("Davies-Bouldin score for k =", k, ":", dbscore)

print("k =", kmin, "gives best Davies-Bouldin score(", min_dbscore, ")")

Davies-Bouldin score for k = 2 : 0.6793486550368992
Davies-Bouldin score for k = 3 : 0.7481502337059877
Davies-Bouldin score for k = 4 : 0.7837697587959213
Davies-Bouldin score for k = 5 : 0.7399607288949918
Davies-Bouldin score for k = 6 : 0.7303417368032523
Davies-Bouldin score for k = 7 : 0.7391748881771709
Davies-Bouldin score for k = 8 : 0.7443313258605988
Davies-Bouldin score for k = 9 : 0.7571938542886659
Davies-Bouldin score for k = 10 : 0.7834505333834458
k = 2 gives best Davies-Bouldin score( 0.6793486550368992 )


# Application of DBSCAN clustering algorithm on Perfume dataset

In [114]:
# Load the perfume sheet (no header row), drop column 0 and pool all the
# remaining values into a single one-feature sample.
perfume = pd.read_excel('Datasets/Perfume_Data/perfume_data.xlsx', header=None)
x = perfume.iloc[:, 1:].to_numpy().flatten()
# Seeded generator so the shuffled order is the same on every run.
np.random.default_rng(0).shuffle(x)
x = x.reshape(-1, 1)

# Grid search over eps / min_samples, keeping the combination with the
# lowest Davies-Bouldin score (lower is better).
min_dbscore = float('inf')
eps_min, samples_min = None, None
epsilon = np.logspace(1, 3, 4)
# min_samples is a neighbour count, so the grid must hold integers;
# float values are rejected by recent scikit-learn releases.
samples = np.arange(5, 11)
for eps in epsilon:
    for min_samples in samples:
        db = cluster.DBSCAN(eps=eps, min_samples=min_samples).fit(x)
        labels = db.labels_
        # DBSCAN marks noise with the label -1; the labels are passed to the
        # score unchanged, so noise counts as one more group.
        n_labels = len(set(labels))
        if n_labels < 2 or n_labels >= len(x):
            # davies_bouldin_score raises ValueError unless
            # 2 <= n_labels <= n_samples - 1; skip degenerate labelings
            # instead of aborting the whole sweep.
            print("Davies-Bouldin score for eps =", eps, "and min_samples =", min_samples,
                  ": skipped (", n_labels, "distinct label(s) )")
            continue
        dbscore = metrics.davies_bouldin_score(x, labels)
        if dbscore < min_dbscore:
            min_dbscore = dbscore
            eps_min = eps
            samples_min = min_samples
        print("Davies-Bouldin score for eps =", eps, "and min_samples =", min_samples, ":", dbscore)

print("eps =", eps_min, "and min_samples =", samples_min, "gives best Davies-Bouldin score(", min_dbscore, ")")

Davies-Bouldin score for eps = 10.0 and min_samples = 5.0 : 2.3247293875323876
Davies-Bouldin score for eps = 10.0 and min_samples = 6.0 : 2.6463062784868887
Davies-Bouldin score for eps = 10.0 and min_samples = 7.0 : 3.0149416667256794
Davies-Bouldin score for eps = 10.0 and min_samples = 8.0 : 5.158858411472511
Davies-Bouldin score for eps = 10.0 and min_samples = 9.0 : 5.158858411472511
Davies-Bouldin score for eps = 10.0 and min_samples = 10.0 : 8.449327546675814
Davies-Bouldin score for eps = 46.41588833612777 and min_samples = 5.0 : 2.3404706698645232
Davies-Bouldin score for eps = 46.41588833612777 and min_samples = 6.0 : 2.3404706698645232
Davies-Bouldin score for eps = 46.41588833612777 and min_samples = 7.0 : 6.15642532769999
Davies-Bouldin score for eps = 46.41588833612777 and min_samples = 8.0 : 2.536371890319451
Davies-Bouldin score for eps = 46.41588833612777 and min_samples = 9.0 : 1.5106699602677343
Davies-Bouldin score for eps = 46.41588833612777 and min_samples = 10.0

# Application of Agglomerative clustering algorithm on sklearn's toy dataset 

In [115]:
# Build the two-moons toy dataset; only the first element of the returned
# pair (the sample coordinates) is used for clustering.
moons = datasets.make_moons()
x = moons[0]

# Compare linkage strategies and remember the one with the lowest
# Davies-Bouldin score.
min_dbscore, min_linkage = 10**10, ''
for linkage in ('average', 'complete', 'single'):
    # fit() returns the estimator itself, so construction and fitting chain.
    hierarchical = cluster.AgglomerativeClustering(linkage=linkage).fit(x)
    labels = hierarchical.labels_
    dbscore = metrics.davies_bouldin_score(x, labels)
    print("Davies-Bouldin score for", linkage, "linkage:", dbscore)
    if dbscore < min_dbscore:
        min_dbscore, min_linkage = dbscore, linkage
print(min_linkage.capitalize(), "linkage gives best Davies-Bouldin score(", min_dbscore, ")")

Davies-Bouldin score for average linkage: 0.7874623048058049
Davies-Bouldin score for complete linkage: 0.8074533760682475
Davies-Bouldin score for single linkage: 1.1814187686460293
Average linkage gives best Davies-Bouldin score( 0.7874623048058049 )
