In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import *
from sklearn.metrics import silhouette_score
from sklearn.metrics import davies_bouldin_score

In [2]:
def readData(path, label_col, labeled = 0):
    """Load a CSV file and return its contents as a NumPy array.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file; forwarded unchanged to ``pd.read_csv``.
    label_col : str
        Name of the column to remove when ``labeled`` is truthy. It is
        ignored when ``labeled`` is falsy.
    labeled : int or bool, default 0
        Falsy (``0`` / ``False``): every column of the file is returned.
        Truthy (``1`` / ``True``): ``label_col`` is dropped first.

    Returns
    -------
    numpy.ndarray
        One row per CSV record, one column per retained CSV column.

    Raises
    ------
    KeyError
        If ``labeled`` is truthy and ``label_col`` is not a column of the file
        (raised by ``DataFrame.drop``).
    """
    dataframe = pd.read_csv(path)
    # Use truthiness rather than `labeled is 0`: identity comparison against
    # an int literal only works through CPython's small-int cache, triggers a
    # SyntaxWarning on Python >= 3.8, and treats `labeled=False` as "labeled".
    if labeled:
        dataframe = dataframe.drop(columns=label_col)
    return dataframe.to_numpy()

In [3]:
def cluster(method, **parameters):
    """Instantiate ``method`` with the supplied keyword arguments.

    ``method`` is called as ``method(**parameters)`` and whatever it returns
    is handed back to the caller; nothing is fitted here.

    In this notebook ``method`` is meant to be one of the ``sklearn.cluster``
    classes, e.g. ``KMeans``, ``AffinityPropagation``,
    ``AgglomerativeClustering``, ``Birch``, ``DBSCAN``,
    ``FeatureAgglomeration``, ``MiniBatchKMeans``, ``MeanShift``, ``OPTICS``,
    ``SpectralClustering``, ``SpectralBiclustering`` or
    ``SpectralCoclustering``.

    Parameters
    ----------
    method : callable
        Class (or any callable) to invoke.
    **parameters
        Keyword arguments forwarded verbatim to ``method``.

    Returns
    -------
    object
        The result of ``method(**parameters)``.
    """
    estimator = method(**parameters)
    return estimator

In [4]:
def getScores(X, labels):
    """Score a clustering with two internal validity indices.

    Parameters
    ----------
    X : array-like
        Data that was clustered; passed unchanged to both metrics.
    labels : array-like
        Cluster assignment for each row of ``X``.

    Returns
    -------
    tuple
        ``(silhouette_score(X, labels), davies_bouldin_score(X, labels))``,
        in that order.
    """
    # Tuple elements are evaluated left to right, so the silhouette score is
    # still computed before the Davies-Bouldin score.
    return (
        silhouette_score(X, labels),
        davies_bouldin_score(X, labels),
    )

In [5]:
def main(path="iris.csv", label_col="variety", n_clusters=3, random_state=0):
    """Cluster a labeled CSV data set with KMeans and print the results.

    The label column is dropped, KMeans is fitted on the remaining columns,
    and the cluster labels followed by the (silhouette, Davies-Bouldin) score
    pair are printed.

    Parameters
    ----------
    path : str, default "iris.csv"
        CSV file to load.
    label_col : str, default "variety"
        Column holding the ground-truth label; removed before clustering.
    n_clusters : int, default 3
        Number of clusters requested from KMeans.
    random_state : int or None, default 0
        Seed forwarded to KMeans. A fixed seed makes the printed labels and
        scores identical on every run; pass ``None`` for unseeded behavior.

    Returns
    -------
    None
    """
    dataset = readData(path, label_col, labeled=1)
    # KMeans initialisation is random; without a seed the label numbering
    # (and possibly the scores) can change between runs of the notebook.
    cluster_method = cluster(
        KMeans, n_clusters=n_clusters, random_state=random_state
    )
    cluster_method.fit(dataset)
    print(cluster_method.labels_)
    print(getScores(dataset, cluster_method.labels_))

In [6]:
# Run the clustering demo; it prints the cluster labels and the score tuple.
main()

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 2 0 2 2 2 2
 2 2 0 0 2 2 2 2 0 2 0 2 0 2 2 0 0 2 2 2 2 2 0 2 2 2 2 0 2 2 2 0 2 2 2 0 2
 2 0]
(0.5528190123564101, 0.6619715465007542)
