## Import Libraries And Create Custom Libraries

In [2]:
import numpy as np

from pyclustering.cluster.encoder import type_encoding
from pyclustering.utils import median
from pyclustering.utils.metric import distance_metric, type_metric
 
import pyclustering.core.kmedoids_wrapper as wrapper
 
from pyclustering.core.wrapper import ccore_library
from pyclustering.core.metric_wrapper import metric_wrapper
from sklearn.metrics.pairwise import pairwise_distances
from sklearn import metrics

import pandas as pd
from openpyxl import load_workbook
from collections import Counter

class kmedoids:
     """!
     @brief Class represents clustering algorithm K-Medoids (another one title is PAM - Partitioning Around Medoids).
     @details The algorithm is less sensitive to outliers tham K-Means. The principle difference between K-Medoids and K-Medians is that
              K-Medoids uses existed points from input data space as medoids, but median in K-Medians can be unreal object (not from
              input data space).
              
              CCORE option can be used to use core pyclustering - C/C++ shared library for processing that significantly increases performance.
     
     Clustering example:
     @code
         # load list of points for cluster analysis
         sample = read_sample(path)
         
         # set random initial medoids
         initial_medoids = [1, 10]
         
         # create instance of K-Medoids algorithm
         kmedoids_instance = kmedoids(sample, initial_medoids)
         
         # run cluster analysis and obtain results
         kmedoids_instance.process();
         clusters = kmedoids_instance.get_clusters()
         
         # show allocated clusters
         print(clusters)
     @endcode
 
     Metric for calculation distance between points can be specified by parameter additional 'metric':
     @code
         # create Minkowski distance metric with degree equals to '2'
         metric = distance_metric(type_metric.MINKOWSKI, degree=2)
 
         # create K-Medoids algorithm with specific distance metric
         kmedoids_instance = kmedoids(sample, initial_medoids, metric=metric)
 
         # run cluster analysis and obtain results
         kmedoids_instance.process()
         clusters = kmedoids_instance.get_clusters()
     @endcode
 
     Distance matrix can be used instead of sequence of points to increase performance and for that purpose parameter 'data_type' should be used:
     @code
         # calculate distance matrix for sample
         sample = read_sample(path_to_sample)
         matrix = calculate_distance_matrix(sample)
 
         # create K-Medoids algorithm for processing distance matrix instead of points
         kmedoids_instance = kmedoids(matrix, initial_medoids, data_type='distance_matrix')
 
         # run cluster analysis and obtain results
         kmedoids_instance.process()
 
         clusters = kmedoids_instance.get_clusters()
         medoids = kmedoids_instance.get_medoids()
     @endcode
 
     """
     
     def __init__(self, data, initial_index_medoids, tolerance=0.001, ccore=True, **kwargs):
        """!
        @brief Constructor of clustering algorithm K-Medoids.
         
        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
        @param[in] initial_index_medoids (list): Indexes of intial medoids (indexes of points in input data).
        @param[in] tolerance (double): Stop condition: if maximum value of distance change of medoids of clusters is less than tolerance than algorithm will stop processing.
        @param[in] ccore (bool): If specified than CCORE library (C++ pyclustering library) is used for clustering instead of Python code.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric', 'data_type').
 
        <b>Keyword Args:</b><br>
            - metric (distance_metric): Metric that is used for distance calculation between two points.
            - data_type (string): Data type of input sample 'data' that is processed by the algorithm ('points', 'distance_matrix').
 
        """
        self.__pointer_data = data
        self.__clusters = []
        self.__medoid_indexes = initial_index_medoids
        self.__tolerance = tolerance
 
        self.__metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN_SQUARE))
        self.__data_type = kwargs.get('data_type', 'points')
        self.__distance_calculator = self.__create_distance_calculator()
 
        self.__ccore = ccore and self.__metric.get_type() != type_metric.USER_DEFINED
        if self.__ccore:
            self.__ccore = ccore_library.workable()
 
 
     def process(self):
        """!
        @brief Performs cluster analysis in line with rules of K-Medoids algorithm.
 
        @return (kmedoids) Returns itself (K-Medoids instance).
 
        @remark Results of clustering can be obtained using corresponding get methods.
         
        @see get_clusters()
        @see get_medoids()
         
        """
         
        if self.__ccore is True:
            ccore_metric = metric_wrapper.create_instance(self.__metric)
            self.__clusters, self.__medoid_indexes = wrapper.kmedoids(self.__pointer_data, self.__medoid_indexes, self.__tolerance, ccore_metric.get_pointer(), self.__data_type)
         
        else:
            changes = float('inf')
              
            stop_condition = self.__tolerance
              
            while changes > stop_condition:
                self.__clusters = self.__update_clusters()
                update_medoid_indexes = self.__update_medoids()
 
                changes = max([self.__distance_calculator(self.__medoid_indexes[index], update_medoid_indexes[index]) for index in range(len(update_medoid_indexes))])
 
                self.__medoid_indexes = update_medoid_indexes
 
        return self
 
     def get_clusters(self):
        """!
         @brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.
         
         @see process()
         @see get_medoids()
         
         """
         
        return self.__clusters
     
     
     def get_medoids(self):
        """!
         @brief Returns list of medoids of allocated clusters represented by indexes from the input data.
         
         @see process()
         @see get_clusters()
         
         """
 
        return self.__medoid_indexes
 
 
     def get_cluster_encoding(self):
         """!
         @brief Returns clustering result representation type that indicate how clusters are encoded.
         
         @return (type_encoding) Clustering result representation.
         
         @see get_clusters()
         
         """
         
         return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
 
 
     def __create_distance_calculator(self):
        """!
        @brief Creates distance calculator in line with algorithms parameters.
 
        @return (callable) Distance calculator.
 
        """
        if self.__data_type == 'points':
            return lambda index1, index2: self.__metric(self.__pointer_data[index1], self.__pointer_data[index2])
        elif self.__data_type == 'distance_matrix':
             if isinstance(self.__pointer_data, numpy.matrix):
                return lambda index1, index2: self.__pointer_data.item((index1, index2))
 
             return lambda index1, index2: self.__pointer_data[index1][index2]
        else:
             raise TypeError("Unknown type of data is specified '%s'" % self.__data_type)
 
 
     def __update_clusters(self):
        """!
        @brief Calculate distance to each point from the each cluster. 
        @details Nearest points are captured by according clusters and as a result clusters are updated.
         
        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
         
        """
         
        clusters = [[self.__medoid_indexes[i]] for i in range(len(self.__medoid_indexes))]
        for index_point in range(len(self.__pointer_data)):
            if index_point in self.__medoid_indexes:
                continue
 
            index_optim = -1
            dist_optim = float('Inf')
             
            for index in range(len(self.__medoid_indexes)):
                dist = self.__distance_calculator(index_point, self.__medoid_indexes[index])
                 
                if dist < dist_optim:
                    index_optim = index
                    dist_optim = dist
             
            clusters[index_optim].append(index_point)
         
        return clusters
    
    
     def __update_medoids(self):
        """!
         @brief Find medoids of clusters in line with contained objects.
         
         @return (list) list of medoids for current number of clusters.
         
         """
        
        medoid_indexes = [-1] * len(self.__clusters)
         
        for index in range(len(self.__clusters)):
                medoid_index = median(self.__pointer_data, self.__clusters[index], metric=self.__metric, data_type=self.__data_type)
                medoid_indexes[index] = medoid_index
        return medoid_indexes

## Load Datasets

In [3]:
wb = load_workbook(filename='Data Pengelola2.xlsx')
sheetName = wb['Sheet1']
datasets = pd.DataFrame(sheetName.values)
# data.fillna(0, inplace=True)
datasets.columns = ['NAMA', 'KB1', 'KB2','KB3', 'KB4', 'KB5', 'KB6', 'SM1', 'SM2', 'SM3', 'SM4', 'SM5', 'SM6', 'MK1', 'MK2', 'MK3', 'MK4', 'MK5', 'MK6','MK7', 'MK8', 'MK9', 'MK10', 'MK11', 'MK12', 'MK13', 'MK14', 'MK15', 'MK16', 'MK17', 'PB1', 'PB2', 'PB3', 'PB4', 'PB5', 'PB6', 'PB7', 'PB8', 'PB9', 'PB10', 'PB11', 'PB12', 'PB13', 'PB14', 'PB15', 'PB16', 'PB17']
# print(np.nansum(data['MK7']))
# print(data['MK7'].count())
# print(round(np.nansum(data['MK7'])/data['MK7'].count()))
dataWithoutName = datasets.iloc[:, datasets.columns != 'NAMA']
for i in dataWithoutName:
    average = round(np.nansum(dataWithoutName[i])/dataWithoutName[i].count())
    dataWithoutName[i].fillna(average, inplace=True)
dataWithoutName

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)


Unnamed: 0,KB1,KB2,KB3,KB4,KB5,KB6,SM1,SM2,SM3,SM4,...,PB8,PB9,PB10,PB11,PB12,PB13,PB14,PB15,PB16,PB17
0,1,1,1,1,1,1,1,2,2,3,...,3,1,2,1,1,1,2,2,2,1
1,1,1,2,2,2,2,1,2,2,3,...,3,1,2,2,1,2,1,2,2,2
2,1,1,1,2,2,1,2,2,2,3,...,1,1,2,1,2,1,2,1,2,1
3,2,2,2,1,1,1,1,2,2,4,...,2,3,2,1,1,1,1,1,2,1
4,1,1,1,1,1,1,1,2,2,3,...,1,1,2,1,2,2,2,2,2,1
5,1,1,1,1,1,1,1,2,3,3,...,2,1,2,2,2,2,2,2,2,2
6,1,1,1,1,2,1,3,2,3,3,...,3,1,2,1,1,1,2,2,2,2
7,2,2,2,1,1,1,1,2,2,2,...,1,1,2,1,2,2,2,1,2,2
8,2,2,1,2,2,1,2,2,1,4,...,2,1,2,2,2,2,2,1,2,2
9,2,2,2,2,2,2,1,2,3,3,...,2,1,1,1,2,1,2,2,2,1


## Create List Dataset

In [4]:
datax = []
for i in dataWithoutName.values:
    datax.append(list(i))

## Get Distance With Euclidean

In [5]:
data = (pairwise_distances(datax, metric='euclidean'))
print(data)

[[0.         5.09901951 5.19615242 ... 4.         4.58257569 4.12310563]
 [5.09901951 0.         5.         ... 5.09901951 5.         5.        ]
 [5.19615242 5.         0.         ... 4.58257569 4.24264069 3.74165739]
 ...
 [4.         5.09901951 4.58257569 ... 0.         3.         3.87298335]
 [4.58257569 5.         4.24264069 ... 3.         0.         4.        ]
 [4.12310563 5.         3.74165739 ... 3.87298335 4.         0.        ]]


## Use Custom Library for Clustering

In [6]:
import random
k = 3
random.seed(10)
initial_medoids = [random.randint(1,10) for x in range(k)]
kmedoids_instance = kmedoids(data, initial_medoids)

In [7]:
kmedoids_instance.process()

<__main__.kmedoids at 0x52701d0>

In [8]:
clusters = kmedoids_instance.get_clusters()
clusters

[[19, 1, 7, 10, 45, 110, 128, 152],
 [126,
  0,
  2,
  3,
  4,
  5,
  6,
  8,
  9,
  11,
  12,
  30,
  48,
  50,
  57,
  58,
  59,
  64,
  70,
  73,
  89,
  94,
  95,
  109,
  111,
  113,
  116,
  118,
  120,
  123,
  127,
  132,
  137,
  140,
  142,
  151,
  154,
  161,
  163,
  164,
  165,
  170,
  171,
  173,
  174,
  175,
  177,
  178,
  189,
  191,
  195,
  201,
  205,
  206,
  208,
  209,
  210,
  212,
  217,
  218,
  220,
  222,
  226],
 [157,
  13,
  14,
  15,
  16,
  17,
  18,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  46,
  47,
  49,
  51,
  52,
  53,
  54,
  55,
  56,
  60,
  61,
  62,
  63,
  65,
  66,
  67,
  68,
  69,
  71,
  72,
  74,
  75,
  76,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  84,
  85,
  86,
  87,
  88,
  90,
  91,
  92,
  93,
  96,
  97,
  98,
  99,
  100,
  101,
  102,
  103,
  104,
  105,
  106,
  107,
  108,
  112,
  114,
  115,
  117,
  119,
  121,
  

In [9]:
for x in range(len(clusters)):
    print(x)

0
1
2


In [10]:
medoids = kmedoids_instance.get_medoids()
medoids

[19, 126, 157]

## Get Clustering Results

In [11]:
print('medoids:')
for point_idx in medoids:
    print(point_idx)
print('')
print('clustering result:')
for point_idx in clusters:
    print(point_idx)
print(medoids)

medoids:
19
126
157

clustering result:
[19, 1, 7, 10, 45, 110, 128, 152]
[126, 0, 2, 3, 4, 5, 6, 8, 9, 11, 12, 30, 48, 50, 57, 58, 59, 64, 70, 73, 89, 94, 95, 109, 111, 113, 116, 118, 120, 123, 127, 132, 137, 140, 142, 151, 154, 161, 163, 164, 165, 170, 171, 173, 174, 175, 177, 178, 189, 191, 195, 201, 205, 206, 208, 209, 210, 212, 217, 218, 220, 222, 226]
[157, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 49, 51, 52, 53, 54, 55, 56, 60, 61, 62, 63, 65, 66, 67, 68, 69, 71, 72, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 90, 91, 92, 93, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 112, 114, 115, 117, 119, 121, 122, 124, 125, 129, 130, 131, 133, 134, 135, 136, 138, 139, 141, 143, 144, 145, 146, 147, 148, 149, 150, 153, 155, 156, 158, 159, 160, 162, 166, 167, 168, 169, 172, 176, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 192, 193, 194, 196, 197, 198, 199, 200, 20

In [12]:
print(datasets['NAMA'][0])

BS Srikandi (RW 9), Kricak


In [13]:
dtab = []
totalClusterPerAnggota = []
for label, cluster in enumerate(clusters):
    a = []
    b = 0
    for point_idx in clusters[label]:
        print('label {0}:　{1}'.format(label, point_idx))
        b += sum(datax[point_idx])
        a.append(datasets['NAMA'][point_idx])
    dtab.append(a)
    totalClusterPerAnggota.append(b)
dtab

label 0:　19
label 0:　1
label 0:　7
label 0:　10
label 0:　45
label 0:　110
label 0:　128
label 0:　152
label 1:　126
label 1:　0
label 1:　2
label 1:　3
label 1:　4
label 1:　5
label 1:　6
label 1:　8
label 1:　9
label 1:　11
label 1:　12
label 1:　30
label 1:　48
label 1:　50
label 1:　57
label 1:　58
label 1:　59
label 1:　64
label 1:　70
label 1:　73
label 1:　89
label 1:　94
label 1:　95
label 1:　109
label 1:　111
label 1:　113
label 1:　116
label 1:　118
label 1:　120
label 1:　123
label 1:　127
label 1:　132
label 1:　137
label 1:　140
label 1:　142
label 1:　151
label 1:　154
label 1:　161
label 1:　163
label 1:　164
label 1:　165
label 1:　170
label 1:　171
label 1:　173
label 1:　174
label 1:　175
label 1:　177
label 1:　178
label 1:　189
label 1:　191
label 1:　195
label 1:　201
label 1:　205
label 1:　206
label 1:　208
label 1:　209
label 1:　210
label 1:　212
label 1:　217
label 1:　218
label 1:　220
label 1:　222
label 1:　226
label 2:　157
label 2:　13
label 2:　14
label 2:　15
label 2:　16
label 2:　17
label 2:　18
label 2:　20
label 2:　21
label

[['BS Jati Asri  (RW 6), Tegalrejo',
  'BS Guyub Rukun (RW 2), Kricak',
  'BS Cemara  (RW 2), Karangwaru',
  'BS Seroja  (RW 8), Karangwaru',
  'BS Melati  (RW 2), Gowongan',
  'BS Wirapeni (RW 9), Wirobrajan',
  'BS Pesinden (RW 2), Patehan',
  'BS Parangkusumo (RW 11), Keparakan'],
 ['BS Nagan Lor (RW 1), Patehan',
  'BS Srikandi (RW 9), Kricak',
  'BS Jatimulyo (RW 3), Kricak',
  'BS Menur  (RW 5), Karangwaru ',
  'BS Margo Agung  (RW 12), Karangwaru',
  'BS Gotong Royong  (RW 6), Karangwaru',
  'BS Blunyah Rejo  (RW 1), Karangwaru',
  'BS Hikmah  (RW 3), Karangwaru',
  'BS Omah Resik  (RW 10), Karangwaru',
  'BS Santosa  (RW 4), Karangwaru',
  ' BS Tegal Sejahtera (RW 3), Tegalrejo',
  'BS Mojo Resik  (RW 6), Bumijo',
  'BS Munggur (RW 3), Demangan',
  'BS Werkudoro (RW 5), Demangan',
  'BS Lagensari (RW 14), Klitren',
  'BS Balapan (RW 10), Klitren',
  'BS Turonggo (RW 3), Klitren',
  'BS Ledok Tukangan (RW 6), Tegalpanggung',
  'BS Gemah Ripah (RW 3), Bausasran',
  'BS Rono resik

In [14]:
pd.DataFrame(dtab[2])

Unnamed: 0,0
0,"BS Dados Arto (RW 3), Brontokusuman"
1,"BS Pilahan (RW 12), Tegalrejo"
2,"BS Godean (RW 1), Tegalrejo"
3,"BS Manunggal (RW 7), Tegalrejo"
4,"BS Sidomulyo (RW 8), Tegalrejo"
5,"BS Bolodewe (RW 2), Tegalrejo"
6,"BS Kencana Jaya (RW 9), Tegalrejo"
7,"BS Rapih Resik (RW 5), Tegalrejo"
8,"BS Harum Sari (RW 1), Bener"
9,"BS Bener Berseri (RW 8), Bener"


In [15]:
dt = []
for i in medoids:
    for j, cluster in enumerate(clusters):
        for point_idx in clusters[j]:
            if point_idx == i:
                print("{} {}".format(datasets['NAMA'][point_idx], datax[point_idx]))
                print(sum(datax[i]))
clusters

BS Jati Asri  (RW 6), Tegalrejo [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]
78.0
BS Nagan Lor (RW 1), Patehan [2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 3.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 3.0, 3.0, 1.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0]
79.0
BS Dados Arto (RW 3), Brontokusuman [2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0]
75.0


[[19, 1, 7, 10, 45, 110, 128, 152],
 [126,
  0,
  2,
  3,
  4,
  5,
  6,
  8,
  9,
  11,
  12,
  30,
  48,
  50,
  57,
  58,
  59,
  64,
  70,
  73,
  89,
  94,
  95,
  109,
  111,
  113,
  116,
  118,
  120,
  123,
  127,
  132,
  137,
  140,
  142,
  151,
  154,
  161,
  163,
  164,
  165,
  170,
  171,
  173,
  174,
  175,
  177,
  178,
  189,
  191,
  195,
  201,
  205,
  206,
  208,
  209,
  210,
  212,
  217,
  218,
  220,
  222,
  226],
 [157,
  13,
  14,
  15,
  16,
  17,
  18,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  46,
  47,
  49,
  51,
  52,
  53,
  54,
  55,
  56,
  60,
  61,
  62,
  63,
  65,
  66,
  67,
  68,
  69,
  71,
  72,
  74,
  75,
  76,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  84,
  85,
  86,
  87,
  88,
  90,
  91,
  92,
  93,
  96,
  97,
  98,
  99,
  100,
  101,
  102,
  103,
  104,
  105,
  106,
  107,
  108,
  112,
  114,
  115,
  117,
  119,
  121,
  

In [16]:
dtab = []
for label, c in enumerate(clusters):
    a = []
    for point_idx in clusters[label]:
        print('label {0}:　{1}'.format(label, datasets['NAMA'][point_idx]))
        a.append(datasets['NAMA'][point_idx])
    dtab.append(a)
dtab
dftab = []
for i in dtab:
    a = pd.DataFrame(i)
    a.columns = ['NAMA']
    dftab.append(a)
dftab[0]

label 0:　BS Jati Asri  (RW 6), Tegalrejo
label 0:　BS Guyub Rukun (RW 2), Kricak
label 0:　BS Cemara  (RW 2), Karangwaru
label 0:　BS Seroja  (RW 8), Karangwaru
label 0:　BS Melati  (RW 2), Gowongan
label 0:　BS Wirapeni (RW 9), Wirobrajan
label 0:　BS Pesinden (RW 2), Patehan
label 0:　BS Parangkusumo (RW 11), Keparakan
label 1:　BS Nagan Lor (RW 1), Patehan
label 1:　BS Srikandi (RW 9), Kricak
label 1:　BS Jatimulyo (RW 3), Kricak
label 1:　BS Menur  (RW 5), Karangwaru 
label 1:　BS Margo Agung  (RW 12), Karangwaru
label 1:　BS Gotong Royong  (RW 6), Karangwaru
label 1:　BS Blunyah Rejo  (RW 1), Karangwaru
label 1:　BS Hikmah  (RW 3), Karangwaru
label 1:　BS Omah Resik  (RW 10), Karangwaru
label 1:　BS Santosa  (RW 4), Karangwaru
label 1:　 BS Tegal Sejahtera (RW 3), Tegalrejo
label 1:　BS Mojo Resik  (RW 6), Bumijo
label 1:　BS Munggur (RW 3), Demangan
label 1:　BS Werkudoro (RW 5), Demangan
label 1:　BS Lagensari (RW 14), Klitren
label 1:　BS Balapan (RW 10), Klitren
label 1:　BS Turonggo (RW 3), Klitren


Unnamed: 0,NAMA
0,"BS Jati Asri (RW 6), Tegalrejo"
1,"BS Guyub Rukun (RW 2), Kricak"
2,"BS Cemara (RW 2), Karangwaru"
3,"BS Seroja (RW 8), Karangwaru"
4,"BS Melati (RW 2), Gowongan"
5,"BS Wirapeni (RW 9), Wirobrajan"
6,"BS Pesinden (RW 2), Patehan"
7,"BS Parangkusumo (RW 11), Keparakan"


In [17]:
labels = []
X = []
for label, cluster in enumerate(clusters):
    for point_idx in clusters[label]:
        print('label {0}:　{1}'.format(label, datax[point_idx]))
        labels.append(label)
#         b += sum(datax[point_idx])
        X.append(datax[point_idx])

label 0:　[2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]
label 0:　[1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 3.0, 1.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0]
label 0:　[2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0]
label 0:　[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0, 3.0, 3.0, 1.0, 1.0, 3.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]
label 0:　[2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 

label 2:　[2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0]
label 2:　[2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0]
label 2:　[2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0]
label 2:　[2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0]
label 2:　[2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 

In [18]:
print(labels)

[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]


In [19]:
print(X)

[[2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], [1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 3.0, 1.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0], [2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0], [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0, 3.0, 3.0, 1.0, 1.0, 3.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 

In [20]:
print("Silhouette Coefficient: %0.3f"
      % metrics.silhouette_score(X, labels, metric='euclidean'))

Silhouette Coefficient: 0.090
