# <center>This `.ipynb` file contains the code for computing the internal cluster validation metrics</center>

### 1. Import the required libraries

In [1]:
from MODULE.ClustersFeatures import ClustersCharacteristics
import pandas as pd

### 2. Load latent space of all cluster sets

In [2]:
# Header-less CSV holding the latent space of each cluster set (10 to 16 clusters).
_LATENT_CSV_PATHS = (
    'clusters_10/HHH10_C10_D10.csv',
    'clusters_11/HHH11_C11_D11.csv',
    'clusters_12/HHH12_C12_D12.csv',
    'clusters_13/HHH13_C13_D13.csv',
    'clusters_14/HHH14_C14_D14.csv',
    'clusters_15/HHH15_C15_D15.csv',
    'clusters_16/HHH16_C16_D16.csv',
)

# Read the files in order; header=None keeps the integer column labels.
(latent_10, latent_11, latent_12, latent_13,
 latent_14, latent_15, latent_16) = (
    pd.read_csv(csv_path, header=None) for csv_path in _LATENT_CSV_PATHS
)

### 3. Select columns 0-16 and rename the label column (column 16) to `class`

In [3]:
def _select_and_label(latent):
    """Return columns 0 through 16 of `latent` with column 16 renamed to 'class'.

    `.loc` label slicing is inclusive, so the slice keeps column 16 as well.
    The rename returns a new frame rather than using `inplace=True` on a
    frame obtained by slicing, which pandas discourages and which can
    trigger copy warnings.

    Parameters
    ----------
    latent : pandas.DataFrame
        Frame read with `header=None`, i.e. with integer column labels.

    Returns
    -------
    pandas.DataFrame
        New frame whose last selected column is labelled 'class'.
    """
    return latent.loc[:, 0:16].rename(columns={16: 'class'})


# One labelled frame per cluster set; names are kept for the cells below.
df10 = _select_and_label(latent_10)
df11 = _select_and_label(latent_11)
df12 = _select_and_label(latent_12)
df13 = _select_and_label(latent_13)
df14 = _select_and_label(latent_14)
df15 = _select_and_label(latent_15)
df16 = _select_and_label(latent_16)

### 4. Build a `ClustersCharacteristics` object for each cluster set, specifying `class` as the label column

In [4]:
# Wrap each labelled frame in a ClustersCharacteristics object, passing
# the 'class' column as the label target.
(cc10, cc11, cc12, cc13, cc14, cc15, cc16) = (
    ClustersCharacteristics(labelled_frame, label_target='class')
    for labelled_frame in (df10, df11, df12, df13, df14, df15, df16)
)

### 5. Calculate different internal cluster validation metrics

In [6]:
# Print the Calinski-Harabasz index of every cluster set, rounded to 5 decimals.
for caption, clustering in (
    ('Calinski-Harabasz index (10-clusters):', cc10),
    ('Calinski-Harabasz index (11-clusters):', cc11),
    ('Calinski-Harabasz index (12-clusters):', cc12),
    ('Calinski-Harabasz index (13-clusters):', cc13),
    ('Calinski-Harabasz index (14-clusters):', cc14),
    ('Calinski-Harabasz index (15-clusters):', cc15),
    ('Calinski-Harabasz index (16-clusters):', cc16),
):
    print(caption, round(clustering.score_index_calinski_harabasz(), 5))

Calinski-Harabasz index (10-clusters): 2142.54026
Calinski-Harabasz index (11-clusters): 2866.86433
Calinski-Harabasz index (12-clusters): 1865.81774
Calinski-Harabasz index (13-clusters): 2836.2272
Calinski-Harabasz index (14-clusters): 3367.74211
Calinski-Harabasz index (15-clusters): 2027.82944
Calinski-Harabasz index (16-clusters): 2603.28507


### Higher value of `Calinski-Harabasz index` indicates better clustering.

In [7]:
# Print the C index of every cluster set, rounded to 5 decimals.
for caption, clustering in (
    ('C index (10-clusters):', cc10),
    ('C index (11-clusters):', cc11),
    ('C index (12-clusters):', cc12),
    ('C index (13-clusters):', cc13),
    ('C index (14-clusters):', cc14),
    ('C index (15-clusters):', cc15),
    ('C index (16-clusters):', cc16),
):
    print(caption, round(clustering.score_index_c(), 5))

C index (10-clusters): 0.12234
C index (11-clusters): 0.10477
C index (12-clusters): 0.12042
C index (13-clusters): 0.09131
C index (14-clusters): 0.07645
C index (15-clusters): 0.10489
C index (16-clusters): 0.08615


### Lower value of `C index` indicates better clustering.

In [8]:
# Print the Dunn index of every cluster set, rounded to 5 decimals.
for caption, clustering in (
    ('Dunn index (10-clusters):', cc10),
    ('Dunn index (11-clusters):', cc11),
    ('Dunn index (12-clusters):', cc12),
    ('Dunn index (13-clusters):', cc13),
    ('Dunn index (14-clusters):', cc14),
    ('Dunn index (15-clusters):', cc15),
    ('Dunn index (16-clusters):', cc16),
):
    print(caption, round(clustering.score_index_dunn(), 5))

Dunn index (10-clusters): 0.00441
Dunn index (11-clusters): 0.00477
Dunn index (12-clusters): 0.00463
Dunn index (13-clusters): 0.00424
Dunn index (14-clusters): 0.00548
Dunn index (15-clusters): 0.00424
Dunn index (16-clusters): 0.0049


### Higher value of `Dunn index` indicates better clustering.

In [9]:
# Print the value of score_index_log_ss_ratio() for every cluster set,
# rounded to 5 decimals and reported under the "Hartigan index" label.
for caption, clustering in (
    ('Hartigan index (10-clusters):', cc10),
    ('Hartigan index (11-clusters):', cc11),
    ('Hartigan index (12-clusters):', cc12),
    ('Hartigan index (13-clusters):', cc13),
    ('Hartigan index (14-clusters):', cc14),
    ('Hartigan index (15-clusters):', cc15),
    ('Hartigan index (16-clusters):', cc16),
):
    print(caption, round(clustering.score_index_log_ss_ratio(), 5))

Hartigan index (10-clusters): 0.65763
Hartigan index (11-clusters): 1.05432
Hartigan index (12-clusters): 0.72021
Hartigan index (13-clusters): 1.2261
Hartigan index (14-clusters): 1.47801
Hartigan index (15-clusters): 1.04494
Hartigan index (16-clusters): 1.36384


### Higher value of `Hartigan index` indicates better clustering.

In [10]:
# Print the McClain-Rao index of every cluster set, rounded to 5 decimals.
for caption, clustering in (
    ('Mclain-Rao index (10-clusters):', cc10),
    ('Mclain-Rao index (11-clusters):', cc11),
    ('Mclain-Rao index (12-clusters):', cc12),
    ('Mclain-Rao index (13-clusters):', cc13),
    ('Mclain-Rao index (14-clusters):', cc14),
    ('Mclain-Rao index (15-clusters):', cc15),
    ('Mclain-Rao index (16-clusters):', cc16),
):
    print(caption, round(clustering.score_index_mclain_rao(), 5))

Mclain-Rao index (10-clusters): 0.54631
Mclain-Rao index (11-clusters): 0.49791
Mclain-Rao index (12-clusters): 0.55443
Mclain-Rao index (13-clusters): 0.46335
Mclain-Rao index (14-clusters): 0.41643
Mclain-Rao index (15-clusters): 0.51414
Mclain-Rao index (16-clusters): 0.45223


### Lower value of `McClain-Rao index` indicates better clustering.