## <center><a><span style="color:green">Real Datasets - `Iris`</span></a></center>

In [1]:
# Install a blanket 'ignore' filter so no warnings are shown by later cells.
import warnings
warnings.filterwarnings('ignore')

import numpy as np

# Helpers from the `utils` and `dataloaders` modules: the metrics function
# and the loader class used for every dataset below.
from utils import measures_calculator
from dataloaders import Real_DataLoader

### Load the Iris data

In [2]:
# Build the loader for the Iris dataset first, then read the samples and
# their ground-truth labels in a separate step.
iris_loader = Real_DataLoader(name='iris', path="../../datasets/real_datasets")
X_iris, Y_iris_true = iris_loader.load()

# Report the number of distinct ground-truth labels and the length of the first sample.
print(f'The total number of Clusters in the Iris Dataset is: {len(np.unique(Y_iris_true))}, and has {len(X_iris[0])} Features')

The total number of Clusters in the Iris Dataset is: 3, and has 4 Features


### <span style="color:red">GIT Clustering</span> <a class="anchor"></a>

In [3]:
from git_cluster import GIT

In [4]:
# Configure the GIT clusterer for Iris
# (the meaning of `k` and `target_ratio` is defined in git_cluster, not shown here).
git = GIT(
    k=15,
    target_ratio=[1, 1, 1],
)

# Fit on the Iris samples and keep the label predicted for each one.
Y_iris_pred_git = git.fit_predict(X_iris)

# Score the predictions against the ground-truth labels; leaving the result
# as the last expression makes the notebook render it.
perf_metrics_git = measures_calculator(
    Y_iris_pred_git,
    Y_iris_true,
)
perf_metrics_git

Unnamed: 0,f1,ARI,ACC,NMI,cover_rate,classes
0,0.883153,0.706006,0.88,0.756087,1.0,3.0


### <span style="color:red">HDBSCAN</span> <a class="anchor"></a>

In [5]:
import hdbscan

In [6]:
# Create an instance of the HDBSCAN clustering.
# The instance is bound to its own name so that the imported `hdbscan` module
# is not overwritten: rebinding `hdbscan` to the instance would make
# `hdbscan.HDBSCAN` raise AttributeError the next time this cell is run.
hdbscan_clusterer = hdbscan.HDBSCAN(min_cluster_size=30,
                                    min_samples=20,
                                    gen_min_span_tree=True)

# Apply the HDBSCAN algorithm to predict the clusters in the data
hdbscan_clusterer.fit(X_iris)

# Get the predicted clusters from the fitted clusterer
Y_iris_pred_hdbscan = hdbscan_clusterer.labels_

# Calculate various clustering metrics to evaluate the performance;
# the bare last expression renders the result in the notebook.
perf_metrics_hdbscan = measures_calculator(Y_iris_pred_hdbscan, Y_iris_true)
perf_metrics_hdbscan

Unnamed: 0,f1,ARI,ACC,NMI,cover_rate,classes
0,0.775507,0.562136,0.686667,0.713099,1.0,3.0


### <span style="color:red">Spectral Clustering</span> <a class="anchor"></a>

In [7]:
from sklearn.cluster import SpectralClustering

In [8]:
# Build the spectral clusterer (3 clusters, RBF affinity, 'discretize' label
# assignment, fixed random_state) and fit it on Iris in one expression;
# fit() hands back the fitted estimator, so the name refers to the same object.
spectral_clustering = SpectralClustering(
    n_clusters=3,
    affinity='rbf',
    assign_labels="discretize",
    random_state=0,
).fit(X_iris)

# Per-sample cluster labels stored on the fitted estimator.
Y_iris_pred_speclustr = spectral_clustering.labels_

# Compare the predicted labels with the ground truth and show the metrics.
perf_metrics_speclustr = measures_calculator(
    Y_iris_pred_speclustr,
    Y_iris_true,
)
perf_metrics_speclustr

Unnamed: 0,f1,ARI,ACC,NMI,cover_rate,classes
0,0.901225,0.743683,0.9,0.766036,1.0,3.0


### <span style="color:red">K-Means</span> <a class="anchor"></a>

In [9]:
from sklearn.cluster import KMeans

In [10]:
# Build a 3-cluster K-means estimator with a fixed random_state and fit it on
# Iris in one expression; fit() hands back the fitted estimator itself.
kmeans = KMeans(
    n_clusters=3,
    random_state=0,
).fit(X_iris)

# Per-sample cluster labels stored on the fitted estimator.
Y_iris_pred_kmeans = kmeans.labels_

# Compare the predicted labels with the ground truth and show the metrics.
perf_metrics_kmeans = measures_calculator(
    Y_iris_pred_kmeans,
    Y_iris_true,
)
perf_metrics_kmeans

Unnamed: 0,f1,ARI,ACC,NMI,cover_rate,classes
0,0.888055,0.716342,0.886667,0.741912,1.0,3.0


## <center><a><span style="color:green">Real Datasets - `Wine`</span></a></center>

### Load the Wine data

In [11]:
# Build the loader for the Wine dataset first, then read the samples and
# their ground-truth labels in a separate step.
wine_loader = Real_DataLoader(name='wine', path="../../datasets/real_datasets")
X_wine, Y_wine_true = wine_loader.load()

# Report the number of distinct ground-truth labels and the length of the first sample.
print(f'The total number of Clusters in the Wine Dataset is: {len(np.unique(Y_wine_true))}, and has {len(X_wine[0])} Features')

The total number of Clusters in the Wine Dataset is: 3, and has 13 Features


### <span style="color:red">GIT Clustering</span> <a class="anchor"></a>

In [12]:
from git_cluster import GIT

In [13]:
# Configure the GIT clusterer for Wine (k=20 here).
# Note: `git` and `perf_metrics_git` are re-bound by this cell, so any values
# they held from an earlier section are replaced.
git = GIT(
    k=20,
    target_ratio=[1, 1, 1],
)

# Fit on the Wine samples and keep the label predicted for each one.
Y_wine_pred_git = git.fit_predict(X_wine)

# Score the predictions against the ground-truth labels; leaving the result
# as the last expression makes the notebook render it.
perf_metrics_git = measures_calculator(
    Y_wine_pred_git,
    Y_wine_true,
)
perf_metrics_git

Unnamed: 0,f1,ARI,ACC,NMI,cover_rate,classes
0,0.900735,0.713308,0.898876,0.75702,1.0,3.0


### <span style="color:red">HDBSCAN</span> <a class="anchor"></a>

In [14]:
import hdbscan

In [15]:
# Create an instance of the HDBSCAN clustering.
# The instance is bound to its own name so that the imported `hdbscan` module
# is not overwritten: rebinding `hdbscan` to the instance would make
# `hdbscan.HDBSCAN` raise AttributeError the next time this cell is run.
hdbscan_clusterer = hdbscan.HDBSCAN(min_cluster_size=20, min_samples=2, gen_min_span_tree=True)

# Apply the HDBSCAN algorithm to predict the clusters in the data
hdbscan_clusterer.fit(X_wine)

# Get the predicted clusters from the fitted clusterer
Y_wine_pred_hdbscan = hdbscan_clusterer.labels_

# Calculate various clustering metrics to evaluate the performance;
# the bare last expression renders the result in the notebook.
perf_metrics_hdbscan = measures_calculator(Y_wine_pred_hdbscan, Y_wine_true)
perf_metrics_hdbscan

Unnamed: 0,f1,ARI,ACC,NMI,cover_rate,classes
0,0.678477,0.29075,0.606742,0.402635,1.0,3.0


### <span style="color:red">Spectral Clustering</span> <a class="anchor"></a>

In [16]:
from sklearn.cluster import SpectralClustering

In [17]:
# Build the spectral clusterer (3 clusters, RBF affinity, 'discretize' label
# assignment, fixed random_state) and fit it on Wine in one expression;
# fit() hands back the fitted estimator, so the name refers to the same object.
# NOTE(review): X_wine is used exactly as returned by the loader; if the
# features are on very different scales the RBF affinity may be dominated by
# a few of them - confirm whether standardisation is intended.
spectral_clustering = SpectralClustering(
    n_clusters=3,
    affinity='rbf',
    assign_labels="discretize",
    random_state=0,
).fit(X_wine)

# Per-sample cluster labels stored on the fitted estimator.
Y_wine_pred_speclustr = spectral_clustering.labels_

# Compare the predicted labels with the ground truth and show the metrics.
perf_metrics_speclustr = measures_calculator(
    Y_wine_pred_speclustr,
    Y_wine_true,
)
perf_metrics_speclustr

Unnamed: 0,f1,ARI,ACC,NMI,cover_rate,classes
0,0.466706,0.01346,0.432584,0.014041,1.0,3.0


### <span style="color:red">K-Means</span> <a class="anchor"></a>

In [18]:
from sklearn.cluster import KMeans

In [19]:
# Build a 3-cluster K-means estimator with a fixed random_state and fit it on
# Wine in one expression; fit() hands back the fitted estimator itself.
kmeans = KMeans(
    n_clusters=3,
    random_state=0,
).fit(X_wine)

# Per-sample cluster labels stored on the fitted estimator.
Y_wine_pred_kmeans = kmeans.labels_

# Compare the predicted labels with the ground truth and show the metrics.
perf_metrics_kmeans = measures_calculator(
    Y_wine_pred_kmeans,
    Y_wine_true,
)
perf_metrics_kmeans

Unnamed: 0,f1,ARI,ACC,NMI,cover_rate,classes
0,0.689714,0.371114,0.702247,0.428757,1.0,3.0
