## Final Project: Phase 3 - Clustering Pipeline Optimization
Spring 2024  
Group: Michael Massone and Joseph Nelson Farrell   
DS 5230 Unsupervised Machine Learning  
Professor Steven Morin, PhD  
Due: 04/21/2024  
___

In [33]:
## Libraries

# base
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import random


# preprocessing
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler

# data
from sklearn.datasets import load_digits

# clustering
from sklearn.cluster import DBSCAN, KMeans

#external indices
from sklearn.metrics.cluster import adjusted_rand_score, contingency_matrix

# runtime and run tracking
import time
from datetime import datetime

# pathing
from pathlib import Path
import os
import sys

import warnings

# Disable runtime warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)



___

### Start Timer

In [34]:
# track runtime
start = time.time()

___

### Define File Paths

In [35]:
# define path
nb_path = Path(os.getcwd())
print(nb_path)
path = str(nb_path.parent)
print(path)

# path to figs folder
figs_path = path + '/figs'

# path to data
data_path= path + '/data'

# path to src folder
src_path = path + '/src'
print(src_path)

# sys path
sys.path.append(src_path)

/Users/mikey/LocalFiles/DS5230/final_project/DS5230-final/notebooks
/Users/mikey/LocalFiles/DS5230/final_project/DS5230-final
/Users/mikey/LocalFiles/DS5230/final_project/DS5230-final/src


____

### Import Modules

In [36]:
# modules and util
import dimensionality_reduction as dr
import clustering as cl
from cluster_utils import *
from external_indices_utils import *

___

### Parameters

In [37]:
# transformed data csv file name
design_file = '/curated/shuffled_target.csv'

# target vector csv file name
target_file = "/curated/beans_target.csv"

# sampled target dataframe csv loc
target_vector_shuffled_filename = f'/sampled/sampled_target{str(datetime.now()).replace(" ", "")}.csv'

# sampled design dataframe csv loc
design_matrix_shuffled_filename = f'/sampled/sampled_design{str(datetime.now()).replace(" ", "")}.csv'

# all results dataframe csv loc
all_results_file = f'/results/all_results_{str(datetime.now()).replace(" ", "")}.csv'

# filtered results dataframe csv loc
filtered_results_file = f'/results/filtered_results_{str(datetime.now()).replace(" ", "")}.csv'


____

### Load Data

In [38]:
design_file = '/curated/shuffled_target.csv'
target_vector_shuffled = pd.read_csv(data_path + design_file)

target_file = '/curated/shuffled_design.csv'
design_matrix_shuffled = pd.read_csv(data_path + target_file)


In [39]:
design_matrix_shuffled = design_matrix_shuffled.loc[:, :'numerical__ShapeFactor1']

____

### Sample Data

In [40]:
sample = True
if sample:
    # num samples
    n = 1000

    # random seed
    rand_seed = 42

    # sample target and save copy to csv
    target_vector_shuffled = target_vector_shuffled.sample(n=n, random_state=rand_seed)
    target_vector_shuffled.to_csv(data_path + target_vector_shuffled_filename, index=False)
    print("Sampled target dataframe saved at:", data_path + target_vector_shuffled_filename)


     # sample target and save copy to csv
    design_matrix_shuffled = design_matrix_shuffled.sample(n=n, random_state=rand_seed)
    design_matrix_shuffled.to_csv(data_path + design_matrix_shuffled_filename, index=False)
    print("Sampled design dataframe saved at:", data_path + design_matrix_shuffled_filename)


Sampled target dataframe saved at: /Users/mikey/LocalFiles/DS5230/final_project/DS5230-final/data/sampled/sampled_target2024-04-1714:29:13.525291.csv
Sampled design dataframe saved at: /Users/mikey/LocalFiles/DS5230/final_project/DS5230-final/data/sampled/sampled_design2024-04-1714:29:13.525316.csv


___

### Create capX

In [41]:
# drop ID col
design_matrix_shuffled_noID = design_matrix_shuffled.drop('ID', axis=1)

# convert to ndarray
cap_x = design_matrix_shuffled_noID.to_numpy()
cap_x = design_matrix_shuffled
print(f'cap_x shape: {cap_x.shape}')
cap_x.shape

cap_x shape: (1000, 14)


(1000, 14)

___

### Select Hyperparameters for Gridsearch

In [42]:
# select hyperparams
params1=False
params2=False
params3=False
params4=True

if params1:

    min_dist_list = [0.0, 0.01, 0.1, 0.25, 0.5]
    n_neighbors_list = [10, 25, 50, 100, 200]
    metric_list =['euclidean', 'canberra', 'mahalanobis', 'correlation']
    n_components_list = range(2, 4)


if params2:

    min_dist_list = [0.5]
    n_neighbors_list = [100]
    metric_list = ['euclidean']
    n_components_list = [3]


if params3:

    min_dist_list = [0.0, 0.1, 0.5]
    n_neighbors_list = [10, 25, 100, 200]
    metric_list =['euclidean', 'canberra', 'chebyshev', 'correlation']
    n_components_list = range(2, 4)


if params4:

    min_dist_list = [0.0, 0.1, 0.5]
    n_neighbors_list = [10, 50, 100, 200, 500]
    metric_list =['correlation', 'euclidean']
    n_components_list = range(2, 6)




___

### Gridsearch Algorithm

In [43]:
runs = len(n_components_list)*len(min_dist_list)*len(n_neighbors_list)*len(metric_list)
run = 0
df_row_dict_list = []
for n_components in n_components_list:
  for min_dist in min_dist_list:
    for n_neighbors in n_neighbors_list: 
      for metric in metric_list:
        run +=1
        print('*'*100)
        print(f'Run {run} of {runs}')
        results_dict = dr.umap_dim_red(cap_x, n_neighbors, min_dist, metric, n_components) 
        df_row_dict = cl.clustering(results_dict)
        df_row_dict_list.append(df_row_dict)
results_df = pd.DataFrame(df_row_dict_list)


****************************************************************************************************
Run 1 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.0
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.1213009703897284
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  6
Validity Index:  0.8399025655250236
****************************************************************************************************
Run 2 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.0
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.082702563243827
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.9387943607939297
****************************************************************************************************
Run 3 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.0
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.12876424012542326
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.856663280002953
****************************************************************************************************
Run 4 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.0
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.07210219918826391
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.999546438502425
****************************************************************************************************
Run 5 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.0
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.12002286048143254
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.8289461942523921
****************************************************************************************************
Run 6 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.0
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.04762904447611593
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.969590153944985
****************************************************************************************************
Run 7 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.0
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.14252363449893918
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.8499164243572515
****************************************************************************************************
Run 8 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.0
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.06475633000119725
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9735180959222254
****************************************************************************************************
Run 9 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.0
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.11809524554229184
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9323997842745015
****************************************************************************************************
Run 10 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.0
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.07794544963225969
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation maximum which has no identity
DBSCAN
Number of Clusters:  2
Validity Index:  0.986645068501332
****************************************************************************************************
Run 11 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.01
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.1221556177240391
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  7
Validity Index:  0.7627147429928359
****************************************************************************************************
Run 12 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.01
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.09408962822162381
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  7
Validity Index:  0.8811320626005964
****************************************************************************************************
Run 13 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.01
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.13109282254456697
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.8405256090040403
****************************************************************************************************
Run 14 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.01
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.08244785302575122
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9844153877936607
****************************************************************************************************
Run 15 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.01
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.1258688779890924
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.8364886875709189
****************************************************************************************************
Run 16 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.01
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.04127052736555513
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9787205419186511
****************************************************************************************************
Run 17 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.01
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.1337448074587192
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.869799474523443
****************************************************************************************************
Run 18 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.01
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.05338087881238733
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9879443536993064
****************************************************************************************************
Run 19 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.01
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.13793456790468853
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.8899178927963383
****************************************************************************************************
Run 20 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.01
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.09148135846383545
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9532881452407976
****************************************************************************************************
Run 21 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.1
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.16513583756161945
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.813250170727845
****************************************************************************************************
Run 22 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.1
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.1507034397827657
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.9886129445415
****************************************************************************************************
Run 23 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.1
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.16341464153554916
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.7799466861363493
****************************************************************************************************
Run 24 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.1
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.11630614253723065
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9864989369571883
****************************************************************************************************
Run 25 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.1
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.1829551936902847
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.8576303994618942
****************************************************************************************************
Run 26 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.1
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.06374413922622739
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9436124131603573
****************************************************************************************************
Run 27 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.1
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.17283626734946
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.9046236761022888
****************************************************************************************************
Run 28 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.1
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.08078070182496244
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.9133354018199243
****************************************************************************************************
Run 29 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.1
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.17226885723642915
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.909069660314294
****************************************************************************************************
Run 30 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.1
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.05120060664484874
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9650271277522586
****************************************************************************************************
Run 31 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.5
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.30959389917676233
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.5985736467027698
****************************************************************************************************
Run 32 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.5
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.2281647607090965
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.7832292751951277
****************************************************************************************************
Run 33 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.5
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.2805800781710057
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation maximum which has no identity
DBSCAN
Number of Clusters:  2
Validity Index:  0.9942344507359808
****************************************************************************************************
Run 34 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.5
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.1670626250124928
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.9575982699956438
****************************************************************************************************
Run 35 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.5
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.29119949859971983
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.5525479226873471
****************************************************************************************************
Run 36 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.5
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.1234561659643633
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.8505705905293064
****************************************************************************************************
Run 37 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.5
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.3306768504589631
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.4393546034835559
****************************************************************************************************
Run 38 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.5
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.09355211492284995
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9648417910738425
****************************************************************************************************
Run 39 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.5
metric:  correlation
n_components:  2
Hopkin's Statistic = 0.29357859479003645
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.5263204495629541
****************************************************************************************************
Run 40 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.5
metric:  cosine
n_components:  2
Hopkin's Statistic = 0.07745070667463762
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.9971527449045683
****************************************************************************************************
Run 41 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.0
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.05789833507327951
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  7
Validity Index:  0.8340334002020736
****************************************************************************************************
Run 42 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.0
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.04204574786072489
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  7
Validity Index:  0.9047418242686658
****************************************************************************************************
Run 43 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.0
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.06272920589498968
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9997174236299935
****************************************************************************************************
Run 44 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.0
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.028256154156505536
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9987557995077854
****************************************************************************************************
Run 45 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.0
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.07276502574178102
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9997245328450204
****************************************************************************************************
Run 46 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.0
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.03891001348028234
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9737500564713674
****************************************************************************************************
Run 47 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.0
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.06783293234396341
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9932780225810219
****************************************************************************************************
Run 48 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.0
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.03129599066055156
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.9990894199306748
****************************************************************************************************
Run 49 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.0
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.08001341009634506
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9936020449629822
****************************************************************************************************
Run 50 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.0
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.046226719454712725
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9419937067725681
****************************************************************************************************
Run 51 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.01
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.05997222011847965
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  6
Validity Index:  0.8121042310291864
****************************************************************************************************
Run 52 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.01
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.04822083536990667
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.8729173062633555
****************************************************************************************************
Run 53 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.01
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.0641933834686722
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9851686938094706
****************************************************************************************************
Run 54 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.01
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.035778836789144404
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  2
Validity Index:  0.9955030669454905
****************************************************************************************************
Run 55 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.01
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.07305165214065841
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  3
Validity Index:  0.9998557144418142
****************************************************************************************************
Run 56 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.01
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.029356320660585723
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.9792217256631823
****************************************************************************************************
Run 57 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.01
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.0738659634438219
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  3
Validity Index:  0.9999362780813992
****************************************************************************************************
Run 58 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.01
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.054628083384324726
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.9885875367006269
****************************************************************************************************
Run 59 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.01
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.08380377153515424
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.999144832818533
****************************************************************************************************
Run 60 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.01
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.024703723474261474
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.9959325159758127
****************************************************************************************************
Run 61 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.1
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.09345948527251098
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation maximum which has no identity
zero-size array to reduction operation maximum which has no identity
DBSCAN
Number of Clusters:  6
Validity Index:  0.7833543064362526
****************************************************************************************************
Run 62 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.1
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.06634481350020274
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  7
Validity Index:  0.8334352363533292
****************************************************************************************************
Run 63 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.1
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.08651933033846393
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation maximum which has no identity
zero-size array to reduction operation maximum which has no identity
DBSCAN
Number of Clusters:  3
Validity Index:  0.9994851394124887
****************************************************************************************************
Run 64 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.1
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.05527741676302644
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.991938819833748
****************************************************************************************************
Run 65 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.1
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.09754801615754372
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  3
Validity Index:  0.9994290476214227
****************************************************************************************************
Run 66 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.1
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.04760727258311489
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9449164472739533
****************************************************************************************************
Run 67 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.1
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.1115873235104114
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  3
Validity Index:  0.9978793333204408
****************************************************************************************************
Run 68 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.1
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.08999840001422676
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.8749488924347064
****************************************************************************************************
Run 69 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.1
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.10672905784941857
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9998583516424241
****************************************************************************************************
Run 70 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.1
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.04086690555816956
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9565308214077245
****************************************************************************************************
Run 71 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.5
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.19649020558545208
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.9945789228789659
****************************************************************************************************
Run 72 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.5
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.15165323707132852
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.9409874633290106
****************************************************************************************************
Run 73 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.5
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.20838586467118186
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.5190873471030905
****************************************************************************************************
Run 74 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.5
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.08900616209015828
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.8909846150716336
****************************************************************************************************
Run 75 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.5
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.21984575088838126
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  2
Validity Index:  0.9755627306733214
****************************************************************************************************
Run 76 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.5
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.08165600081207564
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.9912915854404073
****************************************************************************************************
Run 77 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.5
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.27107281741342376
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.6706342354294329
****************************************************************************************************
Run 78 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.5
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.1007726035931025
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.9406127960216789
****************************************************************************************************
Run 79 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.5
metric:  correlation
n_components:  3
Hopkin's Statistic = 0.24050267856702753
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation maximum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.5059801217771319
****************************************************************************************************
Run 80 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.5
metric:  cosine
n_components:  3
Hopkin's Statistic = 0.07461919796309933
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.965617566588981
****************************************************************************************************
Run 81 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.0
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.03601888520958996
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation maximum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.9398159044040085
****************************************************************************************************
Run 82 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.0
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.02935706123115888
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  7
Validity Index:  0.9034472868267
****************************************************************************************************
Run 83 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.0
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.04312188503188935
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9980031235274733
****************************************************************************************************
Run 84 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.0
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.026404029868453322
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9380652718050279
****************************************************************************************************
Run 85 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.0
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.04163110877135523
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9990171340699479
****************************************************************************************************
Run 86 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.0
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.0395887790631245
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9454420832154131
****************************************************************************************************
Run 87 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.0
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.04524669304303934
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9991097803568745
****************************************************************************************************
Run 88 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.0
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.02906959946755523
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9931972440075587
****************************************************************************************************
Run 89 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.0
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.05883764187080357
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9960293119976824
****************************************************************************************************
Run 90 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.0
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.032446270435749686
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.924844710635641
****************************************************************************************************
Run 91 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.01
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.04131807121716163
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9576835110983695
****************************************************************************************************
Run 92 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.01
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.033500946450447516
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  7
Validity Index:  0.8930624767085484
****************************************************************************************************
Run 93 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.01
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.04979604036770423
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9986839191971227
****************************************************************************************************
Run 94 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.01
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.04060230923254822
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  6
Validity Index:  0.9176379132377301
****************************************************************************************************
Run 95 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.01
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.04181286630604272
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9964087575999204
****************************************************************************************************
Run 96 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.01
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.03292493324819573
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9688455235206149
****************************************************************************************************
Run 97 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.01
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.05283742578747677
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.8472959071331305
****************************************************************************************************
Run 98 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.01
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.03891400079340862
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  5
Validity Index:  0.964625208801452
****************************************************************************************************
Run 99 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.01
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.058913568087207456
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  3
Validity Index:  0.9911924090966846
****************************************************************************************************
Run 100 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.01
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.04424377444761762
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9811006222701643
****************************************************************************************************
Run 101 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.1
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.06506430982029097
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.8547167876185864
****************************************************************************************************
Run 102 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.1
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.04271279831491352
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  6
Validity Index:  0.9132961568548148
****************************************************************************************************
Run 103 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.1
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.06444809930084988
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.9257888031034482
****************************************************************************************************
Run 104 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.1
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.03416211991961122
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9965672280598367
****************************************************************************************************
Run 105 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.1
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.07114768725427263
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.9193049791110182
****************************************************************************************************
Run 106 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.1
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.0427623248882376
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.951230836547714
****************************************************************************************************
Run 107 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.1
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.06399379365586404
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.8886604786393869
****************************************************************************************************
Run 108 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.1
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.04772015610550556
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9928412587523955
****************************************************************************************************
Run 109 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.1
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.06792038923981769
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.8475502912358107
****************************************************************************************************
Run 110 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.1
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.06147779701240426
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9425209015938709
****************************************************************************************************
Run 111 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.5
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.1601450218590522
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.6901553785137553
****************************************************************************************************
Run 112 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.5
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.10286354268104135
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.882538955200775
****************************************************************************************************
Run 113 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.5
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.14261337343770433
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.8191022677596844
****************************************************************************************************
Run 114 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.5
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.058535105030498934
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  2
Validity Index:  0.9791054634207604
****************************************************************************************************
Run 115 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.5
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.17595546168577023
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.7859443524027205
****************************************************************************************************
Run 116 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.5
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.0752543927084052
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9472965379458601
****************************************************************************************************
Run 117 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.5
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.14440754734021505
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  2
Validity Index:  0.9570019557562685
****************************************************************************************************
Run 118 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.5
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.052062847573177286
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9556094714053989
****************************************************************************************************
Run 119 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.5
metric:  correlation
n_components:  4
Hopkin's Statistic = 0.16254986971515514
Test1 Pass: Kmeans successfully clustered.
Number of Clusters:  4
****************************************************************************************************
Run 120 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.5
metric:  cosine
n_components:  4
Hopkin's Statistic = 0.08124880769844603
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.9611370032284458
****************************************************************************************************
Run 121 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.0
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.029600167204407404
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.986212402160639
****************************************************************************************************
Run 122 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.0
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.029004224270937514
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9433855259638698
****************************************************************************************************
Run 123 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.0
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.03433821562555664
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9991840288591877
****************************************************************************************************
Run 124 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.0
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.025124563997532982
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  7
Validity Index:  0.9318507992924853
****************************************************************************************************
Run 125 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.0
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.03889677105606543
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9990076172518132
****************************************************************************************************
Run 126 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.0
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.029828703956827556
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.947783300049124
****************************************************************************************************
Run 127 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.0
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.035613407777261824
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9975033634442293
****************************************************************************************************
Run 128 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.0
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.027438026045911713
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  3
Validity Index:  0.9686853406252292
****************************************************************************************************
Run 129 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.0
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.03945642997055867
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  5
Validity Index:  0.8807282943975479
****************************************************************************************************
Run 130 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.0
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.03273756165950139
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9449304255211457
****************************************************************************************************
Run 131 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.01
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.033977278800551694
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.9838091883391205
****************************************************************************************************
Run 132 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.01
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.02898570823983999
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation maximum which has no identity
DBSCAN
Number of Clusters:  7
Validity Index:  0.9229792481920477
****************************************************************************************************
Run 133 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.01
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.03689729240133687
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9958841348669243
****************************************************************************************************
Run 134 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.01
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.035006884126846806
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  6
Validity Index:  0.9411656762631652
****************************************************************************************************
Run 135 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.01
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.039267624993229985
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9977322075077567
****************************************************************************************************
Run 136 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.01
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.0258513225837834
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9397778161447218
****************************************************************************************************
Run 137 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.01
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.040915628762724046
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9985671974263227
****************************************************************************************************
Run 138 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.01
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.039294738943931054
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  5
Validity Index:  0.9498574448606864
****************************************************************************************************
Run 139 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.01
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.04489419024072911
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation maximum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.9982205251660223
****************************************************************************************************
Run 140 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.01
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.03272254388243763
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9572284476216266
****************************************************************************************************
Run 141 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.1
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.04992599477299097
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.9898978946821225
****************************************************************************************************
Run 142 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.1
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.03837772248310004
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  6
Validity Index:  0.875447480428535
****************************************************************************************************
Run 143 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.1
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.049971312473763754
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9991744685627085
****************************************************************************************************
Run 144 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.1
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.032884231508019404
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9073371580344439
****************************************************************************************************
Run 145 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.1
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.05383844037767858
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.9977672108571749
****************************************************************************************************
Run 146 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.1
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.0343704484135635
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  5
Validity Index:  0.9511074781149548
****************************************************************************************************
Run 147 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.1
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.053313487941874745
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9956075551356771
****************************************************************************************************
Run 148 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.1
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.03100924975203804
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  5
Validity Index:  0.9035332072852728
****************************************************************************************************
Run 149 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.1
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.051466678316119584
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  3
Validity Index:  0.9270798086814972
****************************************************************************************************
Run 150 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.1
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.03448820409716892
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.9636484087661767
****************************************************************************************************
Run 151 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.5
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.12293418639029957
Fail: Kmeans did not successfully cluster.
DBSCAN
Number of Clusters:  4
Validity Index:  0.7992258823330408
****************************************************************************************************
Run 152 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  10
min_dist:  0.5
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.09208231362120113
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  2
Validity Index:  0.8503122306153292
****************************************************************************************************
Run 153 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.5
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.12300617125590019
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  2
Validity Index:  0.9500252122890165
****************************************************************************************************
Run 154 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  25
min_dist:  0.5
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.07953742433227841
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.9396069725919591
****************************************************************************************************
Run 155 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.5
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.12376503904804652
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
zero-size array to reduction operation minimum which has no identity
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  2
Validity Index:  0.7060035162561229
****************************************************************************************************
Run 156 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  50
min_dist:  0.5
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.05341611139960338
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.9451013334833247
****************************************************************************************************
Run 157 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.5
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.13002395436332886
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation maximum which has no identity
zero-size array to reduction operation maximum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.7686222394581927
****************************************************************************************************
Run 158 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  100
min_dist:  0.5
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.08054633368773426
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.9545860657011821
****************************************************************************************************
Run 159 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.5
metric:  correlation
n_components:  5
Hopkin's Statistic = 0.1177249387807747
Test1 Pass: Kmeans successfully clustered.
Number of Clusters:  4
****************************************************************************************************
Run 160 of 160


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


****************************************************************************************************
****************************************************************************************************
Hyperparameters:
n_neighbors:  200
min_dist:  0.5
metric:  cosine
n_components:  5
Hopkin's Statistic = 0.06092145541586417
Fail: Kmeans did not successfully cluster.
zero-size array to reduction operation minimum which has no identity
zero-size array to reduction operation minimum which has no identity
DBSCAN
Number of Clusters:  4
Validity Index:  0.9437468230604724


___

## All Results

In [44]:
results_df.to_csv(data_path + all_results_file, index=False)
print("Filtered results dataframe saved at:", data_path + all_results_file)

# display max rows
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
results_df

Filtered results dataframe saved at: /Users/mikey/LocalFiles/DS5230/final_project/DS5230-final/data/results/all_results_2024-04-1714:29:13.525335.csv


Unnamed: 0,algo,n_clusters_found,n_clusters_db_score_is_min,n_clusters_ch_score_is_max,n_clusters_silhouette_score_is_max,silhouette_score,hopkins_statistic,umap_n_neighbors,umap_min_dist,umap_metric,umap_n_components,trustworthiness,eps,dbscan_min_samples,dbscan_metric,validity_index,cluster_labels
0,dbscan,6,,,,,0.121301,10,0.0,correlation,2,0.900346,0.535269,6.0,euclidean,0.839903,"[0, 1, 1, 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
1,dbscan,2,,,,,0.082703,10,0.0,cosine,2,0.89445,0.178015,6.0,cosine,0.938794,"[0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
2,dbscan,3,,,,,0.128764,25,0.0,correlation,2,0.897259,0.237863,6.0,cosine,0.856663,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,dbscan,3,,,,,0.072102,25,0.0,cosine,2,0.902647,0.159106,6.0,cosine,0.999546,"[0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
4,dbscan,5,,,,,0.120023,50,0.0,correlation,2,0.900903,0.457435,6.0,euclidean,0.828946,"[0, 1, 1, 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
5,dbscan,5,,,,,0.047629,50,0.0,cosine,2,0.89742,0.330292,6.0,chebyshev,0.96959,"[0, 1, 1, 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
6,dbscan,5,,,,,0.142524,100,0.0,correlation,2,0.90044,0.407246,5.0,euclidean,0.849916,"[0, 1, 1, 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
7,dbscan,3,,,,,0.064756,100,0.0,cosine,2,0.895656,0.154497,4.0,cosine,0.973518,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
8,dbscan,3,,,,,0.118095,200,0.0,correlation,2,0.867513,0.198757,6.0,cosine,0.9324,"[0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
9,dbscan,2,,,,,0.077945,200,0.0,cosine,2,0.889832,0.179695,6.0,cosine,0.986645,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [45]:
# set max rows back to default
pd.reset_option('display.max_rows')
pd.reset_option('display.max_columns')

____

### Get n Best Results (kmeans or DBSCAN)

In [56]:
n=20

results_df['combined_score'] = results_df['silhouette_score'].fillna(results_df['validity_index'])
results_df = results_df.sort_values(by='combined_score', ascending=False)
filter = results_df.head(n)
filtered_results_df = results_df[results_df.index.isin(filter.index)]
filtered_results_df = filtered_results_df.drop(columns=['combined_score'])

#filtered_results_df[filtered_results_df['n_clusters_found'] == 7]
filtered_results_df

Unnamed: 0,algo,n_clusters_found,n_clusters_db_score_is_min,n_clusters_ch_score_is_max,n_clusters_silhouette_score_is_max,silhouette_score,hopkins_statistic,umap_n_neighbors,umap_min_dist,umap_metric,umap_n_components,trustworthiness,eps,dbscan_min_samples,dbscan_metric,validity_index,cluster_labels
56,dbscan,3,,,,,0.073866,100,0.01,correlation,3,0.899723,0.252674,6.0,correlation,0.999936,"[0, 1, 1, 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
68,dbscan,3,,,,,0.106729,200,0.1,correlation,3,0.868299,0.297113,6.0,correlation,0.999858,"[0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
54,dbscan,3,,,,,0.073052,50,0.01,correlation,3,0.898359,0.268203,6.0,correlation,0.999856,"[0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
44,dbscan,3,,,,,0.072765,50,0.0,correlation,3,0.898747,0.249199,6.0,correlation,0.999725,"[0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
42,dbscan,3,,,,,0.062729,25,0.0,correlation,3,0.897117,0.264367,6.0,correlation,0.999717,"[0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
3,dbscan,3,,,,,0.072102,25,0.0,cosine,2,0.902647,0.159106,6.0,cosine,0.999546,"[0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
62,dbscan,3,,,,,0.086519,25,0.1,correlation,3,0.900207,0.326134,6.0,correlation,0.999485,"[0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
64,dbscan,3,,,,,0.097548,50,0.1,correlation,3,0.895912,0.327744,6.0,correlation,0.999429,"[0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
122,dbscan,4,,,,,0.034338,25,0.0,correlation,5,0.899669,0.265335,6.0,correlation,0.999184,"[0, 1, 1, 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
142,dbscan,4,,,,,0.049971,25,0.1,correlation,5,0.900277,0.324954,6.0,correlation,0.999174,"[0, 1, 1, 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."


____

### Save Filtered Results to CSV

In [47]:
filtered_results_df.to_csv(data_path + filtered_results_file, index=False)
print("Filtered results dataframe saved at:", data_path + filtered_results_file)

Filtered results dataframe saved at: /Users/mikey/LocalFiles/DS5230/final_project/DS5230-final/data/results/filtered_results_2024-04-1714:29:13.525353.csv


___

## Runtime

In [48]:
finish = time.time()
hours = int((finish - start) // 3600)
minutes = int(((finish - start) % 3600) // 60)
seconds = int((finish - start) % 60)
print(f"Total Run Time(hh:mm.ss): {hours:02d}:{minutes:02d}.{seconds:02d}")

Total Run Time(hh:mm.ss): 00:28.30
