In [1]:
#default_exp Clustering

In [2]:
#hide
from nbdev import *
from nbdev.showdoc import show_doc

In [3]:
#export
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from HierarchicalGeoClustering.TreeClusters import *

# Clustering 

In this module all the clustering methods wrap or implemented (Natural cities, DBSCAN, OPTICS, HDBSCAN, and adap_DBSCAN) all with the intention to have the same input and output. 

A recursive function is implemented to obtain the cluster iterative using the output as the new input. To select the method to use only a string is needed.


In [4]:
#export
def clustering(
            t_next_level_2,
            level=None,
            algorithm='dbscan',
            **kwargs
    ):
    """Function to get the clusters for single group by
    
    :param t_next_level_2 Dictionary with the points to compute the
            cluster
    :param level:  None Level to compute (Default None)
    
    :param str algorithm : Algorithm type is supported (Default= 'dbscan')
    
    :param int min_points_cluster:  minimun number of point to consider a cluster(Default 50)
    
    :param double eps: Epsilon parameter In case is needed
    
    :param bool return_noise: To return the noise (Default False)
    
    :param bool verbose: Printing (Dafault False)  
    
    :returns list t_next_level_n: A list with dictionaries with the points, the parent, and nois 
    """
    verbose= kwargs.get('verbose',False)
    min_points = kwargs.get( 'min_points_cluster', 50)
    ret_noise= kwargs.get('return_noise', False)
    eps = kwargs.get('eps',0.8)  # Epsilon value to dbscan
    t_next_level_n = []
    if level == None:
        level = 0

    for li_num, cluster_list_D in enumerate(t_next_level_2):
        cluster_list = cluster_list_D['points']
        cluster_list_pa = cluster_list_D['parent']
        if verbose:
            print("Size cluster list: ", len(cluster_list))
            
        for c_num, cluster in enumerate(cluster_list):
            if verbose:
                print("Size cluster: ", len(cluster))
                print('Algorithm: ', algorithm)

            if len(cluster) > 5:
                if algorithm == 'dbscan':
                    if verbose:
                        print("Epsilon Value: ", eps)
                    tmp = compute_dbscan(cluster,
                                 eps_DBSCAN = eps,
                                 debugg=verbose,
                                  **kwargs)
                    if ret_noise:
                        noise_points = tmp[1]
                        tmp =  tmp[0]
                
               
                elif algorithm == 'hdbscan':
                    tmp = compute_hdbscan(cluster,
                                **kwargs)
                    if ret_noise:
                        noise_points = tmp[1]
                        tmp =  tmp[0]
                ##########  
                elif algorithm == 'adaptative_DBSCAN':
                    #### If the number of cluster is too small 
                    
                    tmp = adaptative_DBSCAN(cluster, **kwargs)
                    if ret_noise:
                        noise_points = tmp[1]
                        tmp =  tmp[0]

                elif algorithm == 'optics':
                    tmp = compute_OPTICS(cluster,
                                eps_OPTICS = eps,
                                **kwargs)
                    if ret_noise:
                        noise_points = tmp[1]
                        tmp =  tmp[0]
                ##########  
                elif algorithm == 'natural_cities':
                    tmp = compute_Natural_cities(cluster,
                                **kwargs)
                    if ret_noise:
                        noise_points = tmp[1]
                        tmp =  tmp[0]
                ##########  
                else:
                    raise ValueError('Algorithm must be dbscan or hdbscan')
                    # sys.exit("1")
                
                
                
                if verbose:
                    print("The number of resulting clusters is : ", len(tmp))
                if ret_noise:
                    dic_clos = {'points': tmp,
                           'parent': cluster_list_pa + '_L_'+str(level) +
                            '_l_' + str(li_num) + '_c_'+str(c_num), 
                            'noise_points':noise_points
                    }
                else:
                    dic_clos = {'points': tmp, 'parent': cluster_list_pa +
                            '_L_'+str(level) + '_l_' + str(li_num) + '_c_'+str(c_num)}
                
                t_next_level_n.append(dic_clos)
            else:
                if ret_noise:
                    dic_clos = {'points': [],
                           'parent': cluster_list_pa + '_L_'+str(level) +
                            '_l_' + str(li_num) + '_c_'+str(c_num), 
                            'noise_points':cluster
                    }
                else:
                    dic_clos = {'points': [], 'parent': cluster_list_pa +
                            '_L_'+str(level) + '_l_' + str(li_num) + '_c_'+str(c_num)}
                t_next_level_n.append(dic_clos)
    
    return t_next_level_n

In [5]:
#hide
### Pruebas 

In [6]:
show_doc(clustering)

<h4 id="clustering" class="doc_header"><code>clustering</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>clustering</code>(**`t_next_level_2`**, **`level`**=*`None`*, **`algorithm`**=*`'dbscan'`*, **\*\*`kwargs`**)

Function to get the clusters for single group by

:param t_next_level_2 Dictionary with the points to compute the
        cluster
:param level:  None Level to compute (Default None)

:param str algorithm : Algorithm type is supported (Default= 'dbscan')

:param int min_points_cluster:  minimun number of point to consider a cluster(Default 50)

:param double eps: Epsilon parameter In case is needed

:param bool return_noise: To return the noise (Default False)

:param bool verbose: Printing (Dafault False)  

:returns list t_next_level_n: A list with dictionaries with the points, the parent, and nois 

In [7]:
#export
def recursive_clustering(
                this_level,  # Dictionary with Points
                to_process,  # levels to process
                cluster_tree,  # to store the clusters
                level = 0,  # current level
                algorithm ='dbscan',  # Algorithm to use
                **kwargs
               ):
    """
    Performs the recursive clustering.
    Calls compute_dbscan for each
    list of clusters keepen the structure and then calls itself
    until no more clusters satisfy the condition
        
    :param dict this_level: level is the current level 
    
    :param int to_process: the max level to process
    
    :param double eps: The epsilon parameter distance to pass to the needed algorithm 
    
    :param list cluster_tree : list of list to insert the levels 
    
    :param bool verbose : To print 
    
    :param double decay: In the use of dbscan the deacy parameter to reduce eps
    
    :param int min_points_cluster: The min point for each cluster to pass to algorithm
    
    :param str algorithm:  The string of the algorithm name to use
    """

    verbose= kwargs.get('verbose',False)
    min_points = kwargs.get( 'min_points_cluster', 50)
    decay = kwargs.get('decay', 0.7)
    eps = kwargs.get('eps' ,0.8)  # Epsilon distance to DBSCAN parameter
    tmp = None

    if level == 0:
        kwargs['eps'] = eps
    else:
        kwargs['eps'] = eps  * decay

    cluster_result_polygons = []
    if level > to_process:
        if verbose:
            print('Done clustering')
        return
    ######## Get the clusters for the current list of points 
    all_l = clustering(
                    this_level,
                    level=level,
                    algorithm=algorithm,
                    
                    **kwargs
                    )
    ##########

    cluster_tree.append(all_l)
    cluster_n = 0
    for i in all_l:
        cluster_n += len(i['points'])
    if verbose:
        print('At level ', level, ' the number of lists are ',
              len(all_l), ' with ', cluster_n, 'clusters')
    level += 1
    if len(all_l) > 0:
        return recursive_clustering(all_l, 
                               to_process=to_process,
                               cluster_tree=cluster_tree,
                               level= level,
                               algorithm=algorithm,
                               **kwargs
                               )
    else:
        if verbose:
            print('done clustering')
        return

In [8]:
#hide
### Pruebas 

In [9]:
show_doc(recursive_clustering)

<h4 id="recursive_clustering" class="doc_header"><code>recursive_clustering</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>recursive_clustering</code>(**`this_level`**, **`to_process`**, **`cluster_tree`**, **`level`**=*`0`*, **`algorithm`**=*`'dbscan'`*, **\*\*`kwargs`**)

Performs the recursive clustering.
Calls compute_dbscan for each
list of clusters keepen the structure and then calls itself
until no more clusters satisfy the condition
    
:param dict this_level: level is the current level 

:param int to_process: the max level to process

:param double eps: The epsilon parameter distance to pass to the needed algorithm 

:param list cluster_tree : list of list to insert the levels 

:param bool verbose : To print 

:param double decay: In the use of dbscan the deacy parameter to reduce eps

:param int min_points_cluster: The min point for each cluster to pass to algorithm

:param str algorithm:  The string of the algorithm name to use

## Clustering Algorithms
A wapper functions to obtain the clusterizations 

In [10]:
#export
def compute_dbscan(cluster,  **kwargs):
    
    """ 
    Sklearn DBSCAN wrapper.
    
    :param cluster: a (N,2) numpy array containing the obsevations

    :returns list with numpy arrays for all the clusters obtained
    """
    eps = kwargs.get( 'eps_DBSCAN',.04)
    debugg= kwargs.get( 'debugg',False)
    min_samples= kwargs.get( 'min_samples',50)
    ret_noise = kwargs.get('return_noise', False)
    # Standarize sample
    scaler = StandardScaler()
    cluster = scaler.fit_transform(cluster)
    if debugg:
        print('epsilon distance to DBSCAN: ', eps)
        print("min_samples to DBScan: ", min_samples )
        print("Number of points to fit the DBScan: ",cluster.shape[0])

    db = DBSCAN(eps=eps, min_samples=min_samples).fit(cluster)  # Check if can be run with n_jobs = -1
    
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_
    l_unique_labels = len(set(labels)) - (1 if -1 in labels else 0)
    unique_labels = set(labels) 
    cluster = scaler.inverse_transform(cluster)
    clusters = []
    if debugg:
        print('Number of clusters:' ,l_unique_labels)
    
    for l in unique_labels:
        if l != -1:
            class_member_mask = (labels == l)
            clusters.append(cluster[class_member_mask])
        elif l == -1 and debugg == True:
            class_member_mask = (labels == l)
            print("Muestras consideradas ruido: ",  sum(class_member_mask))
    
    if ret_noise == True:
        class_member_mask = (labels == -1)
        return clusters, cluster[class_member_mask]
    
    return clusters

In [11]:
#hide
### Pruebas 


In [12]:
show_doc(compute_dbscan)

<h4 id="compute_dbscan" class="doc_header"><code>compute_dbscan</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>compute_dbscan</code>(**`cluster`**, **\*\*`kwargs`**)

Sklearn DBSCAN wrapper.

:param cluster: a (N,2) numpy array containing the obsevations

:returns list with numpy arrays for all the clusters obtained

In [13]:
#export
def adap_DBSCAN(points2_clusters ,
                **kwargs):
    """
    The function use the knee and average to obtain a good value for epsilon and use 
    DBSCAN to obtain the clusters
    
    :param list Points points2_clusters: Point to clusterize  
    
    :param int max_k: = (Default = len(points2_clusters)*.1)
    
    :param int  min_k: (Default =50)
    
    :param int step_k: (Default = 50)
    
    :param int leaf_size: (Default = 50)
    
    :param bool scale_points: (Default = True)
    
    :param bool debugg: (Default = False)
    
    :param bool ret_noise:  (Default = True)
    
    :returns list : list of cluster. If ret_noise = True return tuple list of cluter and noise 
    """
    max_k = kwargs.get('max_k', int(len(points2_clusters)*.1))
    min_k = kwargs.get('min_k', 50)
    step_k = kwargs.get('step_k', 50)
    leaf_size =  kwargs.get('leaf_size',50)
    scale_points= kwargs.get('scale_points',True)
    debugg = kwargs.get('verbose',False)
    ret_noise = kwargs.get('return_noise', True)
    ###### Se tienen que hacer algunos cambios para cuando
    #  los clusters son menores a los minimos establecidos previemente
    
    ##### Establecer los minimos posibles 
    if max_k > len(points2_clusters):
        raise ValueError('The max_k value is too large for the number of points')
    
    if min_k >  len(points2_clusters):
        print('The min_k value is too large for the number of points returns empty clusters')
        if ret_noise == True:
            return [] , points2_clusters
        else:
            return []
    
    if step_k > len(points2_clusters):
        raise ValueError('The step_k value is too large for the number of points')

    
    if min_k == max_k:
        print('min_k reset to obtain at least 1 value')
        min_k = max_k-1

    if scale_points ==True:
        scaler = StandardScaler()
        points_arr = scaler.fit_transform(points2_clusters)
    else:
        points_arr = points2_clusters
    
    kdt=  cKDTree(points_arr, leafsize=leaf_size)
    lits_appe_all_aver=[]
    for j in range( min_k, max_k, step_k ):
        dist_va, ind = kdt.query(points_arr, k=j, n_jobs =-1) 
        non_zero =  dist_va[:, 1:]
        non_zero = np.ndarray.flatten(non_zero)
        non_zero = np.sort(non_zero)
        lis_aver_k=[]
        for i in range(int(non_zero.shape[0]/(j-1)) -1):
            lis_aver_k.append(np.average(non_zero[i*(j-1):(i+1)*(j-1)]))

        average_arr= np.array(lis_aver_k)
        kneedle_1_average = kneed.KneeLocator(
                range(average_arr.shape[0]),
                average_arr,
                curve="convex",## This should be the case since the values are sorted 
                direction="increasing", ## This should be the case since the values are sorted incresing
                online=True, ### To find the correct knee the false returns the first find 
        )
        epsilon= kneedle_1_average.knee_y
        min_point = kneedle_1_average.knee
        #### We take the average never the less
        
        lits_appe_all_aver.append({ 'k':j,
                    'Epsilon':epsilon,
                    'value':min_point})
    
    #### Check if the list is empty
    if len(lits_appe_all_aver) ==0:
        if debugg:
            print('AUTOIMATIC DBSCAN')
            print('Using 0.6 as epsilon and 20 as Minpoints')
        db_scan= DBSCAN(eps=0.6, min_samples=20).fit(points_arr)
    else:
        df_all_average= pd.DataFrame(lits_appe_all_aver)
        max_epsi_all_average= df_all_average['Epsilon'].max()
        if debugg:
            print('Valor de epsion  : ', max_epsi_all_average)
        db_scan= DBSCAN(eps=max_epsi_all_average, min_samples=min_k).fit(points_arr)
    
    ####Get the clusters
    core_samples_mask = np.zeros_like(db_scan.labels_, dtype=bool)
    core_samples_mask[db_scan.core_sample_indices_] = True
    labels = db_scan.labels_
    unique_labels = set(labels)
    if scale_points ==True:
        points_ret = scaler.inverse_transform(points_arr)
    else:
        points_ret = points_arr
    clusters = []
    for l in unique_labels:
        if l != -1:
            class_member_mask = (labels == l)
            clusters.append(points_ret[class_member_mask])
        elif l == -1 and debugg == True:
            class_member_mask = (labels == l)
            print("Muestras consideradas ruido: ",  sum(class_member_mask))

    if ret_noise == True:
        class_member_mask = (labels == -1)
        return clusters, points_ret[class_member_mask]

    return clusters

In [14]:
show_doc(adap_DBSCAN)

<h4 id="adap_DBSCAN" class="doc_header"><code>adap_DBSCAN</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>adap_DBSCAN</code>(**`points2_clusters`**, **\*\*`kwargs`**)

The function use the knee and average to obtain a good value for epsilon and use 
DBSCAN to obtain the clusters

:param list Points points2_clusters: Point to clusterize  

:param int max_k: = (Default = len(points2_clusters)*.1)

:param int  min_k: (Default =50)

:param int step_k: (Default = 50)

:param int leaf_size: (Default = 50)

:param bool scale_points: (Default = True)

:param bool debugg: (Default = False)

:param bool ret_noise:  (Default = True)

:returns list : list of cluster. If ret_noise = True return tuple list of cluter and noise 

In [15]:
#export
def compute_hdbscan(points2_clusters,  **kwargs):
    
    """
    HDBSCAN wrapper.
    
    :param np.array cluster: a (N,2) numpy array containing the obsevations
    
    :returns:  list with numpy arrays for all the clusters obtained
    """
    
    scale_points= kwargs.get('scale_points',True)
    debugg = kwargs.get('verbose',False)
    ret_noise = kwargs.get('return_noise', True)
    min_cluster = kwargs.get('min_cluster', 20)
    if scale_points ==True:
        scaler = StandardScaler()
        points_arr = scaler.fit_transform(points2_clusters)
    else:
        points_arr = points2_clusters

    db = hdbscan.HDBSCAN( ).fit(points_arr)
    core_samples_mask = np.full_like(db.labels_, True, dtype=bool)
    labels = db.labels_
    l_unique_labels = len(set(labels)) - (1 if -1 in labels else 0)
    unique_labels = set(labels)
    if debugg:
        print('total number of clusters: ', len(unique_labels)) 
    if scale_points ==True:
        points_ret = scaler.inverse_transform(points_arr)
    else:
        points_ret = points_arr
    clusters = []

    for l in unique_labels:
        if l != -1:
            class_member_mask = (labels == l)
            clusters.append(points_ret[class_member_mask])
        elif l == -1 and debugg == True:
            class_member_mask = (labels == l)
            print("Muestras consideradas ruido: ",  sum(class_member_mask))

    if ret_noise == True:
        class_member_mask = (labels == -1)
        return clusters, points_ret[class_member_mask]

    return clusters

In [16]:
show_doc(adap_DBSCAN)

<h4 id="adap_DBSCAN" class="doc_header"><code>adap_DBSCAN</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>adap_DBSCAN</code>(**`points2_clusters`**, **\*\*`kwargs`**)

The function use the knee and average to obtain a good value for epsilon and use 
DBSCAN to obtain the clusters

:param list Points points2_clusters: Point to clusterize  

:param int max_k: = (Default = len(points2_clusters)*.1)

:param int  min_k: (Default =50)

:param int step_k: (Default = 50)

:param int leaf_size: (Default = 50)

:param bool scale_points: (Default = True)

:param bool debugg: (Default = False)

:param bool ret_noise:  (Default = True)

:returns list : list of cluster. If ret_noise = True return tuple list of cluter and noise 

In [17]:
#export
def compute_OPTICS(points2_clusters,  **kwargs):
    
    """ OPTICS wrapper.
    :param np.array cluster: a (N,2) numpy array containing the obsevations
    :returns:  list with numpy arrays for all the clusters obtained
    """

    scale_points= kwargs.get('scale_points',True)
    debugg = kwargs.get('verbose',False)
    ret_noise = kwargs.get('return_noise', True)
    min_samples= kwargs.get( 'min_samples',5)
    eps_optics = kwargs.get('eps_optics', None)
    n_jobs = kwargs.get('num_jobs',None)
    xi= kwargs.get('xi',None)
    algorithm_optics= kwargs.get('algorithm_optics','kd_tree')

    if scale_points ==True:
        scaler = StandardScaler()
        points_arr = scaler.fit_transform(points2_clusters)
    else:
        points_arr = points2_clusters


    db = OPTICS(min_samples = min_samples,eps= eps_optics, n_jobs= n_jobs).fit(points2_clusters)
    core_samples_mask = np.full_like(db.labels_, True, dtype=bool)
    labels = db.labels_
    l_unique_labels = len(set(labels)) - (1 if -1 in labels else 0)
    unique_labels = set(labels)
    if debugg:
        print('total number of clusters: ', len(unique_labels)) 
    if scale_points ==True:
        points_ret = scaler.inverse_transform(points_arr)
    else:
        points_ret = points_arr
    clusters = []

    for l in unique_labels:
        if l != -1:
            class_member_mask = (labels == l)
            clusters.append(points_ret[class_member_mask])
        elif l == -1 and debugg == True:
            class_member_mask = (labels == l)
            print("Muestras consideradas ruido: ",  sum(class_member_mask))

    if ret_noise == True:
        class_member_mask = (labels == -1)
        return clusters, points_ret[class_member_mask]

    return clusters

In [18]:
show_doc(compute_OPTICS)

<h4 id="compute_OPTICS" class="doc_header"><code>compute_OPTICS</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>compute_OPTICS</code>(**`points2_clusters`**, **\*\*`kwargs`**)

OPTICS wrapper.
:param np.array cluster: a (N,2) numpy array containing the obsevations
:returns:  list with numpy arrays for all the clusters obtained

In [19]:
#export 
def compute_Natural_cities(points2_clusters,  **kwargs):
    
    """
    Compute Natural cities clustering
    
    :param np.array points2_clusters: a (N,2) numpy array containing the obsevations
    
    :returns: list with numpy arrays for all the clusters obtained
    """
    ### The function is in acordance with the all the previus functions
    scale_points= kwargs.get('scale_points',True)
    debugg = kwargs.get('verbose',False)
    ret_noise = kwargs.get('return_noise', True)

    if scale_points ==True:
        scaler = StandardScaler()
        points_arr = scaler.fit_transform(points2_clusters)
    else:
        points_arr = points2_clusters

    edges= get_segments(points_arr)
    lenght_av  =  np.average(np.array([i.length for i in edges ]))
    edges = [i for i in edges  if i.length < lenght_av]
    polygons_natural_cities=  get_polygons_buf(edges)
    if debugg:
        if type(polygons_natural_cities)==shapely.geometry.MultiPolygon:
            print('Resulting number of polygons: ', len(polygons_natural_cities))

        elif type(polygons_natural_cities)==shapely.geometry.Polygon:
            print('Only 1 polygon: ')
        else:
            print('The result is not a Polygon or Multipolygon')
    labels_points = labels_filtra(points_arr, polygons_natural_cities)
    core_samples_mask = np.full_like(labels_points, True, dtype=bool)
    l_unique_labels = len(set(labels_points)) - (1 if -1 in labels_points else 0)
    unique_labels = set(labels_points)
    
    if debugg:
        print('total number of clusters: ', len(unique_labels)) 
    #### recover
    if scale_points ==True:
        points_ret = scaler.inverse_transform(points_arr)
    else:
        points_ret = points_arr

    
    clusters = []
    for l in unique_labels:
        if l != -1:
            class_member_mask = (labels_points == l)
            clusters.append(points_ret[class_member_mask])
        elif l == -1 and debugg == True:
            class_member_mask = (labels_points == l)
            print("Point conscider noise: ",  sum(class_member_mask))

    if ret_noise == True:
        class_member_mask = (labels_points == -1)
        return clusters, points_ret[class_member_mask]

    return clusters

In [20]:
show_doc(compute_Natural_cities)

<h4 id="compute_Natural_cities" class="doc_header"><code>compute_Natural_cities</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>compute_Natural_cities</code>(**`points2_clusters`**, **\*\*`kwargs`**)

Compute Natural cities clustering

:param np.array points2_clusters: a (N,2) numpy array containing the obsevations

:returns: list with numpy arrays for all the clusters obtained

In [21]:
#hide
#export

def generate_tree_clusterize_form(**kwargs ):
    """
    Generates all the experiment all the experiment creates the data and clusterize using the algorithm available
    :param levels_tree: Levels for the tree
    :param int per_cluster: Points per clusters
    :param levels_cluster:  Levels to clusterize
    :param bool verbose:  To print some outputs 
    :returns: a dictionary with all the  data frames and a dictionary 
    with the similarity measurment created
     """
    
    levels_tree= kwargs.get('tree_level', 4)
    per_cluster = kwargs.get('num_per_cluster', 200)
    levels_cluster = kwargs.get('levels_cluster', 4)
    verbose = kwargs.get('verbose', False)
    
    if verbose:
        print('generating tree')
    
    
    random.seed(int(time.time()))
    random_seed = random.randint(0,1500)
    print('Random to use: ',random_seed )
    print('With',levels_tree , ' levels' )
    tree_original= tree_clusters(levels_tree, random_seed= random_seed)
    tree_original.populate_tree(number_per_cluster=per_cluster, avoid_intersec= True)
    tree_original_points= tree_original.get_points_tree()
    X_2=np.array([[p.x,p.y] for p in tree_original_points])
    dic_points_ori={'points':[X_2], 'parent':''}
    if verbose:
        print('tree with: ', X_2.shape )
    
    while X_2.shape[0] < 2000:
        
        print('tree with too few elements to clusterize creating new tree')
        random.seed(int(time.time()))
        random_seed = random.randint(0,1500)
        print('Random to use: ',random_seed )
        tree_original= tree_clusters(levels_tree, random_seed= random_seed)
        tree_original.populate_tree(number_per_cluster=per_cluster, avoid_intersec= True)
        tree_original_points= tree_original.get_points_tree()
        X_2=np.array([[p.x,p.y] for p in tree_original_points])
        dic_points_ori={'points':[X_2], 'parent':''}
        
    
    
    if verbose:
        print('tree generated')
    
    
    if verbose:
        print('clusterize and creating the trees')
    
    tree_Natural_c = recursive_clustering_tree(dic_points_ori,
                                               levels_clustering = levels_cluster,
                                              algorithm = 'natural_cities'
                                              )
    tree_DBSCAN = recursive_clustering_tree(dic_points_ori,
                                               levels_clustering = levels_cluster,
                                              algorithm = 'dbscan')
    tree_HDBSCAN = recursive_clustering_tree(dic_points_ori,
                                               levels_clustering = levels_cluster,
                                              algorithm = 'hdbscan')
    tree_OPTICS= recursive_clustering_tree(dic_points_ori,
                                               levels_clustering = levels_cluster,
                                              algorithm = 'optics')
    tree_knee = recursive_clustering_tree(dic_points_ori,
                                               levels_clustering = levels_cluster,
                                              algorithm = 'auto_knee_average')
    if verbose:
        print('DONE clusterize and creating the trees')
    ######  get the points dataframe for each tree 
    data_fram_or = tree_original.get_dataframe_recursive_node_label(func_level_nodes = levels_from_strings)
    df_Natural = tree_Natural_c.get_dataframe_recursive_node_label()
    df_DBSCAN = tree_DBSCAN.get_dataframe_recursive_node_label()
    df_HDBSCAN = tree_HDBSCAN.get_dataframe_recursive_node_label()
    df_OPTICS = tree_OPTICS.get_dataframe_recursive_node_label()
    df_knee = tree_knee.get_dataframe_recursive_node_label()
    
    df_Natural.name='Natural_C'
    df_DBSCAN.name= 'DBSCAN'
    df_HDBSCAN.name= 'HDBSCAN'
    df_OPTICS.name= 'OPTICS'
    df_knee.name = 'knee'
    
    if verbose:
        print('Original size',data_fram_or.shape )
        print('Natural size',df_Natural.shape)
        print('DBSCAN size',df_DBSCAN.shape)
        print('HDBSCAN size',df_HDBSCAN.shape)
        print('OPTICS size',df_OPTICS.shape)
        print('KNEE size',df_knee.shape)
    
    ######For each dataframe  
    if verbose:
        print('get dataframe Original')
    get_tag_level_df_labels(data_fram_or, levels_cluster)
    ###Natural Cities
    if verbose:
        print('get dataframe Natural cities')
        
    dic_final_levels_Natural_c = get_dics_labels(tree_original, tree_Natural_c, levels_cluster)
    dic_label_final_levels_Natural=[ {'level_ori':dic['level_ori'], 'dict':mod_cid_label(dic['dict']) } for dic in  dic_final_levels_Natural_c]
    get_tag_level_df_labels(df_Natural, levels_cluster)
    for dic in dic_label_final_levels_Natural[1:]: ## En el nivel 0 no tiene sentido
        tag_ori = dic['level_ori']
        dic_lev = dic['dict']
        retag_originals(data_fram_or,
                        df_Natural,
                        tag_ori,
                        tag_ori,#### Como se hizo con la misma funcion tienen las misma etiqueta 
                        dic_lev)
    
    
    ### DBSCAN
    if verbose:
        print('get dataframe DBSCAN')
        
    dic_final_levels_DBSCAN = get_dics_labels(tree_original, tree_DBSCAN, levels_cluster)
    dic_label_final_levels_DBSCAN=[ {'level_ori':dic['level_ori'], 'dict':mod_cid_label(dic['dict']) } for dic in  dic_final_levels_DBSCAN]
    get_tag_level_df_labels(df_DBSCAN, levels_cluster)
    for dic in dic_label_final_levels_DBSCAN[1:]: ## En el nivel 0 no tiene sentido
        tag_ori = dic['level_ori']
        dic_lev = dic['dict']
        retag_originals(data_fram_or,
                        df_DBSCAN,
                        tag_ori,
                        tag_ori,#### Como se hizo con la misma funcion tienen las misma etiqueta 
                        dic_lev)
    
    
    ##HDBSCAN
    if verbose:
        print('get dataframe HDBSCAN')
    dic_final_levels_HDBSCAN = get_dics_labels(tree_original, tree_HDBSCAN, levels_cluster)
    dic_label_final_levels_HDBSCAN=[ {'level_ori':dic['level_ori'], 'dict':mod_cid_label(dic['dict']) } for dic in  dic_final_levels_HDBSCAN]
    get_tag_level_df_labels(df_HDBSCAN, levels_cluster)
    for dic in dic_label_final_levels_HDBSCAN[1:]: ## En el nivel 0 no tiene sentido
        tag_ori = dic['level_ori']
        dic_lev = dic['dict']
        retag_originals(data_fram_or,
                        df_HDBSCAN,
                        tag_ori,
                        tag_ori,#### Como se hizo con la misma funcion tienen las misma etiqueta 
                        dic_lev)
    #### OPTICS
    if verbose:
        print('get dataframe OPTICS')
        
    dic_final_levels_OPTICS = get_dics_labels(tree_original, tree_OPTICS, levels_cluster)
    dic_label_final_levels_OPTICS=[ {'level_ori':dic['level_ori'], 'dict':mod_cid_label(dic['dict']) } for dic in  dic_final_levels_OPTICS]
    get_tag_level_df_labels(df_OPTICS, levels_cluster)
    for dic in dic_label_final_levels_OPTICS[1:]: ## En el nivel 0 no tiene sentido
        tag_ori = dic['level_ori']
        dic_lev = dic['dict']
        retag_originals(data_fram_or,
                        df_OPTICS,
                        tag_ori,
                        tag_ori,#### Como se hizo con la misma funcion tienen las misma etiqueta 
                        dic_lev)
    ##### Knee
    if verbose:
        print('get dataframe Knee')
        
    dic_final_levels_knee = get_dics_labels(tree_original, tree_knee, levels_cluster)
    dic_label_final_levels_knee=[ {'level_ori':dic['level_ori'], 'dict':mod_cid_label(dic['dict']) } for dic in  dic_final_levels_knee]
    get_tag_level_df_labels(df_knee, levels_cluster)
    for dic in dic_label_final_levels_knee[1:]: ## En el nivel 0 no tiene sentido
        tag_ori = dic['level_ori']
        dic_lev = dic['dict']
        retag_originals(data_fram_or,
                        df_knee,
                        tag_ori,
                        tag_ori,#### Como se hizo con la misma funcion tienen las misma etiqueta 
                        dic_lev)
    ##############################################Get only signal  noise 
    data_fram_or_sig_noise = tree_original.get_tag_noise_signal_tree()
    df_Natural_sig_noise = tree_Natural_c.get_tag_noise_signal_tree()
    df_DBSCAN_sig_noise = tree_DBSCAN.get_tag_noise_signal_tree()
    df_HDBSCAN_sig_noise = tree_HDBSCAN.get_tag_noise_signal_tree()
    df_OPTICS_sig_noise = tree_OPTICS.get_tag_noise_signal_tree()
    df_knee_sig_noise = tree_knee.get_tag_noise_signal_tree()
    
    df_Natural_sig_noise.name='I_Natural_C'
    df_DBSCAN_sig_noise.name= 'I_DBSCAN'
    df_HDBSCAN_sig_noise.name= 'I_HDBSCAN'
    df_OPTICS_sig_noise.name= 'I_OPTICS'
    df_knee_sig_noise.name = 'I_knee'
    
    
    
    
    
    
    ######Evaluate form metric
    if verbose:
        print('Niveles: ' ,len(tree_original.levels_nodes))
        print('Nodos en el ultimo nivel: ' ,len(tree_original.levels_nodes[-1]))
        #print('tag_all : ' , data_fram_or['Final_tag'].unique())
    levels_r = range(0, levels_cluster)###
    resultado_form_metric = []
    for l in levels_r:
        
        d = { 'Level': l,
            'DBSCAN': similarity_clusterings(tree_original.levels_nodes[l],
                                            tree_DBSCAN.levels_nodes[l]),
             'HDBSCAN': similarity_clusterings(tree_original.levels_nodes[l],
                                             tree_HDBSCAN.levels_nodes[l]),
             'Natural': similarity_clusterings(tree_original.levels_nodes[l],
                                             tree_Natural_c.levels_nodes[l]),
             'OPTICS': similarity_clusterings(tree_original.levels_nodes[l],
                                            tree_OPTICS.levels_nodes[l]),
             'knee': similarity_clusterings(tree_original.levels_nodes[l],
                                          tree_knee.levels_nodes[l])
             }
        resultado_form_metric.append(d)
    

    return {'Point_dataframes':{'original_retag':data_fram_or,
            'Natural_C':df_Natural ,
            'DBSCAN':df_DBSCAN,
            'HDBSCAN':df_HDBSCAN,
            'OPTICS':df_OPTICS,
            'knee':df_knee
           }, 'metric_form': resultado_form_metric,
            'Noise_signal':{
            'original_retag':data_fram_or_sig_noise,
            'Natural_C':df_Natural_sig_noise ,
            'DBSCAN':df_DBSCAN_sig_noise,
            'HDBSCAN':df_HDBSCAN_sig_noise,
            'OPTICS':df_OPTICS_sig_noise,
            'knee':df_knee_sig_noise
            }
           
           }
