In [16]:
import numpy as np
from amuse.lab import generic_unit_converter, nbody_system
from amuse.lab import units as u
from amuse.lab import Particles
import amuse.lab

import matplotlib
matplotlib.use('Agg')
import matplotlib
font = {'family' : 'sans',
        'weight' : 'normal',
        'size'   : 24}

matplotlib.rc('font', **font)
matplotlib.rc({'savefig.dpi':300})
import matplotlib.pyplot as plt

from functools import partial
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN

import sys
import h5py

In [26]:
def find_clusters_with_dbscan(stars, outer_density_limit=1.0 | u.MSun*u.parsec**-3,
                              avg_stellar_mass=0.586,
                              eps=0.4, min_samples=12, leaf_size=30,
                              return_labels=False, debug=False):

    """
    Find all the stars in clusters using
    the DBSCAN implementation from scikit-learn.

    Keyword Arguments:
    stars               -- AMUSE particle set
    outer_density_limit -- If set, use this density_limit
                           in solar masses per parsec^-3 to
                           compute eps to figure
                           out where the cluster edges are
                           instead of the input eps. 
                           A good default choice
                           is 1.0 MSun * pc^-3.
                           Note this setting overrides eps.
    avg_stellar_mass    -- Average stellar mass of the IMF
                           used to make the stars your
                           clustering. Default is 
                           from a Kroupa IMF that goes 
                           from 0.08 to 150 Msun.
    eps                 -- Minimum  neighbor distance to be
                           considered in the cluster in pc.
                           This value is calculated for you
                           if you use outer_density_limit.
    min_samples         -- Minimum number of neighbors to
                           be considered a core particle.
                           Default is 12, the default for
                           DBSCAN.
    leaf_size           -- Number of particles in a leaf
                           on the KD tree the code uses
                           to find neighbors. Default is
                           30, the default for DBSCAN.
    return_labels       -- Also return the raw DBSCAN label output?
    debug               -- Turn on debugging output

    Returns:
    groups        -- A list of particle sets for the particles in each cluster.
    n_groups      -- The number of clusters found.
    labels        -- The actual label indicies returned by DBSCAN,
                     only returned if return_labels=True.
    unique_labels -- The unique labels returned by DBSCAN,
                     only returned if return_labels=True.
    """

    pre = "[find_clusters_with_dbscan]:"

    if (outer_density_limit is not None):

        # The number of samples should
        # be greater than that of
        # the average density of the
        # SN in a pc^3 (~ 0.01, BT), but not as high
        # as that of an open cluster 
        # (~10 Msun / pc^3, Binney and Tremaine)

        # Note the mean number density of the solar
        # neighborhood is 0.17 stars per parsec^3,
        # while the mean in an open cluster is 17 stars
        # per parsec^3, so a good choice is the mean
        # of these in log space, or about 1 star per parsec^-3.

        # So here we are saying they should be at least closer
        # that the average distance between stars in the SN.
        number_density_limit = (outer_density_limit.value_in(u.MSun*u.parsec**-3) 
                             / avg_stellar_mass)

        if (number_density_limit < 0.17):
            print(pre, "WARNING: Your number density limit \
                        at", number_density_limit, "pc^-3 \
                        is smaller than that of the solar \
                        neighborhood, which is ~ 0.17 pc^-3!")

        eps = number_density_limit**(-1./3.)

    if (debug):
        print(pre, "outer_density_limit  =", outer_density_limit)
        print(pre, "avg_stellar_mass     =", avg_stellar_mass)
        print(pre, "number_density_limit =", number_density_limit)
        print(pre, "eps =", eps)

    particle_positions = stars.position.value_in(u.parsec)

    # Note: I don't think its necessary to scale
    #       the inputs for DBSCAN when you are
    #       using a simple Eulerian metric on 3-d
    #       position space, as its just rescaling eps.

    # Get a DBSCAN instance running.
    db = DBSCAN(eps=eps, min_samples=min_samples, leaf_size=leaf_size)
    # Do the clustering.
    clstrs = db.fit_predict(particle_positions)
    # Get the unique cluster lables (i.e. the number of clusters).
    labels = db.labels_
    unique_labels = set(labels) # This returns only the unique ones.
    # Anything with an index of -1 is noise.
    #n_groups = len(filter((lambda x: x>=0),unique_labels))
    n_groups = 0
    groups = []

    for label in unique_labels:
        if (label >= 0): # Don't include noise particles here.
            groups.append(stars[np.where(labels == label)[0]])

    if (debug):
        print(pre, "groups=", groups)
        print(pre, "n_groups=", n_groups)
        print(pre, "labels=", labels)
        print(pre, "unique_labels=", unique_labels)

    if (return_labels):
        return groups, n_groups, labels, unique_labels
    else:
        return groups, n_groups

In [29]:
f = h5py.File('./example_data/L3-50M-2tff_stars.amuse', 'r')
list(f.keys())
dset = f['data']
print(dset)
#find_clusters_with_dbscan(file_,debug=True)

<HDF5 group "/data" (1 members)>


In [36]:
conv = generic_unit_converter.ConvertBetweenGenericAndSiUnits(
        1.0 | u.cm, 1.0 | u.g, 1.0 | u.s)
stars = amuse.io.read_set_from_file("./example_data/L3-50M-2tff_stars.amuse", format='amuse')
groups, n_groups = find_clusters_with_dbscan(stars,debug=True)

[find_clusters_with_dbscan]: outer_density_limit  = 1.0 MSun * parsec**-3
[find_clusters_with_dbscan]: avg_stellar_mass     = 0.586
[find_clusters_with_dbscan]: number_density_limit = 1.70648464164
[find_clusters_with_dbscan]: eps = 0.83682093912
[find_clusters_with_dbscan]: groups= [<amuse.datamodel.particles.ParticlesSubset object at 0x1241395d0>, <amuse.datamodel.particles.ParticlesSubset object at 0x124543cd0>, <amuse.datamodel.particles.ParticlesSubset object at 0x1245431d0>, <amuse.datamodel.particles.ParticlesSubset object at 0x12414ee50>, <amuse.datamodel.particles.ParticlesSubset object at 0x1241431d0>, <amuse.datamodel.particles.ParticlesSubset object at 0x1246dabd0>]
[find_clusters_with_dbscan]: n_groups= 0
[find_clusters_with_dbscan]: labels= [-1 -1  0 ...,  4  4  4]
[find_clusters_with_dbscan]: unique_labels= {0, 1, 2, 3, 4, 5, -1}


In [38]:
print(groups[0].center_of_mass().value_in(u.m))

[  6.98503808e+15  -9.76110951e+16   6.49901847e+16]
