In [None]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler
#from sklearn.preprocessing import StandardScaler
import numpy as np
# Suppress warnings. Comment this out if you wish to see the warning messages
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from sklearn.cluster import HDBSCAN
import astropy.units as u
import astropy.coordinates as apycoords
import os
import glob
# from zero_point import zpt
from tqdm import tqdm
from sklearn.cluster import DBSCAN

In [None]:
# Container for the full Gaia sample used by every cell below.
# NOTE(review): the frame is created empty here, so selecting the astrometric
# columns from it will fail until the catalogue has actually been loaded into
# `df` (e.g. from a file or query) -- confirm where the data is meant to come from.
df = pd.DataFrame() # full gaia sample

In [None]:
# Columns of `df` that span the clustering feature space.
clustering_on = ['ra', 'dec', 'pmra', 'pmdec', 'parallax']

# Select those columns and rescale them with RobustScaler; `data` is the
# scaled array handed to the clustering estimators in the cells below.
data = RobustScaler().fit_transform(df[clustering_on])

# DBSCAN

In [None]:
# Fit DBSCAN on the scaled feature array `data`; the fitted estimator is kept
# in `db` so its labels can be read in the next cell.
db = DBSCAN(
    eps=0.4,
    min_samples=8,
    algorithm='ball_tree',
).fit(data)  # n_jobs = 4

In [None]:
# Attach the DBSCAN assignment to every row of `df`, then print one
# "<label> <member count>" line per distinct label.
labels = db.labels_
df['label_db'] = labels
for cluster_id in set(labels):
    n_members = len(df[df['label_db'] == cluster_id])
    print(f'{cluster_id} {n_members}')

In [None]:
# Proper-motion scatter of the DBSCAN clusters, one colour per cluster label.
# The labels are read back from df['label_db'] (written when the DBSCAN labels
# were stored) rather than from a 'Clusters' column, which is never created,
# and rather than from the global `labels`, which the HDBSCAN cells overwrite.
unique_labels = set(df['label_db'])
colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]

plt.figure(figsize=(5, 5))
# Iterate in sorted order so the label -> colour mapping is reproducible.
for label, color in zip(sorted(unique_labels), colors):
    if label == -1:
        # scikit-learn marks noise points with -1; they are not drawn.
        continue
    members = df['label_db'] == label
    plt.plot(df.loc[members, 'pmra'], df.loc[members, 'pmdec'],
             markerfacecolor=tuple(color), marker='o', linestyle='None',
             markersize=5, alpha=1)
plt.title("DBSCAN clusters", fontsize=14)
plt.xlabel("pmra [mas $yr^{-1}$]", fontsize=14)
plt.ylabel("pmdec [mas $yr^{-1}$]", fontsize=14)
plt.tight_layout()
plt.grid()
plt.show()

# HDBSCAN

In [None]:
# Fit HDBSCAN on the scaled features using a Mahalanobis metric. The metric's
# `V` parameter is the covariance of `data` with columns treated as variables
# (rowvar=False).
hd = HDBSCAN(
    min_cluster_size=20,
    min_samples=5,
    metric='mahalanobis',
    metric_params={'V': np.cov(data, rowvar=False)},
).fit(data)

In [None]:
# Attach the HDBSCAN assignment to every row of `df`, then print one
# "<label> <member count>" line per distinct label.
labels = hd.labels_
df['label_hb'] = labels
for cluster_id in set(labels):
    n_members = len(df[df['label_hb'] == cluster_id])
    print(f'{cluster_id} {n_members}')

In [None]:
# Proper-motion scatter of the HDBSCAN clusters, one colour per cluster label.
# The labels are read back from df['label_hb'] (written when the HDBSCAN labels
# were stored) rather than from a 'Clusters' column, which is never created.
unique_labels = set(df['label_hb'])
colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]

plt.figure(figsize=(5, 5))
# Iterate in sorted order so the label -> colour mapping is reproducible.
for label, color in zip(sorted(unique_labels), colors):
    if label == -1:
        # scikit-learn marks noise points with -1; they are not drawn.
        continue
    members = df['label_hb'] == label
    plt.plot(df.loc[members, 'pmra'], df.loc[members, 'pmdec'],
             markerfacecolor=tuple(color), marker='o', linestyle='None',
             markersize=5, alpha=1)
plt.title("HDBSCAN clusters", fontsize=14)
plt.xlabel("pmra [mas $yr^{-1}$]", fontsize=14)
plt.ylabel("pmdec [mas $yr^{-1}$]", fontsize=14)
plt.tight_layout()
plt.grid()
plt.show()