# 1.3 Clusters with age between $1 × 10^{9}$ and $2 × 10^{9}$ yrs

In [2]:
from astropy import units as u
from astropy.coordinates import SkyCoord
from sklearn.neighbors import NearestNeighbors
import scipy as sp
from scipy.spatial import distance
from scipy import stats
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mpl_scatter_density 
from matplotlib.colors import LinearSegmentedColormap
from scipy.optimize import curve_fit
from sklearn.mixture import GaussianMixture

import warnings
warnings.filterwarnings('ignore')

## Functions

In [3]:
def preprocess_cluster(data, g_mean_th=18):
    """
    ------
    parallax > 0 

    phot_g_mean_mag < g_mean_th
    ------
    """
    data = data[data['parallax'] > 0]
    data = data[data['phot_g_mean_mag'] < g_mean_th]
    data['Gmg'] = data['phot_g_mean_mag'] + (5 * np.log10(data['parallax']) - 10)
    data['L'] = 10**(0.4*(4.83 - data['Gmg']))
    print(len(data))
    return data


def cmd_plot(data, x_axis, y_axis, alpha=0.8, s=5):
    """
    -------
    plot isochrone
    -------
    """

    with plt.style.context(['ieee']):
        fig = plt.figure(figsize=(6,6), dpi=100)
        sns.scatterplot(data=data, y=y_axis, x=x_axis, alpha=alpha, s=s)
        plt.gca().invert_yaxis();
        
        
def joint_plot(data):

    plt.figure(dpi=90)
    sns.jointplot(
        data=data,
        x="pmra", y="pmdec",
        kind="kde"
        );
    
    
def fit_curve(data, column, bins = 100):

    plt.figure(figsize=(12,3), dpi=120)
    counts, bins, patches = plt.hist(data[column], bins = bins)

    # Define the Gaussian function
    def gaussian(x, amp, mu, sigma):
        return amp * np.exp(-(x - mu)**2 / (2 * sigma**2))
    
    # data
    x_data = bins[:-1]
    y_data = counts
    
    # Fit the Gaussian function to the data
    popt, pcov = curve_fit(gaussian, x_data, y_data)
    
    plt.figure(figsize=(12,3), dpi=120)
    # Plot the original data and the fitted curve
    sns.scatterplot(x_data, y_data, label=column)
    plt.plot(x_data, gaussian(x_data, *popt), color='red', label='Fit')
    plt.legend()
    plt.show()
    
    return popt


def guassian_filter(data, column, mu, std):    
    up = round(mu + 3 * std, 2)
    low = round(mu - 3 * std, 2)
    
    if up > low:
        print('upper bound:', up) 
        print('lower bound:', low)
        df = data[(data[column] < up) & (data[column] > low)]
    else:
        print('upper bound:', low) 
        print('lower bound:', up)
        df = data[(data[column]< low) & (data[column] > up)]
    
    print('cluster length:', len(df))
    return df


def luminosity_density(cluster_3d, clusterdf):
    """
    --------
    cluster_3d --> measuring distance in that dataframe
    
    clusterdf --> main dataframe
    -------
    """
    # create a NearestNeighbors object and fit the dataset
    nbrs = NearestNeighbors(n_neighbors=6, metric='minkowski').fit(cluster_3d)

    # find the 5 nearest neighbors for each data point including itself
    distances, indices = nbrs.kneighbors(cluster_3d)

    # find maximum distance among 5 neighbors
    max_distances = np.amax(distances, axis=1)

    # sphere of that max distance
    spheres = (4/3) * np.pi * (max_distances ** 3)

    # sum of luminosities of each 6 nn
    lum_sum = []
    for i in range(len(clusterdf)):
        lum_sum.append(np.sum(clusterdf.iloc[indices[i]]['L']))

    # luminosity density
    lum_dens = lum_sum / spheres

    return lum_dens



def lum_plot(data):
    """
    --------
    plot for luminosity density profile
    --------
    """    
    
    with plt.style.context(['ieee']):
        plt.figure(figsize=(12,6), dpi=200)
        plt.plot(range(len(data)), np.sort(data))
        plt.ylabel('ΔL/ΔV')
        plt.title('luminosity density profile');

## open cluster names

In [9]:
open_clusters = pd.read_excel('../../data/open clusters table.xlsx')

open_clusters = open_clusters.dropna(axis=0, subset=['Name', 'logt']).drop(index=0)
open_clusters['logt'] = open_clusters.logt.astype(float)

x = open_clusters[(10**open_clusters['logt'] < 2 * 10**9) & (10**open_clusters['logt'] >= 1 * 10**9)].index
open_clusters.iloc[x].dropna(axis=1).head(60)

Unnamed: 0,Name,RA,DEC,l,b,d,EBV,logt,D
66,ASCC 49,08 47 52,-37 59 24,259.31,3.47,900,0.02,8.14,54.0
110,ASCC 93,18 08 13,-22 15 36,8.33,-1.04,2500,0.28,7.22,32.4
176,Berkeley 18,05 22 12,+45 24 00,163.63,5.02,5800,0.46,9.63,12.0
183,Berkeley 27,06 51 18,+05 46 00,207.78,2.6,5035,0.05,9.3,2.0
186,Berkeley 30,06 57 42,+03 13 00,210.78,2.86,4790,0.5,8.48,3.0
188,Berkeley 32,06 58 06,+06 26 00,207.95,4.4,3100,0.16,9.53,6.0
200,Berkeley 62,01 01 00,+63 57 00,123.98,1.1,1837,0.852,7.185,5.0
205,Berkeley 68,04 44 30,+42 04 00,162.13,-2.41,1678,0.671,8.391,12.0
208,Berkeley 71,05 40 56,+32 16 42,176.63,0.9,3900,0.85,8.8,5.0
217,Berkeley 93,21 56 12,+63 56 00,105.07,7.32,5600,1.5,8.0,2.0
