# Grouping of components according to their velocity and intensity

This code is taken from Spandan Choudhury's paper for Barnard 5 flow (2023, submitted), with some modifications to optimize it for our case

In [1]:
import numpy as np
from astropy.io import fits
from tqdm.notebook import tqdm # just to see a cute loading bar :) 
import os

In [2]:
# to use this code we need to create those arrays
HC3N1gfile = '../bayes_frame/nested-sampling/HC3N/NGC1333-SE-mle-x1_filtered_QA.fits'
HC3N2gfile = '../bayes_frame/nested-sampling/HC3N/NGC1333-SE-mle-x2_filtered_QA.fits'
HC3N3gfile = '../bayes_frame/nested-sampling/HC3N/NGC1333-SE-mle-x3_filtered_QA.fits'
npeaksfile = '../bayes_frame/nested-sampling/HC3N/npeaks_cut5_noislands_QA.fits'

#master arrays
vel_master_file = 'vel_3cmp_master.fits'
sig_master_file = 'sig_3cmp_master.fits'
tmb_master_file = 'tmb_3cmp_master.fits'
radius_sample = 2
weight = 0.5

overwrite = True

In [3]:
headerHC3N = fits.getheader(HC3N1gfile)
xsize = headerHC3N['NAXIS1']
ysize = headerHC3N['NAXIS2']

params1g = fits.getdata(HC3N1gfile)
params2g = fits.getdata(HC3N2gfile)
params3g = fits.getdata(HC3N3gfile)

npeaks_map = fits.getdata(npeaksfile)
npeaks_map[np.where(np.isnan(npeaks_map))] = 0
mask_npeaksavailable = np.where(npeaks_map!=0, 1, 0)
xarray = np.linspace(0, headerHC3N['NAXIS1']-1, headerHC3N['NAXIS1']).astype(int)
yarray = np.linspace(0, headerHC3N['NAXIS2']-1, headerHC3N['NAXIS2']).astype(int)
XX, YY = np.meshgrid(xarray, yarray)

In [4]:
def dist_ar(xc=None, yc=None):
    """
    returns a 2D array with distance to each pixel from a custom centre pixel, (xc, yc)
    """
    d_ar = np.zeros(npeaks_map.shape)
    for i in range(d_ar.shape[0]):
        for j in range(d_ar.shape[1]):
            d_ar[i,j] = np.hypot(xc - j, yc - i)
        
    return d_ar

# def distance_pix(x, y, x0, y0):
#     return np.sqrt((x-x0)**2 + (y-y0)**2)

### Parameter distance 
#### $dp = \sqrt{\left( w_{\rm v} \times \frac{\rm \Delta v_{LSR}}{\rm v_{norm}} \right)^2 + \left( (1 - w_{\rm v}) \times \frac{\rm \Delta T_{TM}}{\rm T_{MB,norm}} \right)^2}$

In [5]:
def para_dist(v1=None, v2=None, tmb1=None, tmb2=None, vel_norm=0.3, tmb_norm=3, w_vel=0.5):
    """
    returns normalised combined 'distance' in velocity and T_MB
    
    v1, v2 : velocities of the two pixels
    tmb1, tmb2 : main beam brightness temperatures of the two pixels
    
    vel_norm : normalisation factor for difference in velocities
    tmb_norm : normalisation factor for difference in T_MB
    
    w_vel : weight for velocity 'distance'. T_MB diff gets weight (1-w)

    """
    
    vl_dist = np.abs((v1 - v2) / vel_norm)
    tmb_dist = np.abs((tmb1 - tmb2) / tmb_norm)
    
    dist = np.hypot(vl_dist*w_vel, (1-w_vel)*tmb_dist)
    return dist

### read in master arrays

##### * The velocity, velocity dispersion and the $T_{MB}$ for all components are stored in the arrays 'vel_master', 'sig_master', and 'tmb_master' (without any prior sorting)
##### * Both arrays have shapes (3, y, x). Each pixels have 3 values, corresponding to the velocity (or $T_{MB}$) of the 3 components in that pixel. The second (and third) values are NaNs where the pixels have only two (or one) components

In [6]:
headercomp = headerHC3N.copy()
headercomp['NAXIS3'] = 6

# we change this to add the uncertainties in the master files
if os.path.exists(vel_master_file) and os.path.exists(sig_master_file) and os.path.exists(tmb_master_file) and not overwrite:
    vel_master = fits.getdata('velmaster_HC3N.fits')
    sig_master = fits.getdata('sigmaster_HC3N.fits')
    tmb_master = fits.getdata('tmbmaster_HC3N.fits')
else:
    vel_master = np.array([params3g[1], params3g[4], params3g[7], params3g[10], params3g[13], params3g[16]]) # this gives a shape 6, y, x
    sig_master = np.array([params3g[2], params3g[5], params3g[8], params3g[11], params3g[14], params3g[17]])
    tmb_master = np.array([params3g[0], params3g[3], params3g[6], params3g[9], params3g[12], params3g[15]])

    index_params1g = np.where(npeaks_map == 1)
    for y, x in zip(index_params1g[0], index_params1g[1]):
        vel_master[0, y, x] = params1g[1, y, x]
        vel_master[3, y, x] = params1g[4, y, x]
        sig_master[0, y, x] = params1g[2, y, x]
        sig_master[3, y, x] = params1g[5, y, x]
        tmb_master[0, y, x] = params1g[0, y, x]
        tmb_master[3, y, x] = params1g[3, y, x]

    index_params2g = np.where(npeaks_map == 2)
    for y, x in zip(index_params2g[0], index_params2g[1]):
        vel_master[0, y, x] = params2g[1, y, x]
        vel_master[3, y, x] = params2g[7, y, x] # uncertainty
        sig_master[0, y, x] = params2g[2, y, x]
        sig_master[3, y, x] = params2g[8, y, x]# uncertainty
        tmb_master[0, y, x] = params2g[0, y, x]
        tmb_master[3, y, x] = params2g[6, y, x]# uncertainty
        vel_master[1, y, x] = params2g[4, y, x]
        vel_master[4, y, x] = params2g[10, y, x]# uncertainty
        sig_master[1, y, x] = params2g[5, y, x]
        sig_master[4, y, x] = params2g[11, y, x]# uncertainty
        tmb_master[1, y, x] = params2g[3, y, x]
        tmb_master[4, y, x] = params2g[9, y, x] # uncertainty
    fits.writeto('velmaster_HC3N.fits', vel_master, headercomp, overwrite=True)
    fits.writeto('sigmaster_HC3N.fits', sig_master, headercomp, overwrite=True)
    fits.writeto('tmbmaster_HC3N.fits', tmb_master, headercomp, overwrite=True)


### sort the different components based on the combined parameter distance

In [7]:
def sort_nearest_neighbour(x_start=None, y_start=None, mask=None, rad_asign=1, **kargs):
    
    """
    sorts the parameter maps based on combined parametric distance 
    
    
    inputs:
    x_start, y_start : co-ordinates of the pixel to start the sorting         ; required
    mask             : mask within which to do the sorting                    ; optional
    rad_assign.      : radius (in pix) to define the neighbourhood of a pixel ; default is 1
    
    further inputs can be passed into para_dist
    
    outputs :
    vel_sig_tmb_1, vel_sig_tmb_2, vel_sig_tmb_3 : arrays with shapes (3,:,:), corresponding to the component 
                                                  which is kinematically coherent in the largest extent, and the 
                                                  remaining components (which is to be sorted further), respectively
                                                  
                                                  each pixel in each of these arrays has three values, the velocity, 
                                                  velocity dispersion and T_MB at that pixel, for the corresponding 
                                                  copmponent
            
    """
    
    # create arrays to store the components after sorting
  
    vel_sig_tmb_1 = np.ones((6,)+ npeaks_map.shape) * np.nan # was 3
    vel_sig_tmb_2 = np.ones((6,)+ npeaks_map.shape) * np.nan
    vel_sig_tmb_3 = np.ones((6,)+ npeaks_map.shape) * np.nan
    
    # assign the velocity, velocity dispersion and T_MB at the starting pixel
    
    # vel_sig_tmb_1[:,y_start, x_start] = [vel_master[0, y_start, x_start], sig_master[0, y_start, x_start], 
    #                                    tmb_master[0, y_start, x_start]]
    # we already know that the 1st component is all the values where we have only one gaussian fit
    # this will avoid striping in the final result
    vel_sig_tmb_1 = np.array([params1g[1], params1g[2], params1g[0], params1g[4], params1g[5], params1g[3]]) # this is the change we added in v_3
    
    # we make a tracker map to see where we evaluated
    evaluated = np.zeros(np.shape(npeaks_map))
    evaluated[np.where(npeaks_map==1)] = 1
    evaluated[np.where(npeaks_map==0)] = 1 # to avoid evaluating where there are no values
    # create distance array with the reference as the starting pixel : 
    # the value at each pixel is the distance to that pixel from the reference
    
    dist_to_pixels = dist_ar(xc=x_start, yc=y_start)
    
    # apply mask if provided, if not apply a basic mask, to ensure no pixel without a good fit is tried
    
    try:
        y_list, x_list = np.where(mask)
    except :
        y_list, x_list = np.where(tmb_master[0, :, :] > 0)
    
    # distance to pixels within the mask
    dist_1d_array = dist_to_pixels[y_list, x_list]
    print('Loaded arrays and calculated distance')
    
    # sort the indices of the distance array in increasing order
    sorted_index = np.argsort(dist_1d_array)
    print('Determined sorted indexes, starting calculation...')
    """
    The sorting is done in the following steps :
     1. Start from the starting pixel, go to the next pixel in order of distance from the starting pixel, 
        and continue in this order
     
     2. Calculate the mean velocity and Tmb within the radius provided in input. In the first iteration, 
        this is just the velocity and Tmb of the starting pixel
       
     3. Calculate the parameter distances for all three components with respect to the mean velocity and Tmb 
        calculated above
        
     4. Sort the component with the minimum parameter distance to the first array, 
        and the other two components (if present) to the other two arrays. 
    """
    for indx in tqdm(sorted_index): # if you do not have tqdm, use for indx in sorted_index
        
        xi, yi = int(x_list[indx]), int(y_list[indx]) #this is the pixel we will evaluate
        if evaluated[yi, xi]: continue #do not evaluate the same pixel twice
#         dist_i = dist_ar(xc=xi, yc=yi) #this step evaluates all the map, can take too long
        
#         gd_idx = np.where(dist_i <= rad_asign)           
        
#         vel_1_all = vel_sig_tmb_1[0, :, :]
#         tmb_1_all = vel_sig_tmb_1[2, :, :]
        # calculate mean reference values for assigmnent
        # vel_1_mean = np.nanmean(vel_1_all[gd_idx])
        # tmb_1_mean = np.nanmean(tmb_1_all[gd_idx])
        # we change it to this
        vel_1_mean = np.nanmean(vel_sig_tmb_1[0, yi-rad_asign:yi+rad_asign+1, xi-rad_asign:xi+rad_asign+1])
        # this could have problems on the edges but we have no edges
        tmb_1_mean = np.nanmean(vel_sig_tmb_1[2, yi-rad_asign:yi+rad_asign+1, xi-rad_asign:xi+rad_asign+1])
        
        para_dist_1 = para_dist(v1=vel_master[0, yi, xi], tmb1=tmb_master[0, yi, xi], 
                                v2=vel_1_mean, tmb2=tmb_1_mean, **kargs)
        para_dist_2 = para_dist(v1=vel_master[1, yi, xi], tmb1=tmb_master[1, yi, xi], 
                                v2=vel_1_mean, tmb2=tmb_1_mean, **kargs)
        para_dist_3 = para_dist(v1=vel_master[2, yi, xi], tmb1=tmb_master[2, yi, xi], 
                                v2=vel_1_mean, tmb2=tmb_1_mean, **kargs)
        
        para_dist_min = np.nanmin([para_dist_1, para_dist_2, para_dist_3])
        
        if para_dist_1 == para_dist_min:
            vel_sig_tmb_1[:, yi, xi] = [vel_master[0, yi, xi], sig_master[0, yi, xi], tmb_master[0, yi, xi], vel_master[3, yi, xi], sig_master[3, yi, xi], tmb_master[3, yi, xi]]
            vel_sig_tmb_2[:, yi, xi] = [vel_master[1, yi, xi], sig_master[1, yi, xi], tmb_master[1, yi, xi], vel_master[4, yi, xi], sig_master[4, yi, xi], tmb_master[4, yi, xi]]
            vel_sig_tmb_3[:, yi, xi] = [vel_master[2, yi, xi], sig_master[2, yi, xi], tmb_master[2, yi, xi], vel_master[5, yi, xi], sig_master[5, yi, xi], tmb_master[5, yi, xi]]
            
        elif para_dist_2 == para_dist_min:
            vel_sig_tmb_1[:, yi, xi] = [vel_master[1, yi, xi], sig_master[1, yi, xi], tmb_master[1, yi, xi], vel_master[4, yi, xi], sig_master[4, yi, xi], tmb_master[4, yi, xi]]
            vel_sig_tmb_2[:, yi, xi] = [vel_master[2, yi, xi], sig_master[2, yi, xi], tmb_master[2, yi, xi], vel_master[5, yi, xi], sig_master[5, yi, xi], tmb_master[5, yi, xi]]
            vel_sig_tmb_3[:, yi, xi] = [vel_master[0, yi, xi], sig_master[0, yi, xi], tmb_master[0, yi, xi], vel_master[3, yi, xi], sig_master[3, yi, xi], tmb_master[3, yi, xi]]
            
        elif para_dist_3 == para_dist_min:

            vel_sig_tmb_1[:, yi, xi] = [vel_master[2, yi, xi], sig_master[2, yi, xi], tmb_master[2, yi, xi], vel_master[5, yi, xi], sig_master[5, yi, xi], tmb_master[5, yi, xi]]
            vel_sig_tmb_2[:, yi, xi] = [vel_master[0, yi, xi], sig_master[0, yi, xi], tmb_master[0, yi, xi], vel_master[3, yi, xi], sig_master[3, yi, xi], tmb_master[3, yi, xi]]
            vel_sig_tmb_3[:, yi, xi] = [vel_master[1, yi, xi], sig_master[1, yi, xi], tmb_master[1, yi, xi], vel_master[4, yi, xi], sig_master[4, yi, xi], tmb_master[4, yi, xi]]
        
    return vel_sig_tmb_1, vel_sig_tmb_2, vel_sig_tmb_3

## sort the components

In [8]:
averagevel = np.round(np.nanmean(params1g[1]), 0)
averagetemp = np.round(np.nanmean(params1g[0]), 0)
print(averagevel, ' km/s')
print(averagetemp, 'K')

8.0  km/s
1.0 K


In [9]:
# arr1 corresponds to `component 1' in the paper

# starting pixel is selected from a region with only one component, for clear component assignment

arr1, arr2, arr3 = sort_nearest_neighbour(x_start=226, y_start=215, mask=mask_npeaksavailable, w_vel=weight, rad_asign=radius_sample, 
                                             vel_norm=averagevel, tmb_norm=averagetemp)

Loaded arrays and calculated distance
Determined sorted indexes, starting calculation...


  0%|          | 0/51903 [00:00<?, ?it/s]

  vel_1_mean = np.nanmean(vel_sig_tmb_1[0, yi-rad_asign:yi+rad_asign+1, xi-rad_asign:xi+rad_asign+1])
  tmb_1_mean = np.nanmean(vel_sig_tmb_1[2, yi-rad_asign:yi+rad_asign+1, xi-rad_asign:xi+rad_asign+1])
  para_dist_min = np.nanmin([para_dist_1, para_dist_2, para_dist_3])


In [12]:
# we change the order of the arrays because we work with T, vel, sigma instead of vel, sigma, T
arr1_mod = np.zeros(np.shape(arr1)) * np.nan
arr1_mod[0, :, :] = arr1[2, :, :]
arr1_mod[1, :, :] = arr1[0, :, :]
arr1_mod[2, :, :] = arr1[1, :, :]
arr1_mod[3, :, :] = arr1[5, :, :]
arr1_mod[4, :, :] = arr1[3, :, :]
arr1_mod[5, :, :] = arr1[4, :, :]

arr2_mod = np.zeros(np.shape(arr2)) * np.nan
arr2_mod[0, :, :] = arr2[2, :, :]
arr2_mod[1, :, :] = arr2[0, :, :]
arr2_mod[2, :, :] = arr2[1, :, :]
arr2_mod[3, :, :] = arr2[5, :, :]
arr2_mod[4, :, :] = arr2[3, :, :]
arr2_mod[5, :, :] = arr2[4, :, :]

arr3_mod = np.zeros(np.shape(arr3)) * np.nan
arr3_mod[0, :, :] = arr3[2, :, :]
arr3_mod[1, :, :] = arr3[0, :, :]
arr3_mod[2, :, :] = arr3[1, :, :]
arr3_mod[3, :, :] = arr3[5, :, :]
arr3_mod[4, :, :] = arr3[3, :, :]
arr3_mod[5, :, :] = arr3[4, :, :]

fits.writeto('cluster1_HC3N_w{0}_r{1}.fits'.format(weight, radius_sample), arr1_mod, headercomp, overwrite=overwrite)
fits.writeto('cluster2_HC3N_w{0}_r{1}.fits'.format(weight, radius_sample), arr2_mod, headercomp, overwrite=overwrite)
fits.writeto('cluster3_HC3N_w{0}_r{1}.fits'.format(weight, radius_sample), arr3_mod, headercomp, overwrite=overwrite)

This code works, but we know that there are two different layers that in HC3N can be separated. So what we will do in the future is to first use a clustering algorithm to separate the two main bodies of emission and then use spandan's algorithm to assign the straglers to the groups.