In [1]:
from my_units import * 
from angular_fn import *
from data_cleaning_fn import *
from sky_patch_class import *

# Base directory (relative path) from which the list and figure folders are derived.
HomeDir = '../../'
# Location of the data files -- set this to the directory where you store your data files.
DataDir = '../../../../data/'
# Sub-folders of HomeDir for lists and figures.
ListDir = f'{HomeDir}lists/'
FigDir = f'{HomeDir}figs/'

# Example of iterative background subtraction and computation of the effective weights

## Preamble

In [2]:
### Define the patch on the sky where the analysis is done. Currently only works with a circle selection on the sphere.
### Only one of the two definitions below should be active at a time.
#sky_p = sky_patch(81.28, -69.78, 5*degree, 50*kpc, 'LMC_disc_5', np.array([1.871, 0.391]), pm_esc=0.2) ### For the LMC
sky_p = sky_patch(
    12.80,                       # presumably the patch centre coordinates (first of two) -- confirm against sky_patch
    -73.15,                      # presumably the patch centre coordinates (second of two) -- confirm
    4*degree,                    # presumably the disc radius (read later as sky_p.disc_radius) -- confirm
    60*kpc,                      # presumably the distance (read later as sky_p.distance) -- confirm
    'SMC_disc_4',                # presumably the base name of the data files (sky_p.data_file_name) -- confirm
    np.array([0.686, -1.237]),   # two-component array; meaning not visible from this notebook
    pm_esc=0.2,                  # later passed to fn_rem_outliers as sky_p.pm_esc
) ### For the SMC

In [3]:
### Parameters for data cleaning

# Gaussian kernels for background subtraction.
# beta_kernel_sub_0 is not passed to any call in this notebook; it only enters the edge margin below
# (presumably it is the kernel of an earlier first subtraction pass -- confirm).
beta_kernel_sub_0 = 0.1*degree
beta_kernel_sub = 0.06*degree

# Number of sigmas for outlier removal.
n_sigma_out = 3

# Number of iterations for the background subtraction.
n_iter_sub = 3

# Radius kept once the edge is trimmed: the disc radius minus one beta_kernel_sub_0
# and (n_iter_sub+1) times beta_kernel_sub.
disc_radius_no_edge = sky_p.disc_radius - beta_kernel_sub_0 - (n_iter_sub+1)*beta_kernel_sub

# G mag and radial bins used to compute the effective dispersion.
gmag_bin_size, rad_bin_size = 0.1, 1

In [4]:
### Import the data
# Reads '<data_file_name>_clean.csv' from DataDir into a DataFrame.
data = pd.read_csv(f'{DataDir}{sky_p.data_file_name}_clean.csv')
# Displayed as the cell output: (rows, columns) of the table just loaded.
data.shape

(2419001, 22)

## Execution

In [5]:
### Subtract the average pm and parallax and remove the outliers using a gaussian distance kernel of size beta_kernel_sub = 0.06 deg, iterating n_iter_sub = 3 times
### (the kernel passed below is beta_kernel_sub, not beta_kernel_sub_0 = 0.1 deg)
### Prepare the data for the iterative background subtraction and outlier removal
### The three returned objects are passed unchanged to every fn_back_field_sub call of the iteration that follows
disc_pix, nb_pixel_list, n = fn_prepare_back_sub(data, sky_p.disc_center, sky_p.disc_radius, beta_kernel_sub)

In [6]:
### Iterative background subtraction and outlier removal
### Each pass calls fn_back_field_sub on the current table, then fn_rem_outliers, which returns the
### filtered table together with the fraction of removed outliers (f_out).
### NOTE(review): sub=True requires an earlier call with sub=False (see the note on the call below), but no such
### call is made in this notebook -- presumably the '_clean.csv' input already carries the result of that
### first pass; confirm before running on a freshly produced file.
for i in range(n_iter_sub):
    fn_back_field_sub(data, disc_pix, nb_pixel_list, n, beta_kernel=beta_kernel_sub, sub=True) ### sub=True can be used only after this function has been already called once with sub=False
    data, f_out = fn_rem_outliers(data, sky_p.pm_esc, sky_p.distance/kpc, n_sigma_out)
    #data, f_out = fn_rem_outliers(data, sky_p.pm_esc, 50, n_sigma_out) ### this is what I was using before
    # Fixed-point formatting instead of slicing str(f_out*100): for very small fractions the float is
    # printed in scientific notation and a slice can cut the exponent off, giving a misleading number.
    print(f'Iter {i} -- fraction of outliers removed: {f_out*100:.5f} %')

### One more subtraction after the last outlier removal: this is the (n_iter_sub+1)-th call with
### beta_kernel_sub, matching the (n_iter_sub+1)*beta_kernel_sub term in disc_radius_no_edge.
fn_back_field_sub(data, disc_pix, nb_pixel_list, n, beta_kernel=beta_kernel_sub, sub=True)
data.shape

sub is set to True.


100%|██████████| 246575/246575 [01:31<00:00, 2687.16it/s]


Iter 0 -- fraction of outliers removed: 3.76361 %
sub is set to True.


100%|██████████| 246575/246575 [01:33<00:00, 2625.05it/s]


Iter 1 -- fraction of outliers removed: 0.01529 %
sub is set to True.


100%|██████████| 246575/246575 [01:32<00:00, 2666.46it/s]


Iter 2 -- fraction of outliers removed: 0.00893 %
sub is set to True.


100%|██████████| 246575/246575 [01:34<00:00, 2606.43it/s]


(2327395, 23)

In [7]:
### Remove stars at the boundary to avoid edge effects due to gaussian kernel field subtraction
### disc_radius_no_edge is the patch radius reduced by beta_kernel_sub_0 plus (n_iter_sub+1) times beta_kernel_sub (set in the parameters cell)
### Note: `data` is rebound here, so the untrimmed table is no longer available after this cell
data = fn_rem_edges(data, sky_p.disc_center, disc_radius_no_edge)
data.shape ### it used to be (2296652, 23)

(2296580, 23)

In [8]:
### Compute the effective weights
### fn_effective_w is called for its effect on `data` (its return value is not used); the recorded shapes go from 23 to 25 columns across this cell
### presumably the two added columns are 'pm_eff_error' and 'parallax_eff_error', which the export cell below reads -- confirm
fn_effective_w(data, sky_p.disc_center, gmag_bin_size, rad_bin_size)
data.shape

(2296580, 25)

In [88]:
# Exporting the final data file as a csv file
#data.to_csv(DataDir+sky_p.data_file_name+'_final.csv', index=False)

In [89]:
### Export the final data for the template scan - saving to a npy file makes it much faster to load afterwards.
### Saving only the quantities that will be used to compute the template
#np.save(DataDir+sky_p.data_file_name+'_final', np.array([data['ra'].to_numpy(), data['dec'].to_numpy(), data['ecl_lon'].to_numpy(), data['ecl_lat'].to_numpy(), 
#                                                         (data['pm_eff_error'].to_numpy())**2, data['pmra_sub'].to_numpy()/data['pm_eff_error'].to_numpy()**2, data['pmdec_sub'].to_numpy()/data['pm_eff_error'].to_numpy()**2,
#                                                         (data['parallax_eff_error'].to_numpy())**2, data['parallax_sub'].to_numpy()/data['parallax_eff_error'].to_numpy()**2]).T)