In [1]:
from time import time as tictoc
import sys
from sky_patch_class import *
from my_units import * 
from angular_fn import *
from template_fn import *
from sim_setup_fn import *
from sim_injection_fn import *
from data_cleaning_fn import *
from sim_analysis_fn import *

# Directory layout, expressed relative to the folder this notebook is run from.
HomeDir = '../'
# The data location is hard-coded; the alternative HomeDir+'data/' is kept here for reference.
DataDir = '../../../../data/'
ListDir = HomeDir + 'lists/'
ListResDir = ListDir + 'sim/'

Notebook to generate mock data with proper motion noise (no signal). It can be run locally.

Takes as input:
* data_file_name+'_clean.npy', the cleaned data generated in the notebook data_cleaning.ipynb

where data_file_name is either 'LMC_disc_5' or 'SMC_disc_4'. It generates as output:
* data_file_name+'_sim_noise_final.npy', to be used as input for the mock data template scan (data_template_scan.ipynb and data_template_scan_fine.ipynb)

# Noise only simulation - LMC

## Preamble

In [2]:
### Sky patch on which the analysis is done. Only a circular selection on the sphere is currently supported.
### Values are taken from the paper "Gaia Early Data Release 3: Structure and properties of the Magellanic Clouds" (see Table 4).
### NOTE(review): the per-argument notes below are inferred from how sky_p is used later in this notebook -- confirm against sky_patch.
sky_p = sky_patch(
    81.28, -69.78,              # presumably the centre of the disc on the sky
    5*degree,                   # presumably the disc radius
    50*kpc,                     # presumably the distance
    'LMC_disc_5',               # presumably the data file name stem
    np.array([1.871, 0.391]),   # presumably the mean proper motion of the LMC
    pm_esc = 0.2,
    sigma_pm = 0.125,
)  ### For the LMC

In [6]:
### Data-cleaning parameters
# Gaussian kernels for the background subtraction
beta_kernel_sub_0 = 0.1*degree
beta_kernel_sub = 0.06*degree
# Number of sigmas for the outlier removal
n_sigma_out = 3
# Number of iterations for the background subtraction
n_iter_sub = 3
# Radius passed to the edge-removal step further down
disc_radius_no_edge = sky_p.disc_radius - beta_kernel_sub_0 - 2*(n_iter_sub+1)*beta_kernel_sub
# g mag and radial bins used to compute the effective dispersion
gmag_bin_size = 0.1
rad_bin_size = 1

# g mag bin size used to inject the noise
gmag_bin_size_noise = 0.05

In [4]:
### Load the cleaned catalogue -- reading an npy file is much faster than parsing the csv file with pd.read_csv
columns_df = [
    'ra', 'dec', 'pmra', 'pmdec', 'pmra_error', 'pmdec_error',
    'phot_g_mean_mag', 'pmra_sub', 'pmdec_sub',
]
data_np = np.load(DataDir + sky_p.data_file_name + '_clean.npy')
data = pd.DataFrame(data_np, columns=columns_df)

## Execution

In [7]:
print('Preparing the mock data..')
### Inject the noise into the catalogue
# NOTE(review): no random seed is set in this notebook; if fn_noise_inj draws random numbers,
# the mock data will differ from run to run -- confirm whether that is intended.
fn_noise_inj(data, sky_p, gmag_bin_size_noise, rad_bin_size, noise=True)


Preparing the mock data..


In [8]:
### Set up the iterative background subtraction and outlier removal.
### The average pm is subtracted using a gaussian distance kernel of size beta_kernel_sub = 0.06 deg, iterating n_iter_sub = 3 times.
disc_pix, nb_pixel_list, n = fn_prepare_back_sub(
    data,
    sky_p.disc_center,
    sky_p.disc_radius,
    beta_kernel_sub,
)

In [9]:
### Iterative background subtraction and outlier removal
for i in range(n_iter_sub):
    # sub=True can be used only after this function has already been called once with sub=False
    fn_back_field_sub(data, disc_pix, nb_pixel_list, n, beta_kernel=beta_kernel_sub, sub=False, sim=True)
    data, f_out = fn_rem_outliers(data, sky_p.pm_esc, sky_p.distance/kpc, n_sigma_out, sim=True)
    # Fixed-point formatting: slicing str(f_out*100) cuts off the exponent of very small
    # fractions (e.g. '8.312345e-05'[:7] -> '8.31234') and would report a wrong percentage.
    print('Iter '+str(i)+' -- fraction of outliers removed: '+'{:.5f}'.format(f_out*100)+' %')

# One more subtraction pass on the catalogue left after the last outlier removal
fn_back_field_sub(data, disc_pix, nb_pixel_list, n, beta_kernel=beta_kernel_sub, sub=False, sim=True)
data.shape

Iter 0 -- fraction of outliers removed: 1.47078 %
Iter 1 -- fraction of outliers removed: 0.00340 %
Iter 2 -- fraction of outliers removed: 0.00162 %


(12525856, 12)

In [10]:
### Drop the stars at the boundary of the disc to avoid edge effects due to the gaussian kernel field subtraction
data = fn_rem_edges(
    data,
    sky_p.disc_center,
    disc_radius_no_edge,
)
data.shape

(11998102, 12)

In [11]:
### Compute the effective weights; in the recorded run this adds one column to data (12 -> 13)
fn_effective_w(
    data,
    sky_p.disc_center,
    gmag_bin_size,
    rad_bin_size,
    sim=True,
)
data.shape

(11998102, 13)

In [12]:
### Export the final data for the template scan -- an npy file is much faster to load back.
### Only the quantities used to compute the template are saved, one per column:
### ra, dec, pm_eff_error^2, pmra_sim/pm_eff_error^2, pmdec_sim/pm_eff_error^2
pm_eff_var = data['pm_eff_error'].to_numpy()**2
sim_noise_final = np.array([
    data['ra'].to_numpy(),
    data['dec'].to_numpy(),
    pm_eff_var,
    data['pmra_sim'].to_numpy()/pm_eff_var,
    data['pmdec_sim'].to_numpy()/pm_eff_var,
]).T
np.save(DataDir + sky_p.data_file_name + '_sim_noise_final', sim_noise_final)

# Noise only simulation - SMC

## Preamble

In [2]:
### Sky patch on which the analysis is done. Only a circular selection on the sphere is currently supported.
### Values are taken from the paper "Gaia Early Data Release 3: Structure and properties of the Magellanic Clouds" (see Table 4).
### NOTE(review): the per-argument notes below are inferred from how sky_p is used later in this notebook -- confirm against sky_patch.
sky_p = sky_patch(
    12.80, -73.15,               # presumably the centre of the disc on the sky
    4*degree,                    # presumably the disc radius
    60*kpc,                      # presumably the distance
    'SMC_disc_4',                # presumably the data file name stem
    np.array([0.686, -1.237]),   # presumably the mean proper motion of the SMC
    pm_esc = 0.2,
    sigma_pm = 0.105,
)  ### For the SMC

In [3]:
### Data-cleaning parameters
# Gaussian kernels for the background subtraction
beta_kernel_sub_0 = 0.1*degree
beta_kernel_sub = 0.06*degree
# Number of sigmas for the outlier removal
n_sigma_out = 3
# Number of iterations for the background subtraction
n_iter_sub = 3
# Radius passed to the edge-removal step further down
disc_radius_no_edge = sky_p.disc_radius - beta_kernel_sub_0 - 2*(n_iter_sub+1)*beta_kernel_sub
# g mag and radial bins used to compute the effective dispersion
gmag_bin_size = 0.1
rad_bin_size = 1

# g mag bin size used to inject the noise
gmag_bin_size_noise = 0.05

In [4]:
### Load the cleaned catalogue -- reading an npy file is much faster than parsing the csv file with pd.read_csv
columns_df = [
    'ra', 'dec', 'pmra', 'pmdec', 'pmra_error', 'pmdec_error',
    'phot_g_mean_mag', 'pmra_sub', 'pmdec_sub',
]
data_np = np.load(DataDir + sky_p.data_file_name + '_clean.npy')
data = pd.DataFrame(data_np, columns=columns_df)

## Execution

In [5]:
print('Preparing the mock data..')
### Inject the noise into the catalogue
# NOTE(review): no random seed is set in this notebook; if fn_noise_inj draws random numbers,
# the mock data will differ from run to run -- confirm whether that is intended.
fn_noise_inj(data, sky_p, gmag_bin_size_noise, rad_bin_size, noise=True)

Preparing the mock data..


In [6]:
# For SMC only: cut on the pm to remove stars from the foreground globular clusters.
# NOTE(review): the box is centred on (0.685, -1.230), which is close to but not the same as the
# np.array([0.686, -1.237]) passed to sky_patch for the SMC -- confirm the difference is intended.
if sky_p.data_file_name == 'SMC_disc_4':
    orig_len = len(data)
    in_pmra_box = (data['pmra_sim'] > 0.685 - 2) & (data['pmra_sim'] < 0.685 + 2)
    in_pmdec_box = (data['pmdec_sim'] > -1.230 - 2) & (data['pmdec_sim'] < -1.230 + 2)
    data = data[in_pmra_box & in_pmdec_box]
    # Fraction of stars surviving the cut
    print(len(data)/orig_len)

0.9118251685393258


In [7]:
### Set up the iterative background subtraction and outlier removal.
### The average pm is subtracted using a gaussian distance kernel of size beta_kernel_sub = 0.06 deg, iterating n_iter_sub = 3 times.
disc_pix, nb_pixel_list, n = fn_prepare_back_sub(
    data,
    sky_p.disc_center,
    sky_p.disc_radius,
    beta_kernel_sub,
)

In [8]:
### Iterative background subtraction and outlier removal
for i in range(n_iter_sub):
    # sub=True can be used only after this function has already been called once with sub=False
    fn_back_field_sub(data, disc_pix, nb_pixel_list, n, beta_kernel=beta_kernel_sub, sub=False, sim=True)
    data, f_out = fn_rem_outliers(data, sky_p.pm_esc, sky_p.distance/kpc, n_sigma_out, sim=True)
    # Fixed-point formatting: slicing str(f_out*100) cuts off the exponent of very small
    # fractions (e.g. '8.312345e-05'[:7] -> '8.31234') and would report a wrong percentage.
    print('Iter '+str(i)+' -- fraction of outliers removed: '+'{:.5f}'.format(f_out*100)+' %')

# One more subtraction pass on the catalogue left after the last outlier removal
fn_back_field_sub(data, disc_pix, nb_pixel_list, n, beta_kernel=beta_kernel_sub, sub=False, sim=True)
data.shape

Iter 0 -- fraction of outliers removed: 0.17680 %
Iter 1 -- fraction of outliers removed: 0.00083 %
Iter 2 -- fraction of outliers removed: 0.00069 %


(2025193, 12)

In [9]:
### Drop the stars at the boundary of the disc to avoid edge effects due to the gaussian kernel field subtraction
data = fn_rem_edges(
    data,
    sky_p.disc_center,
    disc_radius_no_edge,
)
data.shape

(2001813, 12)

In [10]:
### Compute the effective weights; in the recorded run this adds one column to data (12 -> 13)
fn_effective_w(
    data,
    sky_p.disc_center,
    gmag_bin_size,
    rad_bin_size,
    sim=True,
)
data.shape

(2001813, 13)

In [12]:
### Export the final data for the template scan -- an npy file is much faster to load back.
### Only the quantities used to compute the template are saved, one per column:
### ra, dec, pm_eff_error^2, pmra_sim/pm_eff_error^2, pmdec_sim/pm_eff_error^2
pm_eff_var = data['pm_eff_error'].to_numpy()**2
sim_noise_final = np.array([
    data['ra'].to_numpy(),
    data['dec'].to_numpy(),
    pm_eff_var,
    data['pmra_sim'].to_numpy()/pm_eff_var,
    data['pmdec_sim'].to_numpy()/pm_eff_var,
]).T
np.save(DataDir + sky_p.data_file_name + '_sim_noise_final', sim_noise_final)

In [13]:
# Export the full final catalogue as a csv file. It is not needed by the analysis, but can be kept for reference.
csv_path = DataDir + sky_p.data_file_name + '_sim_noise_clean.csv'
data.to_csv(csv_path, index=False)