In [1]:
import os
import sys
from os import listdir

import numpy as np
import pandas as pd
from astropy.coordinates import *
import astropy.units as u

In [2]:
# Data locations, given relative to the directory the notebook/script is run from.
# Keep these relative (or point them at your own copy of the data) rather than
# committing machine-specific absolute paths.
edr3_data = './edr3_data'       # directory whose files are listed and read as gzip-compressed CSV
dr2_data = './dr2_data'         # presumably the DR2 counterpart; not used in the cells reviewed here
hist_res_dir = './hist_stats/'  # presumably an output directory for histogram statistics; not used in the cells reviewed here

In [5]:
### Read in the eDR3 file index from the command line
def parse_file_index(argv, default=0):
    """Return the eDR3 file index passed as the first command-line argument.

    Parameters
    ----------
    argv : sequence of str
        Argument vector, normally ``sys.argv``.
    default : int, optional
        Index to use when ``argv[1]`` is missing or is not an integer. This is
        the situation inside a Jupyter kernel, where ``argv[1]`` is a kernel
        option such as ``-f`` rather than a file index.

    Returns
    -------
    int
        The parsed index, or ``default`` when no integer could be read.
    """
    try:
        return int(argv[1])
    except (IndexError, ValueError):
        # Warn on stderr so a mis-launched batch job does not silently process the default file.
        print('No integer file index given on the command line; using '
              + str(default) + '.', file=sys.stderr)
        return default


current_index = parse_file_index(sys.argv)  # current index in list of edr3 files
print('\nReading in eDR3 file '+str(current_index)+'.', flush=True)


Reading in eDR3 file 0.


# Loading in a single EDR3 file

In [6]:
# listdir() returns entries in arbitrary order; sort them so that a given
# command-line index always refers to the same file across runs and machines.
# Hidden entries (e.g. '.DS_Store') are skipped because they are not catalogue
# files and would break the fixed-width file-name parsing done on this list.
list_dr3_files = sorted(name for name in listdir(edr3_data) if not name.startswith('.'))

In [7]:
# Every file name carries two fixed-width integer fields: characters 11-16 hold
# the first index covered by the file and characters 18-23 hold the last one.
# Both arrays are aligned element-for-element with list_dr3_files.
healpix_edr3_start = np.array([int(name[11:17]) for name in list_dr3_files], dtype=int)
healpix_edr3_end = np.array([int(name[18:24]) for name in list_dr3_files], dtype=int)
    

In [8]:
def get_source_ids(file_names):
    """Convert the index range encoded in each EDR3 file name to source IDs.

    The two fixed-width integer fields of every file name (characters 11-16 and
    18-23) are each multiplied by ``N_8 = 2**43``. In the Gaia ``source_id``
    encoding, ``source_id // 2**35`` is the HEALPix level-12 pixel index, so
    ``2**43 = 2**35 * 4**4`` is the number of source IDs spanned by one
    level-8 pixel; the file-name fields are therefore treated as level-8
    pixel indices.

    Parameters
    ----------
    file_names : sequence of str
        EDR3 file names to parse.

    Returns
    -------
    start, end : numpy.ndarray of int64
        ``start[i]`` is the first index of file ``i`` times ``N_8`` and
        ``end[i]`` is the last index times ``N_8``. Note that ``end[i]`` is
        thus the lowest source ID of the last pixel, not an inclusive upper
        bound (that would be ``end[i] + N_8 - 1``).
    """
    N_8 = 2**(59-16)

    # Parse the names that were passed in instead of relying on module-level
    # arrays, and use an explicit 64-bit dtype: the products reach ~2**63 and
    # do not fit in the 32-bit default integer of some platforms.
    start = np.array([int(name[11:17]) * N_8 for name in file_names], dtype=np.int64)
    end = np.array([int(name[18:24]) * N_8 for name in file_names], dtype=np.int64)
    return start, end

In [9]:
def load_dr3_file(idx):
    """Read one EDR3 file into a DataFrame.

    ``idx`` is the position of the file in ``list_dr3_files``; the file is
    looked up inside ``edr3_data`` and parsed as a gzip-compressed CSV.
    """
    file_path = f"{edr3_data}/{list_dr3_files[idx]}"
    return pd.read_csv(file_path, compression='gzip')

In [10]:
# Source-ID values derived from the index range in each file name (one entry per file).
start, end = get_source_ids(file_names=list_dr3_files)

### Call the functions

In [13]:
# Load only the file selected on the command line.
dr3 = load_dr3_file(idx=current_index)

14 corresponding files


# Generate Pair Catalogue

For each star, we first locate accidental pairs by on-sky proximity. This is the condition
$$|\theta_i - \theta_j| < \theta_\text{min}$$
where $i$ is the index of the foreground star, and $j$ is a background star for a given foreground $i$.
After this first cut, we then require that the background candidate lie behind the foreground star at a significance of $n_\sigma$ standard deviations:

$$\varpi_i - \varpi_j > n_\sigma \sqrt{\sigma_{\varpi_i}^2 + \sigma_{\varpi_j}^2}.$$

When $n_\sigma =2$, this corresponds to a 95% confidence level. We can tighten or relax these cuts in order to control the size/purity of the resulting pair catalogue.

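The above assumes that the parallax errors of the two stars are uncorrelated. A stricter, worst-case condition adds the two uncertainties linearly instead of in quadrature; for the difference $\varpi_i - \varpi_j$ this corresponds to fully anti-correlated errors (correlation $= -1$). The above formula then becomes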
$$\varpi_i - n_\sigma \sigma_{\varpi_i} > \varpi_j + n_\sigma \sigma_{\varpi_j}.$$
This results in fewer pairs.

In [14]:
def generate_pair_cat(df, angle_cutoff, n_sigma):
    """Build a catalogue of foreground/background pairs from one star table.

    Parameters
    ----------
    df : pandas.DataFrame
        Star table with columns ``ra`` and ``dec`` (interpreted in degrees),
        ``parallax`` and ``parallax_error``.
    angle_cutoff : float
        Maximum on-sky separation of a pair, in arcseconds.
    n_sigma : float
        Required significance of the parallax difference. A pair is kept when
        ``parallax_fg - parallax_bg > n_sigma * sqrt(err_fg**2 + err_bg**2)``.

    Returns
    -------
    pandas.DataFrame
        One row per accepted pair. Every column of ``df`` appears twice: once
        with the suffix ``_fg`` (foreground star) and once with ``_bg``
        (background star).
    """
    sky_positions = SkyCoord(np.asarray(df['ra']), np.asarray(df['dec']), unit = u.degree)

    # All ordered pairs of entries separated by less than angle_cutoff arcsec.
    matches = search_around_sky(sky_positions, sky_positions, angle_cutoff*u.arcsec, storekdtree = False)
    first_idx, second_idx = matches[0], matches[1]

    # The search matches every star with itself; discard those trivial pairs.
    is_distinct = first_idx != second_idx
    foreground = df.iloc[first_idx[is_distinct]].reset_index(drop=True)
    background = df.iloc[second_idx[is_distinct]].reset_index(drop=True)

    # Parallax cut: the first star must be closer than the second at n_sigma,
    # with the two parallax errors added in quadrature.
    parallax_gap = foreground['parallax'] - background['parallax']
    gap_error = np.sqrt(foreground['parallax_error']**2 + background['parallax_error']**2)
    is_behind = parallax_gap > n_sigma*gap_error

    # Tag the columns of each side, then put the two tables side by side.
    foreground = foreground[is_behind].rename(columns=lambda col: col + "_fg")
    background = background[is_behind].rename(columns=lambda col: col + "_bg")

    return pd.concat([foreground, background], axis = 1).reset_index(drop=True)

# Make pair catalogue

In [25]:
# Pairs closer than 3 arcsec whose parallaxes differ at 2 sigma (95% CL).
pair_cat = generate_pair_cat(dr3, angle_cutoff=3, n_sigma=2)

# Export catalog to .csv file

In [555]:
# Name the output after the index range of the input file that was processed.
pair_cat_name = 'pairs_' + str(healpix_edr3_start[current_index]) +'-'+ str(healpix_edr3_end[current_index])

# to_csv does not create missing directories, so make sure the target exists
# before writing (a no-op when it is already there).
pair_cat_dir = './accidental_pairs/'
os.makedirs(pair_cat_dir, exist_ok=True)

# NOTE(review): the file is written without a '.csv' extension; the name is left
# unchanged here in case downstream code looks the files up by this exact name.
pair_cat.to_csv(pair_cat_dir + pair_cat_name)