In [48]:
import numpy as np
import math
from timeit import default_timer as timer
import scipy
import random
from random import choices
from scipy import integrate
from scipy import interpolate
from scipy.interpolate import griddata
from scipy.sparse import csr_matrix
from scipy.sparse import dok_matrix
import scipy.stats as st
from scipy.optimize import fsolve
import heapq
import sys
import importlib
import healpy as hp
import pandas as pd

from astropy.time import Time
from time import time as tictoc

import myUnits as myU

from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.coordinates import *

In [49]:
from astropy.coordinates import SkyCoord
from astroquery.gaia import Gaia
import astropy.units as u
import matplotlib.pyplot as plt

# Importing the Gaia data

In [50]:
# Only these columns are used downstream. Asking pandas to parse just them (usecols)
# avoids loading every Gaia column into memory only to discard most of them.
columns_to_keep = ['source_id', 'ra', 'dec', 'parallax', 'parallax_error']
df = pd.read_csv('./edr3_data/GaiaSource_603105-603165.csv', usecols=columns_to_keep)

# usecols preserves the file's own column order, so select explicitly to get the order listed above.
df = df[columns_to_keep]
df

Unnamed: 0,source_id,ra,dec,parallax,parallax_error
0,5304967682164367744,134.252001,-56.426537,0.318967,0.070729
1,5304967682164878720,134.250017,-56.441186,1.529219,0.138131
2,5304967682164882432,134.260537,-56.437124,-0.175690,0.200169
3,5304967682164882944,134.242473,-56.435970,0.337269,0.276934
4,5304967682164884608,134.261234,-56.434013,0.342005,0.139389
...,...,...,...,...,...
509439,5305504213791424640,144.753855,-58.361383,1.215089,0.934963
509440,5305504213797704704,144.737992,-58.358070,0.190721,0.038808
509441,5305504213797704960,144.742173,-58.358693,0.295977,0.054708
509442,5305504213798151424,144.742861,-58.357398,,


# Creating the pair catalogue

For each star, we first locate accidental pairs by on-sky proximity. This is the condition
$$|\theta_i - \theta_j| < \theta_{\text{min}}$$
where $i$ is the index of the foreground star, $j$ is the index of a candidate background star for that foreground star, and $\theta_{\text{min}}$ is the angular cutoff (`angle_cutoff`, in arcseconds).
After this first cut, we then require that the background candidate lie behind the foreground star at a significance of $n_\sigma$:

$$\varpi_i - \varpi_j > n_\sigma \sqrt{\sigma_{\varpi_i}^2 + \sigma_{\varpi_j}^2}.$$

When $n_\sigma = 2$, this is a $2\sigma$ cut, corresponding to roughly a 95% confidence level for Gaussian parallax errors. We can tighten or relax these cuts in order to control the size and purity of the resulting pair catalogue.

In [51]:
def generate_pair_cat(df, angle_cutoff, n_sigma):
    """Build a catalogue of accidental foreground/background star pairs.

    Two cuts are applied in sequence:

    1. On-sky proximity: the two stars are within ``angle_cutoff`` of each other.
    2. Parallax ordering: the foreground parallax exceeds the background
       parallax by more than ``n_sigma`` times their combined uncertainty,
       ``par_fore - par_back > n_sigma * sqrt(err_fore**2 + err_back**2)``.

    Pairs in which either parallax or parallax error is NaN never satisfy the
    second cut and are therefore dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Source table with (at least) the columns ``ra`` and ``dec`` (passed to
        ``SkyCoord`` in degrees), ``parallax`` and ``parallax_error``.
    angle_cutoff : float
        Maximum on-sky separation of a pair, in arcseconds.
    n_sigma : float
        Required significance of the parallax difference.

    Returns
    -------
    pandas.DataFrame
        One row per pair, indexed 0..N-1. The foreground star's columns keep
        their original names; the background star's columns carry a ``.1``
        suffix.
    """
    # SkyCoord, search_around_sky and u come from the notebook's astropy imports.
    coords = SkyCoord(np.asarray(df['ra']), np.asarray(df['dec']), unit=u.degree)

    # Search df for on-sky neighbours within angle_cutoff arcsec.
    matches = search_around_sky(coords, coords, angle_cutoff*u.arcsec, storekdtree=False)
    fore_idx = np.asarray(matches[0])
    back_idx = np.asarray(matches[1])

    # Matching the catalogue against itself counts every star as its own neighbour; drop those.
    distinct = fore_idx != back_idx
    fore_idx = fore_idx[distinct]
    back_idx = back_idx[distinct]

    # Apply the parallax cut on plain arrays so that full rows are only
    # materialised for the pairs that survive it.
    parallax = np.asarray(df['parallax'], dtype=float)
    parallax_error = np.asarray(df['parallax_error'], dtype=float)
    with np.errstate(invalid='ignore'):  # NaN parallaxes simply compare False
        combined_error = np.sqrt(parallax_error[fore_idx]**2 + parallax_error[back_idx]**2)
        is_behind = parallax[fore_idx] - parallax[back_idx] > n_sigma*combined_error

    # Positional lookups (iloc) so the result does not depend on df's index labels.
    fore = df.iloc[fore_idx[is_behind]].reset_index(drop=True)
    back = df.iloc[back_idx[is_behind]].reset_index(drop=True).add_suffix(".1")

    # Both halves share the same 0..N-1 index, so a column-wise concat lines the pairs up.
    return pd.concat([fore, back], axis=1)

In [55]:
# Build the pair catalogue: 3 arcsec angular cutoff, 2-sigma parallax separation (95% CL).
pair_cat = generate_pair_cat(df, angle_cutoff=3, n_sigma=2)

In [56]:
# Display the pair catalogue: foreground-star columns first, then the background star's columns suffixed ".1".
pair_cat

Unnamed: 0,source_id,ra,dec,parallax,parallax_error,source_id.1,ra.1,dec.1,parallax.1,parallax_error.1
0,5304967922683112448,134.275744,-56.389630,1.307634,0.196366,5304967926974456960,134.275101,-56.389528,0.295811,0.145955
1,5304968098772691712,134.246457,-56.383416,0.586528,0.161239,5304968098787833472,134.246647,-56.383230,0.042422,0.091903
2,5304968133147580160,134.281739,-56.376071,0.822383,0.022145,5304968133147579648,134.280492,-56.375880,0.183617,0.072940
3,5304968201871238016,134.258255,-56.369417,0.679974,0.185373,5304968201867055104,134.258391,-56.368877,0.226865,0.043337
4,5304968266280500608,134.333362,-56.386462,0.995317,0.026125,5304968270586659072,134.333619,-56.385991,0.443511,0.095909
...,...,...,...,...,...,...,...,...,...,...
12882,5305503973281272192,144.720171,-58.403162,4.689873,0.843620,5305503973279517568,144.721090,-58.403769,0.203858,0.024184
12883,5305504110718481280,144.736292,-58.383379,0.613025,0.079659,5305504110709492608,144.737333,-58.383246,0.124551,0.154694
12884,5305504179428964480,144.718119,-58.375064,0.879671,0.353307,5305504179437956736,144.717399,-58.374794,0.125856,0.127153
12885,5305504179437962368,144.735196,-58.371426,1.570155,0.055822,5305504179428982784,144.735782,-58.371606,0.546197,0.201775


In [54]:
# Number of pairs found relative to the number of input sources.
pair_cat.shape[0] / df.shape[0]

0.025296205274770143