* `crossmatch_TOI_with_GaiaDR3.ipynb` attempts to cross-match based on position and magnitude difference.
* here result of previous cross-match from exofop or vizier is used

Last run: 241115

In [1]:
import sys

!which {sys.executable}

/ut3/jerome/miniconda3/envs/wakai/bin/python


In [2]:
import sys
sys.path.insert(0, '../code')

# TOI catalog

In [22]:
from catalog import get_tois

tois = get_tois(clobber=False)
tois.shape

(6258, 66)

In [24]:
tois.tail()

Unnamed: 0,TIC ID,TOI,Previous CTOI,Master,SG1A,SG1B,SG2,SG3,SG4,SG5,...,Stellar Mass (M_Sun) err,Sectors,Date TOI Alerted (UTC),Date TOI Updated (UTC),Date Modified,Comments,ra_deg,dec_deg,GaiaDR3_exofop,GaiaDR3_vizier
7336,444159424,7159.01,,3,4,3,3,1,4,4,...,,16175777,2024-10-24,2024-10-23,2024-11-13 12:03:06,found in faint-star QLP search,342.520542,58.906175,,
7337,154264903,7160.01,,3,4,3,3,1,4,4,...,,161756577677,2024-10-24,2024-10-23,2024-11-13 12:03:04,found in faint-star QLP search,342.05225,47.170483,,
7338,415461195,7161.01,,3,4,3,3,1,4,4,...,,1617577677,2024-10-24,2024-10-23,2024-11-13 12:03:08,found in faint-star QLP search,337.314042,56.014714,,
7339,341005781,7162.01,,3,4,3,3,1,4,4,...,0.028465,56577677,2024-10-24,2024-10-23,2024-11-13 12:03:04,found in faint-star QLP search,314.724625,59.595297,,
7340,343774169,7163.01,,3,4,3,3,1,4,4,...,0.107367,16175777,2024-10-24,2024-10-23,2024-11-13 12:03:09,found in faint-star QLP search,341.716958,54.011836,,


# cross-match/query TOI info from exofop

* CAUTION: might hit rate limit set by exofop

In [5]:
from catalog import get_tfop_info

tfop_info = get_tfop_info("TOI-4364")
tfop_info['basic_info']

{'tic_id': '4070275',
 'star_names': 'TIC 4070275, 2MASS J05201599-0414220, APASS 757852, Gaia DR2 3210444215030339584, TOI-4364, UCAC4 429-009115, WISE J052016.03-041421.5',
 'confirmed_planets': '',
 'k2_campaign': '',
 'tic_contamination_ratio': '0.165363'}

In [6]:
import math
import json
from urllib.request import urlopen
from tqdm import tqdm

def get_tfop_info(target_name: str) -> dict:
    base_url = "https://exofop.ipac.caltech.edu/tess"
    url = f"{base_url}/target.php?id={target_name.replace(' ','')}&json"
    response = urlopen(url)
    assert response.code == 200, "Failed to get data from ExoFOP-TESS"
    try:
        data_json = json.loads(response.read())
        return data_json
    except Exception:
        #raise ValueError(f"No TIC data found for {target_name}")
        return
        
def get_GaiaDR2_id_from_exofop(tfop_info):
    """Do not use swifter
    """
    if tfop_info:
        try:
            names = tfop_info['basic_info'].get('star_names').split(', ')
            gaia = [i for i in names if i[:4]=='Gaia']
            if len(gaia)>0:
                return int(gaia[0].split()[-1])
        except Exception as e:
            print(e)
            return None

def update_toi_GaiaDR2_exofop(tois):
    for i,row in tqdm(tois.iterrows()):
        if math.isnan(row['GaiaDR3_exofop']):
            tfop_info = get_tfop_info(f"TIC {row['TIC ID']}")
            tois.loc[i,'GaiaDR3_exofop'] = get_GaiaDR2_id_from_exofop(tfop_info)
    return tois

In [7]:
tois = update_toi_GaiaDR2_exofop(tois)

5188it [01:51, 38.63it/s]

invalid literal for int() with base 10: 'Gaia-2'


6258it [09:25, 11.07it/s]


# cross-match with vizier

* possibly limit is not as strict as in exofop server

In [13]:
try:
    import swifter
except:
    !{sys.executable} -m pip install swifter==1.3.4
    import swifter
assert swifter.__version__ == "1.3.4"

In [11]:
cols = ['ra_deg','dec_deg']
toi_coords = tois[cols]

## method 1

In [5]:
import warnings
warnings.simplefilter("ignore", category="UnitsWarning")
from catalog import Target

def get_GaiaDR3_id_from_vizier1(df_coords, param='Gaia', key='I/355/gaiadr3'):
    """
    Get Gaia ID by specifying catalog source using `key`.
    Try also key='I/345/gaia2'.

    Does not necessarily have GaiaDR3 entry.
    """
    return df_coords.swifter.apply(lambda coord: Target(*coord, verbose=False)\
            .query_vizier_param(param)\
            .get(key), axis=1)

def update_toi_GaiaDR2_vizier(tois):
    for i,row in tqdm(tois.iterrows()):
        if math.isnan(row['GaiaDR3_vizier']):
            tois.loc[i,'GaiaDR3_vizier'] = Target(row.ra_deg, row.dec_deg, verbose=False)\
            .query_vizier_param(param)\
            .get(key)
    return tois

## method 2 (recommended)

In [10]:
import warnings
warnings.simplefilter("ignore", category="UnitsWarning")
from catalog import Target

class TargetWithProcessing(Target):
    def process_target(self, param='GaiaDR3'):
        # Perform the Vizier query and process results
        try:
            res = self.query_vizier_param(param)
            if res:
                return set(res.values()).pop()
            return None
        except Exception as e:
            print(e)
            return None

def get_GaiaDR3_id_from_vizier2(df_coords):
    """
    Get Gaia ID without specifying catalog source.
    Just get the unique set from potentially multiple sources.

    Not good if TOI is unpublished.
    """
    return df_coords.swifter.apply(lambda coord: TargetWithProcessing(*coord, verbose=False)\
            .process_target(), axis=1)

def update_toi_GaiaDR2_vizier(tois):
    for i,row in tqdm(tois.iterrows()):
        if math.isnan(row['GaiaDR3_vizier']):
            gaiaid = TargetWithProcessing(row.ra_deg, row.dec_deg, verbose=False)\
                .process_target(param='GaiaDR3')
            # edit in place
            tois.loc[i,'GaiaDR3_vizier'] = gaiaid
    return tois

In [None]:
tois = update_toi_GaiaDR2_vizier(tois)

In [15]:
tois.to_csv('../data/TOIs.csv', index=False)

# Comparison

In [16]:
import pandas as pd

tois = pd.read_csv('../data/TOIs.csv')
#mismatch
sum((tois['GaiaDR3_exofop']-tois['GaiaDR3_vizier'])!=0)

763

In [17]:
import pandas as pd

matches = pd.read_csv('../data/TOIs_GaiaDR3_xmatch.csv')
matches.head()

Unnamed: 0,angDist,TIC ID,TOI,Previous CTOI,Master,SG1A,SG1B,SG2,SG3,SG4,...,E(BP-RP),b_E(BP-RP),B_E(BP-RP),Lib,RAJ2000,DEJ2000,e_RAJ2000,e_DEJ2000,RADEcorJ2000,GaiaDR3
0,0.031583,231663901,101.01,,5,5,5,5,5,5,...,0.0088,0.0015,0.0438,MARCS,318.736916,-55.871794,0.183824,0.186203,-0.0885,6462994429708755072
1,0.029564,149603524,102.01,,5,5,5,5,5,5,...,0.0005,0.0001,0.0013,MARCS,87.139972,-63.988441,0.270854,0.24398,-0.2165,4756649415309914240
2,0.035823,336732616,103.01,,5,5,5,5,5,5,...,0.018,0.0144,0.021,MARCS,312.457439,-24.428761,0.282852,0.230957,0.4094,6805886373600546176
3,0.016955,231670397,104.01,,5,5,5,5,5,5,...,,,,,319.949611,-58.148887,0.213587,0.200585,-0.1912,6453566701615683456
4,0.02429,144065872,105.01,,5,5,5,5,5,5,...,0.0174,0.0136,0.0234,MARCS,337.457231,-48.003069,0.180076,0.230102,0.1517,6518399301667782016


In [18]:
import numpy as np

np.sum(matches.GaiaDR3>0), np.sum(tois.GaiaDR3_vizier>0), np.sum(tois.GaiaDR3_exofop>0)

(7304, 5875, 6234)

In [19]:
import pandas as pd

tois_missing = pd.read_csv('../data/TOIs_GaiaDR3_xmatch_missing.csv')
tois_missing.shape

(15, 66)