In [2]:
import os

import numpy as np
import pandas as pd
from astropy import units as u
from astropy.coordinates import SkyCoord
from astroquery.simbad import Simbad
from tqdm import tqdm

# Reached only if the imports above did not raise.
print("Libraries imported successfully!")

# Query settings applied on the Simbad class itself.
# NOTE(review): TIMEOUT is presumably in seconds and ROW_LIMIT = -1 presumably means
# "no row limit" — confirm against the astroquery documentation.
Simbad.TIMEOUT = 1200
Simbad.ROW_LIMIT = -1
# Request the 'ids' field; fetch_gaia_source_id_with_target_identifiers reads it from each result row.
Simbad.add_votable_fields('ids')

Libraries imported successfully!


In [3]:
def fetch_gaia_source_id_with_target_identifiers(target_ids):
    """Look up the Gaia DR3 identifier of each target directly through SIMBAD.

    Parameters:
    target_ids (array-like): star identifiers to query (e.g. 'TIC 12345')

    return:
    gaia_source_id (numpy.ndarray, dtype=object): one entry per input identifier, in input
        order. Each entry is the matching 'Gaia DR3 <id>' string, or NaN when the query
        returned no row for that target or the row lists no Gaia DR3 identifier.

    """
    # Materialise the input so it can be measured and passed on as a plain list.
    target_ids = list(target_ids)
    gaia_source_id = np.full(len(target_ids), np.nan, dtype=object)
    if not target_ids:
        # Nothing to resolve: skip the remote query entirely.
        return gaia_source_id

    s = Simbad.query_objects(target_ids)
    if s is None:
        print("No results found for the provided target identifiers.")
        return gaia_source_id

    for row in s:
        # 'object_number_id' is used as the 1-based position of the target in the query list.
        # NOTE(review): confirm this numbering against the installed astroquery version.
        index = int(row['object_number_id']) - 1
        # 'ids' holds all identifiers of the object, separated by '|'.
        for identifier in str(row['ids']).split('|'):
            identifier = identifier.strip()
            if identifier.startswith('Gaia DR3 '):
                gaia_source_id[index] = identifier
                break
    return gaia_source_id

In [4]:
from astroquery.mast import Catalogs
from astroquery.gaia import Gaia
import numpy as np
import pandas as pd
from astropy.table import Table
from astroquery import log

# Set the level of the `log` object imported from astroquery to 'ERROR'.
log.setLevel('ERROR')

class ResolveTIC:
    """Resolve TIC IDs to Gaia DR3 source IDs.

    The cross-match is done in two steps:
    1. TIC ID -> Gaia DR2 source ID, read from the 'GAIA' column of the TIC catalog.
    2. Gaia DR2 source ID -> Gaia DR3 source ID, using the `gaiadr3.dr2_neighbourhood`
       table and keeping the neighbour with the smallest angular distance.
    """

    def __init__(self, tic_id):
        """Store the TIC ID(s) to resolve.

        Parameters:
        - tic_id: a single TIC ID (str or int) or a sized collection of them. A leading
          'TIC ' prefix is allowed; it is removed by `cross_match_tic_to_gaia_dr3`.
        """
        self.tic_id = tic_id

    def cross_match_tic_to_gaia_dr3(self) -> pd.DataFrame:
        """Resolve the stored TIC IDs to Gaia DR3 source IDs.

        Note that `self.tic_id` is replaced by the normalised list of prefix-free strings.

        Returns:
        - DataFrame with the columns 'TIC', 'GAIA', 'dr2_source_id', 'dr3_source_id' and
          'angular_distance'. It is built with a right merge on the input TIC list, so every
          input TIC ID is represented. 'dr3_source_id' is a 'Gaia DR3 <id>' string, or
          missing when no DR3 match was found.

        Raises:
        - ValueError: if no TIC ID was provided or the input contains missing values.
        """
        if isinstance(self.tic_id, (str, int)):
            self.tic_id = [self.tic_id]

        if len(self.tic_id) == 0:
            raise ValueError("No TIC ID provided.")

        # Check for NaN values in the input list
        if any(pd.isna(self.tic_id)):
            raise ValueError("NaN values found in the input list.")

        # Remove 'TIC' prefix from TIC IDs, if present.
        # np.char is the public namespace; np.core is a private path deprecated in NumPy 2.
        self.tic_id = np.char.replace(np.array(self.tic_id, dtype=str), 'TIC ', '').tolist()

        gaia_dr2_ids = self._get_gaia_dr2_id_from_tic_id()
        dr2_neighbours = self._cross_match_gaia_dr2_to_dr3(gaia_dr2_ids['GAIA'])
        gaia_dr3_ids = self._find_best_neighbour_in_dr3_for_dr2(dr2_neighbours)

        # Nullable integer dtypes keep the 19-digit IDs exact even when some are missing.
        gaia_dr2_df = gaia_dr2_ids.to_pandas().astype({'ID': 'Int64', 'GAIA': 'Int64'})
        gaia_dr3_df = gaia_dr3_ids.to_pandas().astype(
            {'dr2_source_id': 'Int64', 'dr3_source_id': 'Int64', 'angular_distance': 'float64'}
        )
        gaia_dr3_df['dr3_source_id'] = 'Gaia DR3 ' + gaia_dr3_df['dr3_source_id'].astype(str)

        # Attach the best DR3 match to each TIC/DR2 pair; TICs without a match keep missing values.
        merged_table = pd.merge(gaia_dr2_df, gaia_dr3_df, left_on='GAIA', right_on='dr2_source_id', how='left')
        merged_table = merged_table.rename(columns={'ID': 'TIC'})

        # Right-merge on the requested TIC list so that every input ID appears in the result,
        # including IDs for which the TIC catalog query returned nothing.
        requested = pd.DataFrame(self.tic_id, columns=['TIC'], dtype='Int64')
        merged_table = merged_table.merge(requested, on='TIC', how='right')
        return merged_table

    def _get_gaia_dr2_id_from_tic_id(self) -> "Table":
        """Query the TIC catalog for the stored IDs and return its 'ID' and 'GAIA' columns."""
        obs_table = Catalogs.query_criteria(ID=self.tic_id, catalog="Tic")  # pylint:disable=no-member
        return obs_table['ID', 'GAIA']

    def _cross_match_gaia_dr2_to_dr3(self, dr2_source_ids) -> "Table":
        """
        Cross-match Gaia DR2 source IDs to their neighbouring Gaia DR3 source IDs.

        Parameters:
        - dr2_source_ids: A single Gaia DR2 source ID, or a list/array/column of them.
          Entries that are neither integers nor integer-like strings are ignored.

        Returns:
        - Table with the columns 'dr2_source_id', 'dr3_source_id' and 'angular_distance',
          as selected from `gaiadr3.dr2_neighbourhood`. It is empty when no valid DR2 ID
          was supplied.
        """
        # atleast_1d lets a scalar ID be iterated like a one-element array.
        dr2_source_ids = np.atleast_1d(np.array(dr2_source_ids))

        # Keep only integer-like IDs; everything else is skipped.
        valid_ids = []
        for x in dr2_source_ids:
            if isinstance(x, str):
                try:
                    valid_ids.append(str(int(x)))
                except ValueError:
                    # If conversion to int fails, skip this ID
                    continue
            elif isinstance(x, (int, np.integer)):
                valid_ids.append(str(x))
        # Duplicates add nothing to an IN (...) list; drop them while keeping order.
        valid_ids = list(dict.fromkeys(valid_ids))

        # Handle cases where there are no valid source IDs
        if len(valid_ids) == 0:
            return Table(names=['dr2_source_id', 'dr3_source_id', 'angular_distance'], dtype=['int64', 'int64', 'float64'])

        # Every entry of valid_ids went through int()/integer checks above, so
        # interpolating them into the query text is safe.
        query = f"""
        SELECT dr2_source_id, dr3_source_id, angular_distance
        FROM gaiadr3.dr2_neighbourhood
        WHERE dr2_source_id IN ({", ".join(valid_ids)})
        """

        job = Gaia.launch_job_async(query)
        dr3_sources = job.get_results()
        return dr3_sources

    def _find_best_neighbour_in_dr3_for_dr2(self, dr2_neighbours: "Table") -> "Table":
        """
        Find the best match in Gaia DR3 for a list of Gaia DR2 neighbours.

        Parameters:
        - dr2_neighbours: Table containing Gaia DR2 neighbours, containing columns 'dr2_source_id', 'dr3_source_id', and 'angular_distance'.

        Returns:
        - matched_dr3_ids: Table with one row per DR2 source ID: the neighbour with the
          smallest 'angular_distance'.
        """
        # Initialize an empty table to store the best matches
        best_matches = Table(names=['dr2_source_id', 'dr3_source_id', 'angular_distance'], dtype=['int64', 'int64', 'float64'])

        # Nothing to group when there are no neighbours at all.
        if len(dr2_neighbours) == 0:
            return best_matches

        # Group the DR2 neighbours by DR2 source ID
        dr2_groups = dr2_neighbours.group_by('dr2_source_id')

        # Iterate over each group of DR2 neighbours
        for group in dr2_groups.groups:
            # Find the row with the minimum angular distance
            best = group[np.argmin(group['angular_distance'])]

            # Append by column name so the result does not depend on the input column order.
            best_matches.add_row([best['dr2_source_id'], best['dr3_source_id'], best['angular_distance']])

        return best_matches

The Gaia archive will be intermittently unavailable from 08:00 to 09:00 on Monday, September 1st, as a result of scheduled maintenance (the deployment of a new version).


# STEP 1. Get Gaia DR3 ID directly

In [5]:
# Load the TOI table; lines starting with '#' in the CSV are treated as comments.
toi_data = pd.read_csv('TOI_2025.08.02_05.03.17.csv', comment='#')

# Prefix each value of the 'tid' column with 'TIC ' to build the identifiers passed to the query
star_names = ['TIC ' + str(tid) for tid in toi_data['tid']]

print(f"Querying {len(star_names)} stars for Gaia DR3 source IDs...")
print("-" * 50)

# Fetch Gaia source IDs for the target identifiers
gaia_source_ids = fetch_gaia_source_id_with_target_identifiers(star_names)

# One entry per row of toi_data, in the same order as star_names; NaN where nothing was found.
toi_data['gaia_source_id'] = gaia_source_ids
print(f"end= {len(toi_data[toi_data['gaia_source_id'].notna()])} stars with Gaia DR3 source IDs")

Querying 7658 stars for Gaia DR3 source IDs...
--------------------------------------------------
end= 5433 stars with Gaia DR3 source IDs


# STEP 2. Get Gaia DR3 ID using Gaia DR2 as a bridge

In [6]:
# Index labels of the rows that still have no Gaia DR3 source ID after STEP 1
index_toi_without_gaia = toi_data.index[toi_data['gaia_source_id'].isna()]

# Overwrite 'tid' with the 'TIC '-prefixed names built in STEP 1; later cells use this prefixed form.
toi_data['tid'] = star_names

In [7]:
print(f"Resolving {len(index_toi_without_gaia)} stars without Gaia DR3 source IDs...")
print("Matching TIC to Gaia DR2, then to Gaia DR3...")
returned_df = ResolveTIC(toi_data.loc[index_toi_without_gaia, 'tid']).cross_match_tic_to_gaia_dr3()

# The assignment below is positional (.values drops the index), so it is only valid when the
# resolver returns exactly one row per requested TIC. Check that explicitly so that a
# mismatch is reported with context instead of a generic shape error.
if len(returned_df) != len(index_toi_without_gaia):
    raise ValueError(
        f"ResolveTIC returned {len(returned_df)} rows for {len(index_toi_without_gaia)} requested TIC IDs; "
        "cannot assign the results positionally."
    )
toi_data.loc[index_toi_without_gaia, 'gaia_source_id'] = returned_df['dr3_source_id'].values

Resolving 2225 stars without Gaia DR3 source IDs...
Matching TIC to Gaia DR2, then to Gaia DR3...


In [8]:
# Rows of toi_data with a non-missing 'gaia_source_id' after STEP 1 and STEP 2, out of all rows.
print(f"In the end, {len(toi_data[toi_data['gaia_source_id'].notna()])} out of {len(toi_data)} stars with Gaia DR3 source IDs")

In the end, 7639 out of 7658 stars with Gaia DR3 source IDs


# STEP 3. Get Apogee [Fe/H]

In [9]:
import pyvo

# TAP endpoint queried for the sdss_dr17.apogee2_allstar table.
service_url = "https://datalab.noirlab.edu/tap"

svc = pyvo.dal.TAPService(service_url)
# NOTE(review): it is not evident that pyvo reads a TIMEOUT attribute on the service
# object — confirm that this assignment has any effect.
svc.TIMEOUT = 1200

# Unique Gaia DR3 IDs with the 'Gaia DR3 ' prefix stripped, as strings of digits.
gaia_id_only_digits = toi_data['gaia_source_id'].dropna().drop_duplicates().str.replace('Gaia DR3 ', '', regex=False).values
if len(gaia_id_only_digits) == 0:
    # An empty list would produce the invalid clause "in ()".
    raise ValueError("No Gaia DR3 source IDs available for the APOGEE query.")
adql = f""" 
select gaiaedr3_source_id, fe_h, fe_h_flag
from sdss_dr17.apogee2_allstar
where gaiaedr3_source_id in ({", ".join(gaia_id_only_digits)})
"""
job = svc.submit_job(adql, maxrec=None)
job.run()
job.wait(phases=["COMPLETED", "ERROR", "ABORTED"], timeout=3600)
# Stop here on failure; otherwise the lines below would hit an undefined (or stale,
# from an earlier run) `apogee_result`.
if job.phase != "COMPLETED":
    raise RuntimeError(f"APOGEE TAP job ended in phase {job.phase!r} instead of 'COMPLETED'.")
apogee_result = job.fetch_result()

# Keep one row per Gaia ID so that the left merge below cannot multiply TOI rows.
apogee_result = apogee_result.to_table().to_pandas().drop_duplicates(subset='gaiaedr3_source_id')
apogee_result['gaiaedr3_source_id'] = 'Gaia DR3 ' + apogee_result['gaiaedr3_source_id'].astype(str)
print(f"Number of stars with Apogee [Fe/H]: {apogee_result['fe_h'].notna().sum()}")
print(f"Number of stars with Apogee [Fe/H] flag is 0 (good): {((apogee_result['fe_h_flag'] == 0) & (apogee_result['fe_h'].notna())).sum()}")

# remove rows where fe_h_flag is not 0
apogee_result = apogee_result[(apogee_result['fe_h_flag'] == 0) & (apogee_result['fe_h'].notna())]

toi_data_with_apogee = toi_data.merge(apogee_result, left_on='gaia_source_id', right_on='gaiaedr3_source_id', how='left')
toi_data_with_apogee = toi_data_with_apogee.rename(columns={'fe_h': 'fe_h_apogee'})
# This count is over rows of the TOI table, so a star hosting several TOIs is counted
# once per TOI (unlike the two per-star counts printed above).
print(f"Number of stars with Apogee [Fe/H]: {toi_data_with_apogee['fe_h_apogee'].notna().sum()}")

Number of stars with Apogee [Fe/H]: 420
Number of stars with Apogee [Fe/H] flag is 0 (good): 412
Number of stars with Apogee [Fe/H]: 473


# STEP 4. Get GALAH [Fe/H]

In [10]:
# Index labels of the rows that have no APOGEE [Fe/H]; the GALAH query below is restricted to them.
index_toi_without_apogee_feh = toi_data_with_apogee.index[toi_data_with_apogee['fe_h_apogee'].isna()]
print(f"Filtered out {len(index_toi_without_apogee_feh)} stars without APOGEE [Fe/H]")


Filtered out 7185 stars without APOGEE [Fe/H]


In [11]:
# This server is hard to connect. You may need to try multiple times. It WILL eventually WORK.
service_url = "https://datacentral.org.au/vo/tap"

svc = pyvo.dal.TAPService(service_url)
# NOTE(review): this sets an attribute on a private pyvo member; confirm that the underlying
# session actually applies a session-wide `timeout` attribute.
svc._session.timeout = (1200, 1200)

# Unique Gaia DR3 IDs (digits only) of the rows that have no APOGEE [Fe/H].
gaia_id_only_digits = toi_data_with_apogee.loc[index_toi_without_apogee_feh, 'gaia_source_id'].dropna().drop_duplicates().str.replace('Gaia DR3 ', '', regex=False).values
if len(gaia_id_only_digits) == 0:
    # An empty list would produce the invalid clause "in ()".
    raise ValueError("No Gaia DR3 source IDs available for the GALAH query.")
adql = f""" 
select gaiadr3_source_id, fe_h, flag_fe_h
from galah_dr4.mainstartable
where gaiadr3_source_id in ({", ".join(gaia_id_only_digits)})
"""
job = svc.submit_job(adql, maxrec=None) # async
job.run()
job.wait(phases=["COMPLETED", "ERROR", "ABORTED"], timeout=3600)
# Stop here on failure; otherwise the lines below would hit an undefined (or stale,
# from an earlier run) `galah_result`.
if job.phase != "COMPLETED":
    raise RuntimeError(f"GALAH TAP job ended in phase {job.phase!r} instead of 'COMPLETED'.")
galah_result = job.fetch_result()

# Synchronous alternative to the job above:
# galah_result = svc.search(adql, maxrec=None) # sync

# Keep one row per Gaia ID so that the left merge below cannot multiply TOI rows.
galah_result = galah_result.to_table().to_pandas().drop_duplicates(subset='gaiadr3_source_id')
galah_result['gaiadr3_source_id'] = 'Gaia DR3 ' + galah_result['gaiadr3_source_id'].astype(str)
print(f"Number of stars with Galah [Fe/H]: {galah_result['fe_h'].notna().sum()}")
print(f"Number of stars with Galah [Fe/H] flag is 0 (good): {((galah_result['flag_fe_h'] == 0) & (galah_result['fe_h'].notna())).sum()}")

# remove rows where flag_fe_h is not 0
galah_result = galah_result[(galah_result['flag_fe_h'] == 0) & (galah_result['fe_h'].notna())]

toi_data_with_galah = toi_data_with_apogee.merge(galah_result, left_on='gaia_source_id', right_on='gaiadr3_source_id', how='left')
toi_data_with_galah = toi_data_with_galah.rename(columns={'fe_h': 'fe_h_galah'})
# This count is over rows of the TOI table, so a star hosting several TOIs is counted
# once per TOI (unlike the two per-star counts printed above).
print(f"Number of stars with Galah [Fe/H]: {toi_data_with_galah['fe_h_galah'].notna().sum()}")


Number of stars with Galah [Fe/H]: 322
Number of stars with Galah [Fe/H] flag is 0 (good): 259
Number of stars with Galah [Fe/H]: 275


In [12]:
# Sum of the two non-missing counts. GALAH was only queried for rows without an APOGEE value,
# so the sum equals the "either" count as long as those two sets of rows do not overlap.
print(f"Number of stars with either Apogee or Galah [Fe/H]: {toi_data_with_galah['fe_h_apogee'].notna().sum() + toi_data_with_galah['fe_h_galah'].notna().sum()}")

Number of stars with either Apogee or Galah [Fe/H]: 748


# STEP 5. Get [Fe/H] from SIMBAD (various sources)

In [13]:
# Configure SIMBAD to include metallicity measurements:
# create a separate Simbad instance and add the 'ids', 'otype' and 'mesfe_h' fields to it.
simbad = Simbad()
simbad.add_votable_fields('ids', 'otype', 'mesfe_h')


# Print the fields returned by get_votable_fields() whose name contains 'fe' or 'metal'
# (case-insensitive).
print("Available metallicity-related fields in SIMBAD:")
available_fields = simbad.get_votable_fields()
metallicity_fields = [field for field in available_fields if 'fe' in field.lower() or 'metal' in field.lower()]
for field in metallicity_fields:
    print(f"- {field}")

Available metallicity-related fields in SIMBAD:
- mesfe_h.bibcode
- mesfe_h.catno
- mesfe_h.compstar
- mesfe_h.fe_h
- mesfe_h.fe_h_prec
- mesfe_h.flag
- mesfe_h.log_g
- mesfe_h.log_g_prec
- mesfe_h.mespos
- mesfe_h.teff


In [16]:
# Boolean mask (aligned with toi_data_with_galah) of rows that have neither an APOGEE nor a
# GALAH [Fe/H]. It is reused further down to restrict which SIMBAD values are kept.
mask_toi_without_apogee_and_galah_feh = toi_data_with_galah['fe_h_apogee'].isna() & toi_data_with_galah['fe_h_galah'].isna()
print(f"Number of stars without Apogee and Galah [Fe/H]: {(mask_toi_without_apogee_and_galah_feh).sum()}")

Number of stars without Apogee and Galah [Fe/H]: 6910


In [17]:
# The 'tid' column already carries the 'TIC ' prefix (it was overwritten in STEP 2),
# so it is used as-is for the SIMBAD query.
star_names = toi_data_with_galah['tid']

print(f"Querying {len(star_names)} stars for metallicity data...")
print("-" * 50)

results = simbad.query_objects(star_names)
if results is not None:
    # len(results) counts result rows, which can exceed the number of queried stars
    # (see the duplicate removal on 'object_number_id' in the next cell).
    print(f"Successfully retrieved data for {len(results)} stars.")

Querying 7658 stars for metallicity data...
--------------------------------------------------
Successfully retrieved data for 14610 stars.


In [18]:
# Multiple metallicity measurements can be present, so we drop duplicates based on 'object_number_id' and keep the first occurrence
# (the first row in the order in which the query returned them).
results_no_duplicates = results.to_pandas().drop_duplicates(subset='object_number_id').reset_index(drop=True)
print(f"Number of stars after removing duplicates: {len(results_no_duplicates)}")

Number of stars after removing duplicates: 7658


In [19]:
toi_data_final = toi_data_with_galah.copy()
# 'object_number_id' - 1 is used as the row label in toi_data_final.
# NOTE(review): this assumes object_number_id is the 1-based position in the queried list and
# that toi_data_final has a default 0..n-1 index — confirm.
toi_data_final.loc[results_no_duplicates['object_number_id'] - 1, 'fe_h_simbad'] = results_no_duplicates['mesfe_h.fe_h'].values
# Discard the SIMBAD value for rows that already have an APOGEE or GALAH [Fe/H].
toi_data_final.loc[~mask_toi_without_apogee_and_galah_feh, 'fe_h_simbad'] = np.nan
# Final [Fe/H]: APOGEE first, then GALAH, then SIMBAD.
toi_data_final['fe_h'] = toi_data_final['fe_h_apogee'].combine_first(toi_data_final['fe_h_galah']).combine_first(toi_data_final['fe_h_simbad'])
print(f"Number of stars with metallicity [Fe/H]: {toi_data_final['fe_h'].notna().sum()}")

# Drop the two merge-key columns added by the APOGEE and GALAH merges before writing the CSV.
toi_data_final.drop(columns=['gaiadr3_source_id', 'gaiaedr3_source_id']).to_csv('TOI_2025.08.02_05.03.17_with_gaiaID_metallicity.csv', index=False)

Number of stars with metallicity [Fe/H]: 1758


# STEP 6. Get Gaia DR3 [M/H], distance and other parameters

## 6.1 RVS spectra (GSP-Spec)

In [19]:
# Reload the table written at the end of STEP 5, so STEP 6 can start from the saved file.
toi_data_final = pd.read_csv('TOI_2025.08.02_05.03.17_with_gaiaID_metallicity.csv', comment='#')

In [30]:
# Rows that still have no [Fe/H] after APOGEE, GALAH and SIMBAD.
index_toi_without_feh = toi_data_final.index[toi_data_final['fe_h'].isna()]
print(f"Filtered out {len(index_toi_without_feh)} stars without [Fe/H]")

# Unique Gaia DR3 IDs of those rows, with the 'Gaia DR3 ' prefix stripped (strings of digits).
gaia_id = toi_data_final.loc[index_toi_without_feh, 'gaia_source_id'].dropna().drop_duplicates()
gaia_id = gaia_id.str.replace('Gaia DR3 ', '', regex=False).values
if len(gaia_id) == 0:
    # An empty list would produce the invalid clause "in ()".
    raise ValueError("No Gaia DR3 source IDs left to query for GSP-Spec parameters.")

# The joins are inner joins: a source is returned only when it is present in all three tables.
parameter_query = f"""select sp.source_id, sp.mh_gspspec, sp.flags_gspspec, sp.logg_gspspec,
                        r_med_geo, r_lo_geo, r_hi_geo
                        FROM gaiadr3.astrophysical_parameters AS sp
                        JOIN gaiadr3.gaia_source as g on sp.source_id = g.source_id
                        JOIN external.gaiaedr3_distance as dist on g.source_id = dist.source_id 
                        where 
                        sp.source_id in ({", ".join(gaia_id)})"""

Gaia.MAIN_GAIA_TABLE = "gaiadr3.gaia_source"
Gaia.ROW_LIMIT = -1

job = Gaia.launch_job_async(parameter_query)
r = job.get_results().to_pandas()

Filtered out 5900 stars without [Fe/H]


In [53]:
flags = r['flags_gspspec']

# Check flags, ref source: Fig. 32 in https://www.aanda.org/articles/aa/pdf/2023/06/aa43790-22.pdf


def _passes_gspspec_flag_cuts(flag):
    """Return True when a flags_gspspec string passes the cuts applied to GSP-Spec [M/H].

    The cuts are: characters at positions 3 and 6 (1-based) are both "0", and the digit at
    position 8 is <= 2. Missing, empty or too-short flag strings are rejected.
    """
    if not isinstance(flag, str) or len(flag) < 8:
        return False
    # Check condition 1: positions 3, 6 are all 0
    cond1 = flag[2] == "0" and flag[5] == "0"
    # Check condition 3: position 8 <= 2
    cond3 = int(flag[7]) <= 2
    return cond1 and cond3


# NOTE(review): the original cell also computed "condition 2" (positions 2, 5 and 13 all "0")
# but never used it in the selection. It is left out here so the selected sample is
# unchanged — confirm whether that condition was meant to be required as well.
masks = np.array([_passes_gspspec_flag_cuts(flag) for flag in flags], dtype=bool)
gspspec_results = r[masks].copy()

print(f"Number of stars with GSP-SPEC [M/H]: {gspspec_results['mh_gspspec'].notna().sum()}")


Number of stars with GSP-SPEC [M/H]: 1492


In [54]:
def calculate_calibrated_gspspec(row):
    """Return the GSP-Spec [M/H] of one row, with a log g dependent correction for low log g.

    `row` must support item access for 'logg_gspspec' and 'mh_gspspec'. When log g is below
    3.5, a cubic polynomial in log g is added to [M/H]; otherwise (including a NaN log g)
    [M/H] is returned unchanged.
    """
    logg = row['logg_gspspec']
    mh = row['mh_gspspec']

    # Guard clause: no correction at log g >= 3.5 (a NaN comparison is False, so NaN lands here too).
    if not logg < 3.5:
        return mh

    return mh + 0.274 - 0.1373 * logg - 0.0050 * (logg ** 2) + 0.0048 * (logg ** 3)

# Apply the calibration to GSP-SPEC [M/H], row by row
calibrated_gspspec = gspspec_results.apply(calculate_calibrated_gspspec, axis=1)
gspspec_results['calibrated_gspspec'] = calibrated_gspspec

print(f"Number of stars with GSP-SPEC [M/H]: {gspspec_results['calibrated_gspspec'].notna().sum()}")


Number of stars with GSP-SPEC [M/H]: 1492


## 6.2 GSP-PHOT

In [55]:
# Sources returned by the GSP-Spec query that did not pass the flag cuts.
gaia_id_without_gspspec = r[~masks]['source_id'].values
if len(gaia_id_without_gspspec) == 0:
    # An empty list would produce the invalid clause "in ()".
    raise ValueError("No Gaia DR3 source IDs left to query for GSP-Phot parameters.")

# source_id values are numeric here, so convert each one to text before joining.
source_id_list = ", ".join(str(source_id) for source_id in gaia_id_without_gspspec)

# The joins are inner joins: a source is returned only when it is present in all three tables.
parameter_query = f"""select sp.source_id, l, b, g.phot_g_mean_mag, g.mh_gspphot, g.teff_gspphot, g.logg_gspphot, g.azero_gspphot, g.ebpminrp_gspphot, g.ag_gspphot, 
                        sp.mg_gspphot, g.libname_gspphot, g.ruwe,
                        r_med_geo, r_lo_geo, r_hi_geo
                        FROM gaiadr3.astrophysical_parameters AS sp
                        JOIN gaiadr3.gaia_source as g on sp.source_id = g.source_id
                        JOIN external.gaiaedr3_distance as dist on g.source_id = dist.source_id 
                        where 
                        sp.source_id in ({source_id_list})"""

job = Gaia.launch_job_async(parameter_query)
r1 = job.get_results().to_pandas()


In [56]:
# Keep only sources with ruwe <= 1.25.
# NOTE(review): the 1.25 threshold is not justified anywhere in this notebook — add a reference.
gspphot_results = r1[r1['ruwe'] <= 1.25].copy()

print(f"Number of stars with GSP-PHOT [M/H]: {gspphot_results['mh_gspphot'].notna().sum()}")

Number of stars with GSP-PHOT [M/H]: 2563


In [57]:
import gdr3apcal

# Start with an all-NaN column; only rows with a GSP-Phot [M/H] receive a calibrated value.
gspphot_results['calibrated_gspphot'] = np.nan
mask_of_notna = gspphot_results['mh_gspphot'].notna()

# Instantiate calibration object
calib = gdr3apcal.GaiaDR3_GSPPhot_cal()
# Apply calibrations to [M/H] and/or Teff, returning a numpy array of calibrated values.
# The input is the subset of rows with a non-missing 'mh_gspphot', re-indexed from 0.
metal_calib = calib.calibrateMetallicity(gspphot_results[mask_of_notna].reset_index(drop=True))

# Add the numpy array as a new column to the original DataFrame.
# NOTE(review): the assignment is positional, so it assumes calibrateMetallicity returns one
# value per input row, in input order — confirm against the gdr3apcal documentation.
gspphot_results.loc[mask_of_notna, 'calibrated_gspphot'] = metal_calib

Automatically adding "cos(b)" from "b" [assuming degrees].
Automatically adding "cos(b)" from "b" [assuming degrees].
Automatically adding "cos(b)" from "b" [assuming degrees].
Automatically adding "cos(b)" from "b" [assuming degrees].


## 6.3 Combine

In [58]:
# combine gspspec and gspphot results:
# rename the calibrated columns to the names used in the final table
gspspec_results = gspspec_results.rename(columns={'calibrated_gspspec': 'mh_gspspec_gaiadr3'})
gspphot_results = gspphot_results.rename(columns={'calibrated_gspphot': 'mh_gspphot_gaiadr3'})

# check if gspspec_results and gspphot_results have common source_id
# (only reported when some are found; nothing is printed otherwise)
common_source_ids = set(gspspec_results['source_id']).intersection(set(gspphot_results['source_id']))
if common_source_ids:
    print(f"Common source IDs found: {len(common_source_ids)}")

# Combine the two results, since no common source_id, we can use concat.
# Note that the concat runs even if common source IDs were reported above; columns present
# in only one of the two frames are filled with NaN for the rows of the other.
merged_results = pd.concat([gspspec_results, gspphot_results], axis=0, ignore_index=True)


In [59]:
# Put 'source_id' in the same 'Gaia DR3 <id>' text form as the 'gaia_source_id' column it is merged on.
# Not idempotent: running this cell twice adds the prefix twice.
merged_results['source_id'] = 'Gaia DR3 ' + merged_results['source_id'].astype(str)

In [60]:
# Left merge: every TOI row is kept; rows without a Gaia result get NaN in the new columns.
toi_data_final_with_gaia = toi_data_final.copy()
toi_data_final_with_gaia = toi_data_final_with_gaia.merge(merged_results, left_on='gaia_source_id', right_on='source_id', how='left')

In [61]:
# Fill remaining gaps in 'fe_h': existing value first, then GSP-Spec [M/H], then GSP-Phot [M/H].
toi_data_final_with_gaia['fe_h'] = toi_data_final_with_gaia['fe_h'].combine_first(toi_data_final_with_gaia['mh_gspspec_gaiadr3']).combine_first(toi_data_final_with_gaia['mh_gspphot_gaiadr3'])

In [62]:
# Number of rows without APOGEE/GALAH [Fe/H] that have a calibrated GSP-Phot [M/H].
# NOTE(review): the mask was built in STEP 5 on toi_data_with_galah; this relies on
# toi_data_final_with_gaia having the same row order and index — confirm.
toi_data_final_with_gaia[mask_toi_without_apogee_and_galah_feh]['mh_gspphot_gaiadr3'].notna().sum()

2236

In [101]:
# Print how many rows have a value in the combined 'fe_h' column, then the same count for
# each individual source column.
print('Summary of metallicity data:')
total_with_feh = toi_data_final_with_gaia["fe_h"].notna().sum()
print(f'Total number of stars with [Fe/H]: {total_with_feh}')
print('-' * 50)

per_source_columns = (
    ('Apogee [Fe/H]', 'fe_h_apogee'),
    ('Galah [Fe/H]', 'fe_h_galah'),
    ('SIMBAD [Fe/H]', 'fe_h_simbad'),
    ('GSP-SPEC [M/H]', 'mh_gspspec_gaiadr3'),
    ('GSP-PHOT [M/H]', 'mh_gspphot_gaiadr3'),
)
for label, column in per_source_columns:
    available = toi_data_final_with_gaia[column].notna().sum()
    print(f'Total number of stars with {label}: {available}')


Summary of metallicity data:
Total number of stars with [Fe/H]: 5567
--------------------------------------------------
Total number of stars with Apogee [Fe/H]: 473
Total number of stars with Galah [Fe/H]: 275
Total number of stars with SIMBAD [Fe/H]: 1010
Total number of stars with GSP-SPEC [M/H]: 1573
Total number of stars with GSP-PHOT [M/H]: 2236


In [95]:
# Columns kept in the final output table.
cols1 = ['toi', 'tid', 'pl_orbper', 'pl_trandurh', 'pl_trandep', 'pl_rade', 'pl_eqt', 'pl_insol',
         'st_teff', 'st_logg', 'st_rad', 'ra', 'dec', 'tfopwg_disp','fe_h', 'gaia_source_id']

# Copy so that later changes to df do not touch toi_data_final_with_gaia.
df = toi_data_final_with_gaia[cols1].copy()


In [99]:
# Keep only the rows in which every selected column has a value.
df = df.dropna()
# DataFrame.to_csv does not create missing directories, so make sure the target exists.
os.makedirs('output_data', exist_ok=True)
df.to_csv('output_data/TOI_2025.08.02_05.03.17_final.csv', index=False)

In [97]:
# Display the final table.
df

Unnamed: 0,toi,tid,pl_orbper,pl_trandurh,pl_trandep,pl_rade,pl_eqt,pl_insol,st_teff,st_logg,st_rad,ra,dec,tfopwg_disp,fe_h,gaia_source_id
7,1007.01,TIC 65212867,6.998921,3.953000,2840.000000,14.775200,1282.000000,448.744000,6596.0,3.71000,2.700000,112.752393,-4.463359,PC,0.154000,Gaia DR3 3060208664203492992
10,101.01,TIC 231663901,1.430370,1.616599,18960.712294,13.187450,1525.904809,1281.240825,5600.0,4.48851,0.890774,318.737012,-55.871863,KP,-0.261609,Gaia DR3 6462994429708755072
12,1011.01,TIC 114018671,2.470498,2.191000,250.000000,1.446560,1364.000000,575.597000,5413.7,4.46000,0.940000,113.984761,-32.841999,PC,-0.110000,Gaia DR3 5592015297342611968
17,1016.01,TIC 175310067,12.688942,7.920100,3232.852788,14.060834,1158.131037,425.159418,7601.0,4.31000,1.536410,116.110771,-36.098285,FP,0.223936,Gaia DR3 5587573167284647296
19,1018.01,TIC 291555748,4.959120,2.509000,7440.000000,22.912300,1419.000000,673.550000,5889.0,3.86000,2.000000,111.716019,-50.266567,FP,-0.118993,Gaia DR3 5505717481529404416
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7639,981.01,TIC 127476180,1.605559,2.253000,1168.000000,4.703080,2124.000000,3384.430000,6711.0,4.25000,1.480000,117.113226,-28.204828,FP,0.292080,Gaia DR3 5601388187294617856
7645,987.01,TIC 52548453,5.214676,4.983063,3754.150648,14.447677,1607.354148,1037.070000,6080.0,3.78000,2.270000,100.925700,-30.491967,FP,0.080000,Gaia DR3 5584423341345392896
7646,988.01,TIC 113553629,4.304380,3.037000,672.002000,13.756400,1319.000000,503.880000,6056.9,4.17000,4.820000,113.424039,-30.996054,FP,0.288790,Gaia DR3 5593167070135870080
7655,997.01,TIC 341729521,8.413486,3.556833,17479.605331,20.510696,857.731431,127.916421,5786.0,4.52000,0.926261,121.319521,-59.579798,FP,-0.480000,Gaia DR3 5291265022797336448


In [87]:
# Rows of df1 whose index labels do not appear in df.
# NOTE(review): df1 is not created anywhere in the visible notebook (only re-assigned from
# itself in the next cell), so this fails on a fresh kernel — restore its definition or remove the cell.
df1[~df1.index.isin(df.index)]

Unnamed: 0,toi,tid,pl_orbper,pl_trandurh,pl_trandep,pl_rade,pl_eqt,pl_insol,st_teff,st_logg,st_rad,ra,dec,tfopwg_disp,fe_h,gaia_source_id


In [86]:
# Drop rows of df1 that contain any missing value, then display it.
# NOTE(review): df1 is not created anywhere in the visible notebook before this point, and the
# execution counts show this cell ran before the comparison cell above — restore the missing
# definition or remove both cells.
df1 = df1.dropna()
df1

Unnamed: 0,toi,tid,pl_orbper,pl_trandurh,pl_trandep,pl_rade,pl_eqt,pl_insol,st_teff,st_logg,st_rad,ra,dec,tfopwg_disp,fe_h,gaia_source_id
7,1007.01,TIC 65212867,6.998921,3.953000,2840.000000,14.775200,1282.000000,448.744000,6596.0,3.71000,2.700000,112.752393,-4.463359,PC,0.154000,Gaia DR3 3060208664203492992
10,101.01,TIC 231663901,1.430370,1.616599,18960.712294,13.187450,1525.904809,1281.240825,5600.0,4.48851,0.890774,318.737012,-55.871863,KP,-0.261609,Gaia DR3 6462994429708755072
12,1011.01,TIC 114018671,2.470498,2.191000,250.000000,1.446560,1364.000000,575.597000,5413.7,4.46000,0.940000,113.984761,-32.841999,PC,-0.110000,Gaia DR3 5592015297342611968
17,1016.01,TIC 175310067,12.688942,7.920100,3232.852788,14.060834,1158.131037,425.159418,7601.0,4.31000,1.536410,116.110771,-36.098285,FP,0.223936,Gaia DR3 5587573167284647296
19,1018.01,TIC 291555748,4.959120,2.509000,7440.000000,22.912300,1419.000000,673.550000,5889.0,3.86000,2.000000,111.716019,-50.266567,FP,-0.118993,Gaia DR3 5505717481529404416
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7639,981.01,TIC 127476180,1.605559,2.253000,1168.000000,4.703080,2124.000000,3384.430000,6711.0,4.25000,1.480000,117.113226,-28.204828,FP,0.292080,Gaia DR3 5601388187294617856
7645,987.01,TIC 52548453,5.214676,4.983063,3754.150648,14.447677,1607.354148,1037.070000,6080.0,3.78000,2.270000,100.925700,-30.491967,FP,0.080000,Gaia DR3 5584423341345392896
7646,988.01,TIC 113553629,4.304380,3.037000,672.002000,13.756400,1319.000000,503.880000,6056.9,4.17000,4.820000,113.424039,-30.996054,FP,0.288790,Gaia DR3 5593167070135870080
7655,997.01,TIC 341729521,8.413486,3.556833,17479.605331,20.510696,857.731431,127.916421,5786.0,4.52000,0.926261,121.319521,-59.579798,FP,-0.480000,Gaia DR3 5291265022797336448
