## nexsci data from vizier

In [1]:
import sys
sys.path.insert(0, '../code/')
from kenmon import get_nexsci_data

df = get_nexsci_data(table_name="pscomppars", clobber=False)
df.head()

  'ylabel': 'Probability ($10^{-3}\,$Myr$^{-1}$)',


Column definitions:  https://exoplanetarchive.ipac.caltech.edu/docs/API_PS_columns.html
Loaded:  ../data/nexsci_pscomppars.csv


Unnamed: 0,objectid,pl_name,pl_letter,hostid,hostname,hd_name,hip_name,tic_id,disc_pubdate,disc_year,...,pl_angseperr1,pl_angseperr2,pl_angseplim,pl_angsepformat,pl_angsepstr,pl_angsepsymerr,pl_angsep_reflink,pl_ndispec,sky_coord.ra,sky_coord.dec
0,3.3499,Kepler-24 e,e,2.424906,Kepler-24,,,TIC 122376177,2014-03,2014,...,,,0.0,,0.119,,<a refstr=CALCULATED_VALUE href=/docs/pscp_cal...,0,290.413262,38.343728
1,3.2441,Kepler-1065 b,b,2.430631,Kepler-1065,,,TIC 378012771,2016-05,2016,...,,,0.0,,0.0405,,<a refstr=CALCULATED_VALUE href=/docs/pscp_cal...,0,294.511319,38.790465
2,3.1274,TOI-1260 c,c,2.572931,TOI-1260,,,TIC 355867695,2021-08,2021,...,,,0.0,,0.893,,<a refstr=CALCULATED_VALUE href=/docs/pscp_cal...,0,157.144071,65.854199
3,3.13149,HD 28109 c,c,2.57308,HD 28109,HD 28109,,TIC 29781292,2022-05,2022,...,,,0.0,,2.21,,<a refstr=CALCULATED_VALUE href=/docs/pscp_cal...,0,65.238306,-68.102688
4,3.12788,K2-350 b,b,2.208571,K2-350,,,TIC 294319820,2021-08,2021,...,,,,,,,,0,204.035509,-14.009252


In [None]:
import matplotlib.pyplot as plt

In [23]:
hostnames = df.hostname.unique()
len(hostnames)

3305

In [25]:
#TOI-1430: Orell-Miquel estimated a much older age than Zhang
df.query("hostname=='HD 235088'")[["st_age","pl_name"]]

Unnamed: 0,st_age,pl_name
2995,3.4,HD 235088 b


In [26]:
#TOI-2076: Hedges+ 154-204; Osborn+ 260-420 Myr
df.query("hostname=='TOI-2076'")[["st_age","pl_name"]]

Unnamed: 0,st_age,pl_name
2687,2.7,TOI-2076 d
3137,2.7,TOI-2076 b
4125,2.7,TOI-2076 c


In [27]:
# use default parameters
df = df.drop_duplicates(subset='hostname', keep='first')
df.shape

(3305, 684)

In [28]:
d = df.query("pl_name=='DS Tuc A b'")
ra, dec = d[['ra','dec']].squeeze().values
ra, dec

(354.9154673, -69.196043)

In [10]:
from kenmon import Target

t = Target(ra, dec)
t.query_vizier_param("EW", use_regex=True)



Found 11 references in Vizier using `EW`.


{'II/368/sstsl2:DEWdeg': -69.19591,
 'VI/129/fuse:Preview': 'Preview',
 'J/ApJ/898/27/table2:EW': nan,
 'J/A+A/460/695/stars:EWLi': 216,
 'J/A+A/480/735/stars:l_EWLi': '',
 'J/A+A/480/735/stars:EWLi': 0.215,
 'J/A+A/480/735/stars:n_EWLi': '',
 'J/A+A/684/A29/extend:Skew': 4.341,
 'J/AJ/161/234/table2:DEWdeg': -69.1959085,
 'J/MNRAS/427/2917/ptypes:Skew': 0.59,
 'J/MNRAS/494/2429/table1:EWLi': 216}

In [11]:
t.query_vizier_param("EWLi", use_regex=False)

Found 3 references in Vizier using `EWLi`.


{'J/A+A/460/695/stars:EWLi': 216,
 'J/A+A/480/735/stars:EWLi': 0.215,
 'J/MNRAS/494/2429/table1:EWLi': 216}

Cannot just use mean of all measurements!

## binary system catalog

## search

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor, as_completed
from kenmon import Target
import time
import random

regex_query = 'EWLi'

def process_row(row, max_retries=3, timeout=30):
    """Process each row to query Vizier with retries, rate limiting, and timeouts."""
    t = Target(row.ra, row.dec, verbose=False)
    
    for attempt in range(max_retries):
        try:
            # Apply rate limiting with random sleep to prevent server overload
            time.sleep(random.uniform(0.5, 2.0))  
            
            p = t.query_vizier_param(param=regex_query, use_regex=True)
            return row.hostname, p
        except Exception as e:
            print(f"Attempt {attempt+1} failed for TOI {row.hostname}: {e}")
            time.sleep(2 ** attempt)  # Exponential backoff
    return row.hostname, f"Failed after {max_retries} retries"

# Parallel execution with limited workers
data = {}
max_workers = min(4, len(df))  # Adjust based on server limits

with ProcessPoolExecutor(max_workers=max_workers) as executor:
    futures = {executor.submit(process_row, row): row for _, row in df.iterrows()}
    
    for future in tqdm(as_completed(futures), total=len(futures)):
        name, result = future.result()
        data[name] = result

In [35]:
import pandas as pd

data_df = pd.DataFrame(data)
data_df.index.unique()

Index(['J/AJ/168/41/table3:EWLi', 'J/AJ/168/41/table3:e_EWLi',
       'J/AJ/168/41/table3:r_EWLi', 'J/AJ/163/156/table5:EWLi',
       'J/AJ/163/156/table5:r_EWLi', 'J/ApJ/838/150/table3:l_EWLi',
       'J/ApJ/838/150/table3:EWLi', 'J/ApJ/838/150/table3:r_EWLi',
       'J/A+A/676/A129/catalog:EWLi', 'J/AJ/165/205/stars:EWLi',
       'J/AJ/133/2524/table1:u_EWLi', 'J/AJ/133/2524/table1:EWLi',
       'J/A+A/664/A163/tableb1:EWLi', 'J/A+A/664/A163/tableb1:e_EWLi',
       'J/A+A/460/695/stars:EWLi', 'J/AJ/161/171/table7:l_EWLi',
       'J/AJ/161/171/table7:EWLi', 'J/AJ/161/171/table7:r_EWLi',
       'J/A+A/573/A126/sample:EWLi', 'J/AJ/164/115/table7:EWLi',
       'J/AJ/164/115/table7:e_EWLi', 'J/AJ/121/1040/table1:EWLi',
       'J/AJ/121/1040/table1:u_EWLi', 'J/AJ/124/404/table1:EWLi',
       'J/A+A/480/735/stars:l_EWLi', 'J/A+A/480/735/stars:EWLi',
       'J/A+A/480/735/stars:n_EWLi', 'J/MNRAS/494/2429/table1:EWLi',
       'J/A+A/685/A83/tablec1:l_EWLiunv', 'J/A+A/685/A83/tablec1:EWLiunv',

In [36]:
data_df.to_csv('../data/nexsci_EWLi_vizier.csv')

In [41]:
import pandas as pd

data_df = pd.read_csv('../data/nexsci_EWLi_vizier.csv', index_col=0)
data_df

Unnamed: 0,Kepler-1065,TOI-1260,K2-350,HD 28109,Kepler-24,K2-332,EPIC 212587672,K2-342,K2-340,LTT 3780,...,HD 15337,TOI-1295,TOI-6130,TRAPPIST-1,EPIC 201595106,HIP 56998,WASP-19,TOI-2443,Kepler-974,TOI-6038 A
J/AJ/168/41/table3:EWLi,,,,,,,,,,,...,,,,,,,,,,
J/AJ/168/41/table3:e_EWLi,,,,,,,,,,,...,,,,,,,,,,
J/AJ/168/41/table3:r_EWLi,,,,,,,,,,,...,,,,,,,,,,
J/AJ/163/156/table5:EWLi,,,,,,,,,,,...,,,,,,,,,,
J/AJ/163/156/table5:r_EWLi,,,,,,,,,,,...,,,,,,,,,,
J/ApJ/838/150/table3:l_EWLi,,,,,,,,,,,...,,,,,,,,,,
J/ApJ/838/150/table3:EWLi,,,,,,,,,,,...,,,,,,,,,,
J/ApJ/838/150/table3:r_EWLi,,,,,,,,,,,...,,,,,,,,,,
J/A+A/676/A129/catalog:EWLi,,,,,,,,,,,...,,,,,,,,,,
J/AJ/165/205/stars:EWLi,,,,,,,,,,,...,,,,,,,,,,


In [42]:
refs = {}
for i in data_df.index:
    count=data_df.loc[i].dropna().shape
    refs[i] = count

In [43]:
refs = pd.Series(refs).sort_values(ascending=False)
refs.head(20)

J/A+A/460/695/stars:EWLi           (4,)
J/A+A/676/A129/catalog:EWLi        (3,)
J/A+A/664/A163/tableb1:EWLi        (2,)
J/A+A/664/A163/tableb1:e_EWLi      (2,)
J/A+A/480/735/stars:EWLi           (1,)
J/AJ/121/1040/table1:EWLi          (1,)
J/AJ/164/115/table7:e_EWLi         (1,)
J/AJ/164/115/table7:EWLi           (1,)
J/A+A/573/A126/sample:EWLi         (1,)
J/AJ/161/171/table7:r_EWLi         (1,)
J/AJ/161/171/table7:EWLi           (1,)
J/MNRAS/494/2429/table1:EWLi       (1,)
J/A+A/685/A83/tablec1:f_EWLiunv    (1,)
J/AJ/133/2524/table1:EWLi          (1,)
J/AJ/133/2524/table1:u_EWLi        (1,)
J/AJ/165/205/stars:EWLi            (1,)
J/ApJ/838/150/table3:r_EWLi        (1,)
J/ApJ/838/150/table3:EWLi          (1,)
J/AJ/163/156/table5:r_EWLi         (1,)
J/AJ/163/156/table5:EWLi           (1,)
dtype: object

In [44]:
for ref in refs.index:
    x = data_df.T[ref].dropna()
    try:
        if (x.astype(float)>50).any():
            print(data_df.T[ref].dropna())
    except Exception as e:
        print(e)

HIP 67522    220.0
TOI-833        0.0
HIP 94235    165.0
DS Tuc A     216.0
Name: J/A+A/460/695/stars:EWLi, dtype: object
TOI-1807        104.0
TOI-2076    89.400002
Name: J/A+A/664/A163/tableb1:EWLi, dtype: object
TOI-2048    53.099998
Name: J/AJ/164/115/table7:EWLi, dtype: object
HIP 94235    165.0
Name: J/A+A/573/A126/sample:EWLi, dtype: object
DS Tuc A    216.0
Name: J/MNRAS/494/2429/table1:EWLi, dtype: object
V1298 Tau    220.0
Name: J/ApJ/838/150/table3:r_EWLi, dtype: object
V1298 Tau    352.0
Name: J/ApJ/838/150/table3:EWLi, dtype: object
could not convert string to float: 'Goodman'
TOI-1227    513
Name: J/AJ/163/156/table5:EWLi, dtype: object
CoRoTID 223977153    64.0
Name: J/A+A/685/A83/tablec1:EWLiunv, dtype: object


## query without regex

In [None]:
from kenmon import Target

d = df.query("pl_name=='DS Tuc A b'")
ra, dec = d[['ra','dec']].squeeze().values
t = Target(ra, dec)
t.query_vizier_param("EW", use_regex=True)

In [21]:
import numpy as np

params = ["B-V", "log_R_HK_", "EWLi", "Teff", "Prot"]

for param in params:
    p = t.query_vizier_param(param)
    v = np.nanmean(list(p.values()))
    print(p, v)



Found 2 references in Vizier with `B-V`.
{'I/239/tyc_main': np.float32(0.835), 'V/145/sky2kv5': np.float32(0.835)} 0.835
Found 0 references in Vizier with `log_R_HK_`.
{} nan
Found 1 references in Vizier with `EWLi`.
{'J/A+A/664/A163/tableb1': np.float32(89.4)} 89.4


  v = np.nanmean(list(p.values()))


## load data

In [12]:
import pandas as pd

data = pd.read_csv('../data/youth_indicators_nexsci.csv')
data

Unnamed: 0,hostname,bv,rhk,li,teff,prot
0,OGLE-TR-10,0.606000,,,5702.290032,
1,KOI-13,0.114667,,,7847.512429,
2,GJ 1132,,,,3359.359888,
3,KELT-7,0.425826,,,6717.155085,
4,HATS-61,0.787000,,,5479.358262,24.000000
...,...,...,...,...,...,...
78,TOI-1853,0.824000,,,4997.199256,
79,HATS-35,0.644000,,,6106.953087,
80,TOI-150,0.665000,,,5788.523313,
81,HD 213885,0.606222,,,5810.933325,1.066915


In [13]:
len(data.li.dropna())

0

In [14]:
len(data.rhk.dropna())

0

In [15]:
len(data.prot.dropna())

35

In [None]:
from tqdm import tqdm
from kenmon import Target
import pandas as pd
import numpy as np

params = ["B-V", "log_R_HK_", "EWLi", "Teff", "Prot", "ruwe"]

for i,row in tqdm(df.iterrows()):
    if data.hostname.isin([row.hostname]).sum()==0:
        d = {}
        d['hostname'] = row.hostname

        t = Target(row.ra, row.dec, verbose=False)

        for param in params:
            p = t.query_vizier_param(param, use_regex=False)
            try:
                v = np.nanmean(list(map(float, p.values())))
            except:
                vals = []
                for x in p.values(): 
                    if isinstance(x, (int, float, str)) and str(x).replace('.', '', 1).isdigit():
                        vals.append(x)
                v = np.nanmean(vals)
            d[param] = v
        s = pd.Series(d).rename({"B-V": "bv", 
                                 "log_R_HK_": "rhk", 
                                 "EWLi": "li", 
                                 "Teff": "teff", 
                                 "Prot": "prot",
                                 #ruwe
                                }
                               ).to_frame().T
        data = pd.concat([data,s], ignore_index=True)
        # break

In [None]:
data.tail()

In [None]:
data.to_csv('../data/youth_indicators_nexsci.csv', index=False)