In [1]:
import numpy as np
import matplotlib.pyplot as plt
from astropy.table import Table, vstack, Column, join
from astropy import units as u
from astroquery.simbad import Simbad
from astroquery.gaia import Gaia
import time

Created TAP+ (v1.2.1) - Connection:
	Host: gea.esac.esa.int
	Use HTTPS: True
	Port: 443
	SSL Port: 443
Created TAP+ (v1.2.1) - Connection:
	Host: geadata.esac.esa.int
	Use HTTPS: True
	Port: 443
	SSL Port: 443


In [2]:
sample = Table.read('Gr8stars_GaiaeDR3_TIC_hires.csv')
sample.sort(['_RAJ2000','_DEJ2000'])

In [3]:
table = sample['_RAJ2000', '_DEJ2000']
table.rename_column('_RAJ2000', 'ra_j2000')
table.rename_column('_DEJ2000', 'dec_j2000')

In [4]:
# names:
table['primary_name'] = sample['EDR3Name'] # CHANGE THIS LATER
table['gaiadr2_source_id'] = sample['GAIA']
table['gaiaedr3_source_id'] = sample['Source']
table['hip'] = sample['HIP']
#table['hip'][table['hip'] < 0] = np.nan # replace -1s with nans
table.add_column(Column(name='hd', dtype='<U29', length=len(table)))
table['tic'] = sample['TIC']
table['2mass'] = sample['2MASS']
table['2mass'][table['2mass']=='--'] = '' # replace --s with empty
table['wisea'] = sample['WISEA']
table['wisea'][table['wisea']=='--'] = '' # replace --s with empty


In [5]:
# gaia:
table['ra'] = sample['RA_ICRS']
table['ra_error'] = sample['e_RA_ICRS']
table['dec'] = sample['DE_ICRS']
table['dec_error'] = sample['e_DE_ICRS']
table['parallax'] = sample['Plx']
table['parallax_error'] = sample['e_Plx']
table['pmra'] = sample['pmRA']
table['pmra_error'] = sample['e_pmRA']
table['pmdec'] = sample['pmDE']
table['pmdec_error'] = sample['e_pmDE']
table['gmag'] = sample['Gmag']
table['bp_rp'] = sample['BPRP']
table['radial_velocity'] = sample['RVDR2']
table['radial_velocity_error'] = sample['e_RVDR2']
table['ref_epoch'] = 'J2016.0'

### add SIMBAD alternative identifiers:

In [6]:
# correct some failed matches by hand (ugh):
# for most of these the TIC x-match failed, therefore SIMBAD does not include the TIC ID
# in three cases there is no Gaia DR2 ID so HIP, 2MASS, WISEA must also be copied as SIMBAD query on DR2 ID will fail
table['tic'][table['gaiaedr3_source_id'] == 411413822074322432] = 252191219
table['gaiadr2_source_id'][table['gaiaedr3_source_id'] == 411413822074322432] = 411413822074322432
table['tic'][table['gaiaedr3_source_id'] == 883596170039715072] = 101637926
table['gaiadr2_source_id'][table['gaiaedr3_source_id'] == 883596170039715072] = 883596170039715072
table['tic'][table['gaiaedr3_source_id'] == 1030087329727496320] = 459753450
table['hip'][table['gaiaedr3_source_id'] == 1030087329727496320] = 43630  # from TIC
table['2mass'][table['gaiaedr3_source_id'] == 1030087329727496320] = '08530805+5457112'  # from TIC
table['wisea'][table['gaiaedr3_source_id'] == 1030087329727496320] = 'J085308.10+545711.5'  # from TIC
table['tic'][table['gaiaedr3_source_id'] == 3890250298609911168] = 63189338
table['hip'][table['gaiaedr3_source_id'] == 3890250298609911168] = 51145  # from TIC
table['2mass'][table['gaiaedr3_source_id'] == 3890250298609911168] = '10265302+1713099'  # from TIC
table['wisea'][table['gaiaedr3_source_id'] == 3890250298609911168] = 'J102652.99+171309.1'  # from TIC
table['tic'][table['gaiaedr3_source_id'] == 762815470562110464] = 166646191 # this one is ALSO TIC 353969903
table['hip'][table['gaiaedr3_source_id'] == 762815470562110464] = 54035  # from TIC
table['2mass'][table['gaiaedr3_source_id'] == 762815470562110464] = '11032027+3558203'  # from TIC
table['wisea'][table['gaiaedr3_source_id'] == 762815470562110464] = 'J110319.67+355722.4'  # from SIMBAD
table['tic'][table['gaiaedr3_source_id'] == 778947814402602752] = 252803603
table['gaiadr2_source_id'][table['gaiaedr3_source_id'] == 778947814402602752] = 778947814402405120
table['tic'][table['gaiaedr3_source_id'] == 4445793563751403264] = 276883893 # magnitude changed considerably DR2 to eDR3 hence failed checks but seems ok to me
table['gaiadr2_source_id'][table['gaiaedr3_source_id'] == 4445793563751403264] = 4445793563749783424
table['tic'][table['gaiaedr3_source_id'] == 4468557611984384512] = 398120047
table['gaiadr2_source_id'][table['gaiaedr3_source_id'] == 4468557611984384512] = 4468557611977678336

In [7]:
# more failed matches by hand:
# these all have correct TIC x-match but TIC and DR2 IDs are not ingested properly in SIMBAD
# so we'll query on an alternative identifier
weird_ones_map = {'Gaia DR2 386648181647057152':'HD 225291A',
                 'Gaia DR2 2551241944269355776': 'HD 3972A',
                 'Gaia DR2 417565761431676288': 'HIP 3585',
                 'Gaia DR2 2572433347264096768': 'HIP 8110',
                 'Gaia DR2 90783037268826368': 'HIP 9434',  # bright double
                 'Gaia DR2 139281533098990336': 'HD 19771',  # bright double
                 'Gaia DR2 450345089271313536': 'HD 21903',  # bright double
                 'Gaia DR2 2989285658784560640': 'HIP 23951',  # bright double
                 'Gaia DR2 3235618049144649984': 'HD 33866',
                 'Gaia DR2 3319596479531147392': 'HD 38767',  # bright double
                 'Gaia DR2 3049638303010222080': 'HD 52590',
                 'Gaia DR2 901953203859813888': 'HD 67501A',  # co-moving twin pair?
                 'Gaia DR2 901953199567368960': 'HD 67501B',  # co-moving twin pair?
                 'Gaia DR2 903616971110812544': 'HD 71974',  # bright double
                 'Gaia DR2 811174736439695232': 'HD 80441', # bright double
                 'Gaia DR2 3850860191466194688': 'HD 82267',
                 'Gaia DR2 1070274204844445056': 'HIP 49230',
                 'Gaia DR2 746691643641649536': 'HIP 49315',
                 'Gaia DR2 4022691525057422208': 'HIP 56054',
                 'Gaia DR2 1466569467151572992': 'HD 114723',  # bright double ?
                 'Gaia DR2 1603382317472430080': 'HIP 71109',
                 'Gaia DR2 1186325496788069632': 'HD 131473',  # bright double
                 'Gaia DR2 4409032079673958528': 'HIP 78024',
                 'Gaia DR2 1191393008443719808': 'TYC 956-1098-1',  # HD 143597. no survey other than Tycho and Gaia resolves them
                 'Gaia DR2 1191393008441035648': 'TYC 956-1098-2',  # HD 143597
                 'Gaia DR2 4148904396147803136': 'HD 162739',  # duplicated in TIC as TIC 207281346
                 'Gaia DR2 4185205008718146048': 'HD 179518',  # this HD is TIC as 6990809 with a different DR2 ID but no Gaia info, diff V mag ??
                 'Gaia DR2 4516054074788081152': 'HD 180054',  # duplicated in TIC as TIC 352587633
                 'Gaia DR2 2087620020176419072': 'HIP 99388',
                 'Gaia DR2 1804783160869679872': 'HIP 101315',
                 'Gaia DR2 1731257443928035200': 'HIP 104047',
                 'Gaia DR2 1787693589076214016': 'HD 205160B',  # associated twin stars w slightly different PMRA?
                 'Gaia DR2 1787693589076213632': 'HD 205160A',  # associated twin stars w slightly different PMRA?
                 'Gaia DR2 2681975358571981312': 'HD 209965',  # bright double
                 'Gaia DR2 1929623635232510208': 'HIP 112722',
                 'Gaia DR2 2643881159024617088': 'HIP 115891'}

In [8]:
# hacky alternative to rerunning all the simbad code:
table2 = Table.read('Gr8stars_GaiaeDR3_formatted-names.csv')
table2.sort(['ra_j2000','dec_j2000'])
table['hd'] = table2['hd']
table['primary_name'] = table2['primary_name']

### add Gaia DR2:

In [9]:
# gaia DR2:
table['primary_name','gaiadr2_source_id'].write('gaiadr2.vot', format='votable', overwrite=True)  # write out the DR2 source_ids
#Gaia.ROW_LIMIT = 5000
query = "SELECT TOP 5000 * FROM tap_upload.tbl AS tbl, gaiadr2.gaia_source AS gaia WHERE tbl.gaiadr2_source_id=gaia.source_id"
j = Gaia.launch_job(query=query, upload_resource='gaiadr2.vot', 
                    upload_table_name="tbl", verbose=True)
r = j.get_results()

Launched query: 'SELECT TOP 5000 * FROM tap_upload.tbl AS tbl, gaiadr2.gaia_source AS gaia WHERE tbl.gaiadr2_source_id=gaia.source_id'
------>https
host = gea.esac.esa.int:443
context = /tap-server/tap/sync
Content-type = multipart/form-data; boundary====1625749584722===
200 200
[('Date', 'Thu, 08 Jul 2021 13:06:25 GMT'), ('Server', 'Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips PHP/5.4.16 mod_jk/1.2.43'), ('Cache-Control', 'no-cache, no-store, max-age=0, must-revalidate'), ('Pragma', 'no-cache'), ('Expires', '0'), ('X-XSS-Protection', '1; mode=block'), ('X-Frame-Options', 'SAMEORIGIN'), ('X-Content-Type-Options', 'nosniff'), ('Set-Cookie', 'JSESSIONID=D8C35B29184F812DD2D115482FEA5C82; Path=/tap-server; Secure; HttpOnly'), ('Content-Encoding', 'gzip'), ('Content-Disposition', 'attachment;filename="1625749585325O-result.vot"'), ('Transfer-Encoding', 'chunked'), ('Content-Type', 'application/x-votable+xml')]
Retrieving sync. results...
Query finished.


In [10]:
r.columns

<TableColumns names=('primary_name','gaiadr2_source_id','solution_id','designation','source_id','random_index','ref_epoch','ra','ra_error','dec','dec_error','parallax','parallax_error','parallax_over_error','pmra','pmra_error','pmdec','pmdec_error','ra_dec_corr','ra_parallax_corr','ra_pmra_corr','ra_pmdec_corr','dec_parallax_corr','dec_pmra_corr','dec_pmdec_corr','parallax_pmra_corr','parallax_pmdec_corr','pmra_pmdec_corr','astrometric_n_obs_al','astrometric_n_obs_ac','astrometric_n_good_obs_al','astrometric_n_bad_obs_al','astrometric_gof_al','astrometric_chi2_al','astrometric_excess_noise','astrometric_excess_noise_sig','astrometric_params_solved','astrometric_primary_flag','astrometric_weight_al','astrometric_pseudo_colour','astrometric_pseudo_colour_error','mean_varpi_factor_al','astrometric_matched_observations','visibility_periods_used','astrometric_sigma5d_max','frame_rotator_object_type','matched_observations','duplicated_source','phot_g_n_obs','phot_g_mean_flux','phot_g_mean_fl

In [11]:
dr2_table = r['gaiadr2_source_id', 'teff_val']
dr2_table.rename_column('teff_val', 'gaia_teff')
dr2_table['gaia_teff_lower'] = r['teff_percentile_lower']
dr2_table['gaia_teff_upper'] = r['teff_percentile_upper']
dr2_table['gaia_radius'] = r['radius_val']
dr2_table['gaia_radius_lower'] = r['radius_percentile_lower']
dr2_table['gaia_radius_upper'] = r['radius_percentile_upper']
dr2_table['gaia_lum'] = r['lum_val']
dr2_table['gaia_lum_lower'] = r['lum_percentile_lower']
dr2_table['gaia_lum_upper'] = r['lum_percentile_upper']

In [12]:
table_with_dr2 = join(table, dr2_table, keys='gaiadr2_source_id', join_type='left')

In [13]:
assert len(table_with_dr2) == len(table)

In [14]:
table_with_dr2.sort(['ra_j2000','dec_j2000']) # get it in the same order as sample
for i in np.random.randint(0,len(table),42):
    #print(table[i], table_with_dr2[i])
    assert table[i]['gaiadr2_source_id'] == table_with_dr2[i]['gaiadr2_source_id']

In [15]:
table = table_with_dr2

### add hires info:

In [16]:
#table.remove_column('spectrum_hires') # where did this come from?

In [17]:
# spectra:
table.add_column(Column(name='spectrum_hires', dtype='<U29', length=len(table)))
table['spectrum_hires_resolution'] = sample['hires_maxres']
table['spectrum_hires_snr'] = sample['hires_maxres_snr']
table['spectrum_hires_nobs'] = sample['n_hires_obs_maxres_iodout']

In [18]:
# add a link:
MIN_SNR = 100 # if combined SNR is below this limit, ignore the spectra
table['spectrum_hires'][sample['hires_maxres_snr'] >= MIN_SNR] = 'true'
bad = sample['hires_maxres_snr'] < MIN_SNR
table['spectrum_hires_resolution'][bad] = 0
table['spectrum_hires_nobs'][bad] = 0
table['spectrum_hires_snr'][bad] = 0

### write it out:

In [19]:
table[17]

ra_j2000,dec_j2000,primary_name,gaiadr2_source_id,gaiaedr3_source_id,hip,hd,tic,2mass,wisea,ra,ra_error,dec,dec_error,parallax,parallax_error,pmra,pmra_error,pmdec,pmdec_error,gmag,bp_rp,radial_velocity,radial_velocity_error,ref_epoch,gaia_teff,gaia_teff_lower,gaia_teff_upper,gaia_radius,gaia_radius_lower,gaia_radius_upper,gaia_lum,gaia_lum_lower,gaia_lum_upper,spectrum_hires,spectrum_hires_resolution,spectrum_hires_snr,spectrum_hires_nobs
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,K,K,K,Rsun,Rsun,Rsun,Lsun,Lsun,Lsun,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
float64,float64,str29,int64,int64,int64,str7,int64,str16,str19,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,str7,float32,float32,float32,float32,float32,float32,float32,float32,float32,str29,float64,float64,int64
2.1705774156078,36.6271256247555,HD400,2877522230262577024,2877522230262577024,-1,400,612002469,--,--,2.16994537178,0.017,36.62657221496,0.0112,30.6686,0.0252,-114.128,0.026,-124.518,0.015,6.082332,0.675087,-15.1,0.15,J2016.0,6221.5,6140.1333,6295.0,1.4688498,1.4347496,1.508037,2.9122581,2.9054701,2.9190462,,0.0,0.0,0


In [20]:
table.write('Gr8stars_GaiaeDR3_formatted.csv', overwrite=True)

#### debug etc:

In [21]:
table[np.isin(table['primary_name'].data,simbad_failed)]

NameError: name 'simbad_failed' is not defined