In [1]:
#use the corv_wd_models.yml environment
import pandas as pd
from astropy import table
from astropy.table import Table, Column, MaskedColumn, join, join_skycoord
from astropy.coordinates import SkyCoord
import astropy.units as u
import numpy as np
from tqdm import tqdm

In [2]:
MsunCGS=1.989*10**33 #g
RsunCGS=6.955*10**10 #cm
cCGS=2.99792458*10**10 #cm/s
GCGS=6.6743*10**(-8) #cm^3 g^-1 s^-2

# This notebook matches the SDSS-V DA WD sample with several other datasets so we can check our measurements against them later

# Import the SDSS-V DA WD Sample

In [3]:
#import back in the data, either from the backup file or from the latest version of the table
backup=False
if backup==True:
    SDSS_DA_df_full=pd.read_csv('csv/SDSSV_DA_df_nb00.csv')
else:
    SDSS_DA_df_full=pd.read_csv('csv/SDSSV_DA_df.csv')


In [4]:
#just keep the RA, DEC, and GAIA_DR3_SOURCE_ID columns for matching
SDSS_DA_df=SDSS_DA_df_full[['ra','dec','gaia_dr3_source_id']]
#drop repeat rows
SDSS_DA_df=SDSS_DA_df.drop_duplicates()

In [5]:
#turn the DA sample dataframe into an astropy table for coordinate matching
SDSS_DA_astpy=Table.from_pandas(SDSS_DA_df)

# Create a column with SkyCoord positions for easy matching
SDSS_DA_astpy['SkyCoord'] = SkyCoord(SDSS_DA_astpy['ra'], SDSS_DA_astpy['dec'], unit='deg')

# Falcon 2010 SPY Spectroscopic Catalog
- Paper: https://ui.adsabs.harvard.edu/abs/2010ApJ...712..585F/abstract
- Catalog: https://cdsarc.cds.unistra.fr/viz-bin/cat/J/ApJ/712/585
- Measured using high resolution SPY spectra
- Contains: RV measurements
    - Adp-V: Adopted apparent velocity in km/s
    - e_Adp-V: The 1$\sigma$ error in Adp-V in km/s

In [6]:
#read in the Falcon data
falcon_astpy = Table.read('data/falcon2010.fit')['Name','DA','Adp-V','e_Adp-V','_RA','_DE']

#the falcon data set contains multiple observations for the same WD, but each of these observations 
#has the same measured RV
#so remove these duplicates
falcon=falcon_astpy.to_pandas()
falcon=falcon.drop_duplicates(subset='Name')
falcon_astpy=Table.from_pandas(falcon)

# Create a column with SkyCoord positions for easy matching
falcon_astpy['SkyCoord'] = SkyCoord(falcon_astpy['_RA'], falcon_astpy['_DE'], unit='deg')

# Match targets within 3 arcsec
join_func = table.join_skycoord(3 * u.arcsecond)
match_tab_astpy = table.join(SDSS_DA_astpy, falcon_astpy, join_funcs={'SkyCoord': join_skycoord(3 * u.arcsec)})

#Convert the matched table to a pandas dataframe
match_tab_falcon=match_tab_astpy.to_pandas()

#rename the Falcon RV and RV_error columns
match_tab_falcon=match_tab_falcon.rename(columns={"Adp-V": "rv_falcon","e_Adp-V":"e_rv_falcon"})

# Initialize an array to flag that WD is in Falcon dataset
match_tab_falcon['falcon_flag'] = np.full(len(match_tab_falcon), True)

#drop all extra columns
match_tab_falcon=match_tab_falcon.drop(['ra','dec','SkyCoord_id','SkyCoord_1.ra','SkyCoord_1.dec','Name','DA','_RA','_DE',
                                        'SkyCoord_2.ra','SkyCoord_2.dec'],axis=1)


In [7]:
#one object in the Falcon dataset is likely a binary
display(match_tab_falcon.query('gaia_dr3_source_id==3868927607051816320'))
#this object is present in more datasets
#But I only remove it from the Falcon set
rem_ind=match_tab_falcon.query('gaia_dr3_source_id==3868927607051816320').index[0]
match_tab_falcon['falcon_flag'].loc[rem_ind]=False

Unnamed: 0,gaia_dr3_source_id,rv_falcon,e_rv_falcon,falcon_flag
5,3868927607051816320,12.578,4.258,True


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  match_tab_falcon['falcon_flag'].loc[rem_ind]=False


In [8]:
match_tab_falcon

Unnamed: 0,gaia_dr3_source_id,rv_falcon,e_rv_falcon,falcon_flag
0,2497895053130247040,45.823002,1.601,True
1,3634151534873010176,61.868999,1.488,True
2,3650552739370519680,30.596001,1.551,True
3,3877952432852336128,19.207001,3.037,True
4,3874412413432647680,48.132,2.905,True
5,3868927607051816320,12.578,4.258,False
6,3690323316193465344,50.179001,0.269,True
7,3653928313083020416,40.785,1.387,True
8,4420242631507777920,50.907001,3.076,True
9,4436059415517432064,25.105,0.733,True


# Raddi 2022 SPY/SDSS Spectroscopic Catalog
- Paper: https://ui.adsabs.harvard.edu/abs/2022A%26A...658A..22R/abstract
- Catalog: https://cdsarc.cds.unistra.fr/viz-bin/cat/J/A+A/658/A22#/browse
- Measured using SPY and SDSS DR12 spectra
- Contains: RV, Teff, Logg, Radius, Mass measurements
    - table 2 contains Gaia EDR3 source ID
        - SourceID1: Gaia EDR3 source ID of white dwarf
    - table5 contains these physical parameters for H-rich WD
        - logTeff: White dwarf effective temperature in K
        - e_logTeff: Error on logTeff in K
        - logg: White dwarf surface gravity in log(cm/s^2)
        - e_logg: Error on logg in log(cm/s^2)
        - Mass: White dwarf mass in Msun
        - e_Mass: Error on Mass in Msun
        - Radius: White dwarf radius in Rsun
        - RV: White dwarf radial velocity in km/s
            - White dwarf radial velocities are corrected for the effect of the gravitational redshift. The gravitational redshift correction is $v_g=GM/cR$. The measured RV is $v_{app}=v_{rad}+v_g$.
            - The SDSS sample is also corrected for a systematic offset of +15±13 km/s with respect to the SPY sample.
        - e_RV: Error on RV in km/s

In [9]:
#read in the Raddi data
raddi_astpy2 = Table.read('data/raddi2022/table2.dat.fits')
raddi_astpy5 = Table.read('data/raddi2022/table5.dat.gz.fits')

#calculate the gravitational redshift correction and add back to the RV
raddi_astpy5['Vg']=(GCGS*raddi_astpy5['Mass']*MsunCGS/(cCGS*raddi_astpy5['Radius']*RsunCGS))*10**(-5) #km/s
raddi_astpy5['rv_raddi']=raddi_astpy5['RV']+raddi_astpy5['Vg']+15 #measured RV, without grav redshift correction and adding back in the offset relative to SDSS
raddi_astpy5['e_rv_raddi']=np.abs(raddi_astpy5['RV'])#no error given for raddi radius measurement so can't calculate Vg error so just take total RV error to be raddi RV error
raddi_astpy5['teff_raddi']=10**raddi_astpy5['logTeff']
raddi_astpy5['e_teff_raddi']=10**raddi_astpy5['e_logTeff']

raddi2=raddi_astpy2.to_pandas()
raddi5=raddi_astpy5.to_pandas()

#merge the two dataframes on star system identifier
raddi=pd.merge(left=raddi2[['System','SourceID1']],right=raddi5,on='System',how="inner")

#SourceID1 is Gaia EDR3 source ID of white dwarf
#rename columns
raddi=raddi.rename(columns={"SourceID1": "gaia_dr3_source_id","logg":"logg_raddi","e_logg":"e_logg_raddi",
                           "Mass":"mass_raddi","e_Mass":"e_mass_raddi","Radius":"radius_raddi"})
#raddi doesn't give errors on the radius so set the error to 0
raddi['e_radius_raddi']=np.full(len(raddi),0)

#merge the SDSS-V DA dataframe with the raddi dataframe on GAIA DR3 ID
match_tab_raddi=pd.merge(left=SDSS_DA_df,right=raddi,on='gaia_dr3_source_id',how="inner")

# Initialize an array to flag that WD is in raddi dataset
match_tab_raddi['raddi_flag'] = np.full(len(match_tab_raddi), True)

#drop all extra columns
match_tab_raddi=match_tab_raddi.drop(['ra','dec','System','logTeff','e_logTeff','taucool', 'E_taucool',
       'e_taucool', 'l_MprogC08', 'MprogC08', 'E_MprogC08', 'e_MprogC08',
       'l_tauprogC08', 'tauprogC08', 'E_tauprogC08', 'e_tauprogC08',
       'l_MprogC18', 'MprogC18', 'E_MprogC18', 'e_MprogC18', 'l_tauprogC18',
       'tauprogC18', 'E_tauprogC18', 'e_tauprogC18', 'Name', 'SpType', 'RV',
       'e_RV', 'Sample', 'Vg'],axis=1)



In [10]:
match_tab_raddi

Unnamed: 0,gaia_dr3_source_id,logg_raddi,e_logg_raddi,mass_raddi,e_mass_raddi,radius_raddi,rv_raddi,e_rv_raddi,teff_raddi,e_teff_raddi,e_radius_raddi,raddi_flag
0,1382482764934977664,8.07,0.08,0.67,0.04,0.013,24.673618,23.14,26730.064087,1.051962,0,True
1,1424795481360349824,8.48,0.05,0.90,0.03,0.009,74.178214,4.49,13899.526312,1.049542,0,True
2,1427870433130526720,7.95,0.08,0.57,0.04,0.013,11.596063,31.32,7620.790100,1.030386,0,True
3,1429017434212861312,7.88,0.06,0.58,0.03,0.015,-16.481624,56.10,29040.226545,1.039920,0,True
4,1613297915194682368,7.91,0.23,0.58,0.13,0.014,41.376832,0.00,20323.570109,1.185769,0,True
...,...,...,...,...,...,...,...,...,...,...,...,...
272,4231922059473951616,8.59,0.02,0.96,0.01,0.008,174.001857,82.60,10185.913881,1.013911,0,True
273,1733926920721491968,7.90,0.05,0.56,0.03,0.014,-5.992714,46.46,15275.660582,1.035142,0,True
274,1734034565485635072,7.85,0.06,0.54,0.03,0.014,29.587740,9.97,15100.801542,1.032761,0,True
275,1731395432637342336,7.49,0.05,0.46,0.01,0.020,6.023689,23.62,31117.163371,1.030386,0,True


# Anguiano 2017 SDSS DR12 Spectroscopic Catalog
- Paper: https://ui.adsabs.harvard.edu/abs/2017MNRAS.469.2102A/abstract
- Catalog: https://cdsarc.cds.unistra.fr/viz-bin/cat/J/MNRAS/469/2102#/browse
- Measured using SDSS DR12 spectra
    - WE FIND THE RVs FROM THIS DATASET TO BE UNRELIABLE
- Contains: RV, Teff, Logg, & Mass measurements (Logg+Mass can be used to get Radius $R=\sqrt{GM/g}$)
    - Teff: Effective temperature in K
    - e_Teff: rms uncertainty on Teff in K
    - Mass: Mass in Msun
    - e_Mass: rms uncertainty on Mass in Msun
    - logg: Surface gravity in log(cm/s^2)
    - e_logg: rms uncertainty on logg in log(cm/s^2)
    - RVfit: Radial velocity in km/s
    - e_RVfit: rms uncertainty on RVfit in km/s
    - Vgrav: Gravitational redshift in km/s

Given log$g$ and mass, the radius is (in solar units)
$$R=\frac{1}{R_\odot}\sqrt{\frac{GM_\odot m}{10^{\log{g}}}}$$

Thus, the error on $R$ is
$$dR=R\sqrt{\frac{1}{4 m^2}dm^2+\frac{1}{4}(\ln{10})^2 d(\log g)^2}$$

In [11]:
#read in the Anguiano data
ang_astpy = Table.read('data/anguiano_table1.dat.fits')

#add the gravitational redshift correction back to the RV
ang_astpy['rv_anguiano']=ang_astpy['RVfit']+ang_astpy['Vgrav'] #measured RV, without grav redshift correction
ang_astpy['e_rv_anguiano']=ang_astpy['e_RVfit']#set the RV error as just the kinematic RV error since no Vg error given

# Create a column with SkyCoord positions for easy matching
ang_astpy['SkyCoord'] = SkyCoord(ang_astpy['RAdeg'], ang_astpy['DEdeg'], unit='deg')

# Match targets within 3 arcsec
join_func = table.join_skycoord(3 * u.arcsecond)
match_tab_astpy_ang = table.join(SDSS_DA_astpy, ang_astpy, join_funcs={'SkyCoord': join_skycoord(3 * u.arcsec)})

#Convert the matched table to a pandas dataframe
match_tab_ang=match_tab_astpy_ang.to_pandas()

#rename columns
match_tab_ang=match_tab_ang.rename(columns={'Teff':'teff_anguiano','e_Teff':'e_teff_anguiano', 'Mass':'mass_anguiano',
                                            'e_Mass':'e_mass_anguiano', 'logg':'logg_anguiano', 'e_logg':'e_logg_anguiano',
                                           'S/N':'snr_anguiano'})
#calculate the radius in solar units
match_tab_ang['radius_anguiano']=(1/RsunCGS)*np.sqrt(GCGS*MsunCGS*match_tab_ang['mass_anguiano']/10**match_tab_ang['logg_anguiano'])
#calculate errors on the radius
match_tab_ang['e_radius_anguiano']=match_tab_ang['radius_anguiano']*np.sqrt(((1/(4*(match_tab_ang['mass_anguiano'])**2))*(match_tab_ang['e_mass_anguiano'])**2)+((1/4)*((np.log(10))**2)*(match_tab_ang['e_logg_anguiano'])**2))

# Initialize an array to flag that WD is in anguiano dataset
match_tab_ang['anguiano_flag'] = np.full(len(match_tab_ang), True)




In [12]:
#some of the Angiuano WDs are VERY close together with very similar derived Teff, Logg, M, and R
#these are likely the same WD
#keep only the higher Anguiano SNR observation
print(len(match_tab_ang))
duplicates_catids=[]
for i in range(len(match_tab_ang)):
    name=match_tab_ang['gaia_dr3_source_id'][i]
    if len(match_tab_ang.query('gaia_dr3_source_id== @name'))>1:
        duplicates_catids.append(name)
duplicates_catids=np.unique(duplicates_catids)

for cat_id in duplicates_catids:
    repeat_df=match_tab_ang.query('gaia_dr3_source_id== @cat_id')
    max_snr=max(repeat_df['snr_anguiano'])
    max_snr_df=repeat_df.query('snr_anguiano== @max_snr')
    #drop all entries for this WD then add back in the max_snr entry
    match_tab_ang=match_tab_ang.drop(repeat_df.index.to_list())
    match_tab_ang=match_tab_ang.append(max_snr_df)
    #reset the indices
    indices=np.arange(0,len(match_tab_ang))
    match_tab_ang.set_index(indices,inplace=True)

print(len(match_tab_ang))

1585
1567


  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)
  match_tab_ang=match_tab_ang.append(max_snr_df)


In [13]:
#drop all extra columns
match_tab_ang=match_tab_ang.drop(['ra','dec','SkyCoord_id','SkyCoord_1.ra', 'SkyCoord_1.dec', 'SDSS',
       '---', 'MPJ', 'n_SDSS', 'RAdeg', 'DEdeg','Dist', 'e_Dist', 'pmRA*', 'e_pmRA*',
       'pmDE', 'e_pmDE', 'RVfit', 'e_RVfit', 'Vgrav', 'Age1', 'e_Age1',
       'E_Age1', 'Age2', 'e_Age2', 'E_Age2', 'Age3', 'e_Age3', 'E_Age3','SkyCoord_2.ra', 'SkyCoord_2.dec'],axis=1)


In [14]:
#the Anguiano catalog has two WDs where the Vgrav is wrong, remove these WDs
display(match_tab_ang.query("rv_anguiano>20000"))

#set the ang_flag=False to remove these objects from the anguiano comparison
rem_ind=match_tab_ang.query("rv_anguiano>20000").index
match_tab_ang['anguiano_flag'].loc[rem_ind]=False

Unnamed: 0,gaia_dr3_source_id,teff_anguiano,e_teff_anguiano,mass_anguiano,e_mass_anguiano,logg_anguiano,e_logg_anguiano,snr_anguiano,rv_anguiano,e_rv_anguiano,radius_anguiano,e_radius_anguiano,anguiano_flag
448,901961789498071808,23076,518,0.512,0.036,7.72,0.079,22.14,29998.02,11.94,0.016363,0.001596,True
1319,3668868374001785472,23076,321,0.512,0.021,7.72,0.046,40.98,29970.71,6.2,0.016363,0.000929,True


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  match_tab_ang['anguiano_flag'].loc[rem_ind]=False


In [15]:
match_tab_ang

Unnamed: 0,gaia_dr3_source_id,teff_anguiano,e_teff_anguiano,mass_anguiano,e_mass_anguiano,logg_anguiano,e_logg_anguiano,snr_anguiano,rv_anguiano,e_rv_anguiano,radius_anguiano,e_radius_anguiano,anguiano_flag
0,2160210705546460032,16525,118,0.535,0.014,7.830,0.028,81.98,-12.30,5.40,0.014737,0.000513,True
1,1382375154530149248,6911,119,0.577,0.154,7.971,0.240,16.39,-3.18,11.40,0.013011,0.003992,True
2,1382482764934977664,27743,348,0.662,0.038,8.050,0.065,28.94,26.07,9.24,0.012725,0.001020,True
3,1424795481360349824,14228,584,0.826,0.076,8.370,0.119,43.46,73.89,9.13,0.009834,0.001421,True
4,1428280108586688768,18120,296,0.562,0.037,7.880,0.062,29.00,20.60,11.40,0.014259,0.001121,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1562,3650517280121226496,10925,471,0.733,0.210,8.224,0.356,7.39,104.04,37.62,0.010959,0.004758,True
1563,3651439564217693952,14128,605,0.606,0.128,7.991,0.207,13.89,112.62,17.89,0.013030,0.003397,True
1564,4011063704262176128,7659,166,0.555,0.215,7.925,0.331,9.77,22.24,16.55,0.013455,0.005752,True
1565,5175652848305921792,15964,783,0.685,0.107,8.130,0.169,10.43,96.63,25.78,0.011805,0.002475,True


# Gentile 2021 Gaia EDR3 Photometric Catalog
- Paper: https://ui.adsabs.harvard.edu/abs/2021MNRAS.508.3877G/abstract
    - https://ui.adsabs.harvard.edu/abs/2021yCat..75083877G/abstract
- Catalog: https://cdsarc.cds.unistra.fr/viz-bin/cat/J/MNRAS/508/3877#/browse
- Measured using Gaia photometry
- Contains: Teff, Logg, & Mass measurements (Logg+Mass can be used to get Radius $R=\sqrt{GM/g}$)
    - TeffH: Effective temperature in K from fitting the dereddened G, BP, and RP absolute fluxes with pure-H model atmospheres
    - e_TeffH: Uncertainty on teff_H in K
    - loggH: Log of surface gravity in log(cm/s^2) from fitting the dereddened G, BP, and RP absolute fluxes with pure-H model atmospheres
    - e_loggH: Uncertainty on logg_H in log(cm/s^2)
    - MassH: Stellar mass in Msun resulting from the adopted mass-radius relation and best fit parameters
    - e_MassH: Uncertainty on mass_H in Msun
    - The validity range of the grid is 1500 < 𝑇 < 140 000 and 6.5 < log 𝑔 < 9.5
    - In our catalogue we always pick the warmer, massive solution, leading to incorrect parameters for ultra-cool white dwarfs

In [16]:
gaia_df = pd.read_csv('data/J_MNRAS_508_3877_maincat.dat.gz.txt',sep='|')
#drop first empty row
gaia_df=gaia_df.drop(0)
#drop the last row of NaNs
gaia_df=gaia_df.dropna()
#remove spaces from column names
gaia_df.columns=gaia_df.columns.str.replace(' ', '')
#reformat EDR3Name column to get just the Gaia source id
gaia_df['EDR3Name']=gaia_df['EDR3Name'].str.replace('Gaia EDR3 ', '')
gaia_df['EDR3Name']=gaia_df['EDR3Name'].str.replace(' ', '')
#keep only the relevant columns
gaia_df=gaia_df[['EDR3Name','TeffH','e_TeffH','loggH','e_loggH','MassH','e_MassH']]
#rename to GAIA_DR3_SOURCE_ID
gaia_df=gaia_df.rename(columns={"EDR3Name": "gaia_dr3_source_id","TeffH": "teff_gentile","e_TeffH": "e_teff_gentile",
                                "loggH": "logg_gentile","e_loggH": "e_logg_gentile","MassH":"mass_gentile",
                               "e_MassH":"e_mass_gentile"})

#set rows where WD parameters couldn't be measured to 0
gaia_df=gaia_df.replace(['      ---','     ---'],0)

#convert all columns to numeric values
gaia_df['teff_gentile']=pd.to_numeric(gaia_df['teff_gentile'])
gaia_df['e_teff_gentile']=pd.to_numeric(gaia_df['e_teff_gentile'])
gaia_df['logg_gentile']=pd.to_numeric(gaia_df['logg_gentile'])
gaia_df['e_logg_gentile']=pd.to_numeric(gaia_df['e_logg_gentile'])
gaia_df['mass_gentile']=pd.to_numeric(gaia_df['mass_gentile'])
gaia_df['e_mass_gentile']=pd.to_numeric(gaia_df['e_mass_gentile'])

#convert gaia_dr3_source_id column back to integers otherwise matching won't work
gaia_df['gaia_dr3_source_id']=gaia_df['gaia_dr3_source_id'].astype(int)

#merge the SDSS-V DA dataframe with the Gaia dataframe on GAIA DR3 ID
match_tab_gentile=pd.merge(left=SDSS_DA_df,right=gaia_df,on='gaia_dr3_source_id',how="inner")

#calculate the radius in solar units
match_tab_gentile['radius_gentile']=(1/RsunCGS)*np.sqrt(GCGS*MsunCGS*match_tab_gentile['mass_gentile']/10**match_tab_gentile['logg_gentile'])
#calculate errors on the radius
match_tab_gentile['e_radius_gentile']=match_tab_gentile['radius_gentile']*np.sqrt(((1/(4*(match_tab_gentile['mass_gentile'])**2))*(match_tab_gentile['e_mass_gentile'])**2)+((1/4)*((np.log(10))**2)*(match_tab_gentile['e_logg_gentile'])**2))

# Initialize an array to flag that WD is in Gentile dataset
match_tab_gentile['gentile_flag'] = np.full(len(match_tab_gentile), True)

#drop all extra columns
match_tab_gentile=match_tab_gentile.drop(['ra','dec'],axis=1)


  gaia_df = pd.read_csv('data/J_MNRAS_508_3877_maincat.dat.gz.txt',sep='|')


In [17]:
match_tab_gentile

Unnamed: 0,gaia_dr3_source_id,teff_gentile,e_teff_gentile,logg_gentile,e_logg_gentile,mass_gentile,e_mass_gentile,radius_gentile,e_radius_gentile,gentile_flag
0,2064465331787360768,19836.66,6355.97,7.943700,0.585069,0.592054,0.298462,0.013600,0.009781,True
1,2064629919226999552,17338.00,4519.11,8.137220,0.477212,0.699200,0.273944,0.011828,0.006899,True
2,2064573324440163456,39100.06,13074.69,8.328240,0.306762,0.856075,0.176891,0.010504,0.003865,True
3,2064576313737415168,18895.76,3490.50,7.993879,0.348547,0.617745,0.188651,0.013113,0.005630,True
4,2067154909024075648,18029.26,2490.83,7.929864,0.247835,0.580184,0.134709,0.013680,0.004214,True
...,...,...,...,...,...,...,...,...,...,...
8418,4317819721799528576,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,,True
8419,1819807540607001472,35921.77,9377.87,8.001683,0.399182,0.663781,0.208626,0.013471,0.006543,True
8420,1836429338917096704,10888.68,159.81,7.275531,0.042046,0.319508,0.011880,0.021562,0.001118,True
8421,1836410471114858624,29401.75,3993.43,8.499843,0.269090,0.942986,0.159434,0.009048,0.002906,True


# Koester 2009 SPY Spectroscopic Catalog
- Paper: https://ui.adsabs.harvard.edu/abs/2009A%26A...505..441K/abstract
- Catalog: https://cdsarc.cds.unistra.fr/viz-bin/cat/J/A+A/505/441#/browse
- Measured using SPY spectra
- Contains: Teff and Logg
    - Teff: Effective temperature in K
    - e_Teff: rms uncertainty on Teff in K
    - logg: Surface gravity in log(cm/s^2)
    - e_logg: rms uncertainty on logg in log(cm/s^2)
    - Since most stars have more than one spectrum observed, the parameters are the weighted averages of the individual solutions, with the inverse square of the formal 1σ uncertainties as weights. The 1σ final uncertainties given in Table 1 are obtained from the individual values and should only be used as an indicator of the quality of the data. As is well known, with spectra of the quality used here, the systematic errors from the reduction and fitting process are usually much larger than the purely statistical uncertainties. We estimated more realistic uncertainties by comparing the differences between solutions from several spectra of the same object.


In [18]:
#read in the Koester data
koester_astpy = Table.read('data/koester2009_table1.dat.fits')

#convert the RA in h, min, s to degrees
coords=[]
for i in range(len(koester_astpy)):
    hour=koester_astpy['RAh'][i]
    minit=koester_astpy['RAm'][i]
    sec=koester_astpy['RAs'][i]
    sign=koester_astpy['DE-'][i]
    deg=koester_astpy['DEd'][i]
    arcmin=koester_astpy['DEm'][i]
    arcsec=koester_astpy['DEs'][i]
    coord=SkyCoord(str(hour)+'h'+str(minit)+'m'+str(sec)+'s',str(sign)+str(deg)+'d'+str(arcmin)+'m'+str(arcsec)+'s')
    coords.append(coord)
koester_astpy['SkyCoord'] =coords

# Match targets within 3 arcsec
join_func = table.join_skycoord(3 * u.arcsecond)
match_tab_astpy = table.join(SDSS_DA_astpy, koester_astpy, join_funcs={'SkyCoord': join_skycoord(3 * u.arcsec)})

#Convert the matched table to a pandas dataframe
match_tab_koester=match_tab_astpy.to_pandas()

#rename columns
match_tab_koester=match_tab_koester.rename(columns={"Teff": "teff_koester","e_Teff": "e_teff_koester","logg": "logg_koester","e_logg": "e_logg_koester"})

#drop all extra columns
match_tab_koester=match_tab_koester.drop(['ra','dec','SkyCoord_id','SkyCoord_1.ra', 'SkyCoord_1.dec', 'n_Name', 'Name', 'RAh',
       'RAm', 'RAs', 'DE-', 'DEd', 'DEm', 'DEs', 'mag', 'n_mag', 'ONames','Nsp', 'Rem', 'SkyCoord_2.ra', 'SkyCoord_2.dec'],axis=1)

# Initialize an array to flag that WD is in Koester dataset
match_tab_koester['koester_flag'] = np.full(len(match_tab_koester), True)




In [19]:
match_tab_koester

Unnamed: 0,gaia_dr3_source_id,teff_koester,e_teff_koester,logg_koester,e_logg_koester,koester_flag
0,3632418223511334016,32936,141,7.911,0.024,True
1,2497895053130247040,7948,13,7.793,0.019,True
2,1738521539295354240,35559,79,7.681,0.011,True
3,2558916466707621504,41083,155,7.742,0.016,True
4,3796545519645331584,23942,45,7.628,0.009,True
5,3634151534873010176,20188,75,8.47,0.013,True
6,3650552739370519680,15775,72,8.025,0.014,True
7,3877952432852336128,14755,66,7.808,0.012,True
8,3874412413432647680,9486,16,8.02,0.018,True
9,3868927607051816320,22924,93,7.324,0.012,True


# Kepler 2019 SDSS DR14 Spectroscopic Catalog
- Paper: https://ui.adsabs.harvard.edu/abs/2019MNRAS.486.2169K/abstract
- Catalog: https://cdsarc.cds.unistra.fr/viz-bin/cat/J/MNRAS/486/2169
- Measured using SDSS DR14 spectra
- Contains: RVs, Teff, Logg, & Mass measurements (Logg+Mass can be used to get Radius $R=\sqrt{GM/g}$)
    - Teff: Effective temperature in K
    - e_Teff: Effective temperature error in K
    - logg: Surface gravity in log(cm/s^2)
    - e_logg: Surface gravity error in log(cm/s^2)
    - RV: Radial velocity in km/s
    - e_RV: Radial velocity error in km/s
    - Mass: Mass in Msun (with ML2/alpha=0.7 model)
    - e_Mass: Mass error in Msun

In [20]:
#read in the Kepler data
kepler_df = pd.read_csv('data/kepler_table3.dat.gz.txt',delimiter='|')
#drop the last row of NaNs and the first row of dashes
kepler_df=kepler_df.dropna()
kepler_df=kepler_df.drop([0])
#reset the indices
indices=np.arange(0,len(kepler_df))
kepler_df.set_index(indices,inplace=True)
#remove space from column names
kepler_df.columns = kepler_df.columns.str.replace(' ', '')

#turn into astropy table
kepler_astpy=Table.from_pandas(kepler_df)

#convert the SDSS name to coordinates
coords=[]
for i in range(len(kepler_astpy)):
    hour=kepler_astpy['SDSSJ'][i][:2]
    minit=kepler_astpy['SDSSJ'][i][2:4]
    sec=kepler_astpy['SDSSJ'][i][4:9]
    sign=kepler_astpy['SDSSJ'][i][9:10]
    deg=kepler_astpy['SDSSJ'][i][10:12]
    arcmin=kepler_astpy['SDSSJ'][i][12:14]
    arcsec=kepler_astpy['SDSSJ'][i][14:]
    coord=SkyCoord(str(hour)+'h'+str(minit)+'m'+str(sec)+'s',str(sign)+str(deg)+'d'+str(arcmin)+'m'+str(arcsec)+'s')
    coords.append(coord)
kepler_astpy['SkyCoord'] =coords

# Match targets within 3 arcsec
join_func = table.join_skycoord(3 * u.arcsecond)
match_tab_astpy = table.join(SDSS_DA_astpy, kepler_astpy, join_funcs={'SkyCoord': join_skycoord(3 * u.arcsec)})

#Convert the matched table to a pandas dataframe
match_tab_kepler=match_tab_astpy.to_pandas()

#rename columns
match_tab_kepler=match_tab_kepler.rename(columns={"Teff": "teff_kepler","e_Teff": "e_teff_kepler",
                                                  "logg": "logg_kepler","e_logg": "e_logg_kepler",
                                                 "RV":"rv_kepler","e_RV":"e_rv_kepler",
                                                 "Mass":"mass_kepler","e_Mass":"e_mass_kepler",
                                                 "S/N":"snr_kepler"})

#convert to floats
match_tab_kepler['teff_kepler']=match_tab_kepler['teff_kepler'].astype(float)
match_tab_kepler['e_teff_kepler']=match_tab_kepler['e_teff_kepler'].astype(float)
match_tab_kepler['logg_kepler']=match_tab_kepler['logg_kepler'].astype(float)
match_tab_kepler['e_logg_kepler']=match_tab_kepler['e_logg_kepler'].astype(float)
match_tab_kepler['rv_kepler']=match_tab_kepler['rv_kepler'].astype(float)
match_tab_kepler['e_rv_kepler']=match_tab_kepler['e_rv_kepler'].astype(float)
match_tab_kepler['mass_kepler']=match_tab_kepler['mass_kepler'].astype(float)
match_tab_kepler['e_mass_kepler']=match_tab_kepler['e_mass_kepler'].astype(float)

#calculate the radius in solar units
match_tab_kepler['radius_kepler']=(1/RsunCGS)*np.sqrt(GCGS*MsunCGS*match_tab_kepler['mass_kepler']/10**match_tab_kepler['logg_kepler'])
#calculate errors on the radius
match_tab_kepler['e_radius_kepler']=match_tab_kepler['radius_kepler']*np.sqrt(((1/(4*(match_tab_kepler['mass_kepler'])**2))*(match_tab_kepler['e_mass_kepler'])**2)+((1/4)*((np.log(10))**2)*(match_tab_kepler['e_logg_kepler'])**2))

# Initialize an array to flag that WD is in kepler dataset
match_tab_kepler['kepler_flag'] = np.full(len(match_tab_kepler), True)



In [21]:
#some of the Kepler WDs are VERY close together with very similar derived Teff, Logg, M, and R
#these are likely the same WD
#keep only the higher Kepler SNR observation
print(len(match_tab_kepler))
duplicates_catids=[]
duplicates_ind=[]
for i in range(len(match_tab_kepler)):
    name=match_tab_kepler['gaia_dr3_source_id'][i]
    if len(match_tab_kepler.query('gaia_dr3_source_id== @name'))>1:
        duplicates_catids.append(name)
        duplicates_ind.append(i)
duplicates_catids=np.unique(duplicates_catids)

for cat_id in duplicates_catids:
    repeat_df=match_tab_kepler.query('gaia_dr3_source_id== @cat_id')
    max_snr=max(repeat_df['snr_kepler'])
    max_snr_df=repeat_df.query('snr_kepler== @max_snr')
    #reset the indices
    indices=np.arange(0,len(max_snr_df))
    max_snr_df.set_index(indices,inplace=True)
    #drop all entries for this WD then add back in the max_snr entry
    match_tab_kepler=match_tab_kepler.drop(repeat_df.index.to_list())
    match_tab_kepler=match_tab_kepler.append(max_snr_df.loc[0]) #if there are multiple observations with same max snr, this will just keep first entry
    #reset the indices
    indices=np.arange(0,len(match_tab_kepler))
    match_tab_kepler.set_index(indices,inplace=True)
        

print(len(match_tab_kepler))

1556
1523


  match_tab_kepler=match_tab_kepler.append(max_snr_df.loc[0]) #if there are multiple observations with same max snr, this will just keep first entry
  match_tab_kepler=match_tab_kepler.append(max_snr_df.loc[0]) #if there are multiple observations with same max snr, this will just keep first entry
  match_tab_kepler=match_tab_kepler.append(max_snr_df.loc[0]) #if there are multiple observations with same max snr, this will just keep first entry
  match_tab_kepler=match_tab_kepler.append(max_snr_df.loc[0]) #if there are multiple observations with same max snr, this will just keep first entry
  match_tab_kepler=match_tab_kepler.append(max_snr_df.loc[0]) #if there are multiple observations with same max snr, this will just keep first entry
  match_tab_kepler=match_tab_kepler.append(max_snr_df.loc[0]) #if there are multiple observations with same max snr, this will just keep first entry
  match_tab_kepler=match_tab_kepler.append(max_snr_df.loc[0]) #if there are multiple observations with sam

In [22]:
#drop all extra columns
match_tab_kepler=match_tab_kepler.drop(['ra','dec','SkyCoord_id','SkyCoord_1.ra', 'SkyCoord_1.dec', 'P-M-F', 'SDSSJ', 'SNg',
                                          'umag', 'e_umag', 'gmag', 'e_gmag', 'rmag', 'e_rmag', 'imag', 'e_imag',
                                          'zmag', 'e_zmag', 'E(B-V)', 'PM', 'GLONGLAT', 'Type', 'DT(SP)',
                                          'd', 'z', 'Teff3D', 'logg3D', 'DTeff', 'Dlogg', 'Mass3D', 'e_',
                                          'SkyCoord_2.ra', 'SkyCoord_2.dec'],axis=1)

In [23]:
match_tab_kepler

Unnamed: 0,gaia_dr3_source_id,teff_kepler,e_teff_kepler,logg_kepler,e_logg_kepler,rv_kepler,e_rv_kepler,snr_kepler,mass_kepler,e_mass_kepler,radius_kepler,e_radius_kepler,kepler_flag
0,2160210705546460032,11483.0,23.0,8.252,0.012,0.0,0.0,000,0.000,0.000,0.000000,,True
1,1382375154530149248,6880.0,56.0,7.680,0.139,-21.0,9.0,014,0.448,0.062,0.016027,0.002794,True
2,1382482764934977664,27091.0,118.0,8.082,0.019,26.0,7.0,033,0.679,0.010,0.012421,0.000287,True
3,1424795481360349824,14590.0,153.0,8.501,0.019,79.0,6.0,037,0.909,0.012,0.008872,0.000203,True
4,1428280108586688768,17976.0,139.0,7.959,0.027,19.0,9.0,023,0.597,0.013,0.013419,0.000442,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1518,2850536087452025216,12262.0,74.0,8.177,0.030,-8.0,8.0,026,0.706,0.016,0.011353,0.000413,True
1519,3662011643397066112,11799.0,40.0,8.095,0.020,3.0,5.0,037,0.657,0.011,0.012037,0.000295,True
1520,3795787436442112896,13128.0,137.0,8.415,0.037,45.0,11.0,021,0.854,0.024,0.009494,0.000426,True
1521,5176405257856675072,10984.0,120.0,8.230,0.142,25.0,28.0,008,0.732,0.086,0.010876,0.001889,True


# Merge all of the matched dataframes into one containing all the comparison data

In [24]:
#merge the dataframes on gaia_dr3_source_id
match_tab_tmp1=pd.merge(left=match_tab_falcon,right=match_tab_raddi,on=['gaia_dr3_source_id'],how="outer")
match_tab_tmp2=pd.merge(left=match_tab_tmp1,right=match_tab_ang,on=['gaia_dr3_source_id'],how="outer")
match_tab_tmp3=pd.merge(left=match_tab_tmp2,right=match_tab_gentile,on=['gaia_dr3_source_id'],how="outer")
match_tab_tmp4=pd.merge(left=match_tab_tmp3,right=match_tab_koester,on=['gaia_dr3_source_id'],how="outer")
match_tab_tmp5=pd.merge(left=match_tab_tmp4,right=match_tab_kepler,on=['gaia_dr3_source_id'],how="outer")
#check that each gaia id has only 1 row
source_ids=match_tab_tmp5['gaia_dr3_source_id'].unique()
mults=[]
for i in range(len(source_ids)):
    source_id=source_ids[i]
    if len(match_tab_tmp5.query('gaia_dr3_source_id==@source_id'))>1:
        mults.append(source_id)
print(len(mults))


0


In [25]:
#check that the merged dataframe still has just 1 entry per object
print(len(match_tab_falcon))
print(len(match_tab_raddi))
print(len(match_tab_ang))
print(len(match_tab_gentile))
print(len(match_tab_koester))
print(len(match_tab_kepler))
print('')
print(len(match_tab_tmp5.query('falcon_flag==True'))) #add 1 since set binary flag=False
print(len(match_tab_tmp5.query('raddi_flag==True')))
print(len(match_tab_tmp5.query('anguiano_flag==True'))) #add 2 since set 2 with crazy RVs to False
print(len(match_tab_tmp5.query('gentile_flag==True')))
print(len(match_tab_tmp5.query('koester_flag==True')))
print(len(match_tab_tmp5.query('kepler_flag==True')))

15
277
1567
8423
21
1523

14
277
1565
8423
21
1523


In [26]:
#now merge with the full SDSS catalog of individual observations, not just individual objects
match_tab=pd.merge(left=SDSS_DA_df_full,right=match_tab_tmp5,on=['gaia_dr3_source_id'],how="outer")
#replace all nans in flag columns with False
match_tab['falcon_flag']=match_tab['falcon_flag'].fillna(False)
match_tab['raddi_flag']=match_tab['raddi_flag'].fillna(False)
match_tab['anguiano_flag']=match_tab['anguiano_flag'].fillna(False)
match_tab['gentile_flag']=match_tab['gentile_flag'].fillna(False)
match_tab['koester_flag']=match_tab['koester_flag'].fillna(False)
match_tab['kepler_flag']=match_tab['kepler_flag'].fillna(False)

display(match_tab)

Unnamed: 0,sdss_id,gaia_dr3_source_id,fieldid,mjd,catalogid21,catalogid25,catalogid31,ra,dec,l,...,logg_kepler,e_logg_kepler,rv_kepler,e_rv_kepler,snr_kepler,mass_kepler,e_mass_kepler,radius_kepler,e_radius_kepler,kepler_flag
0,66900799,2064465331787360768,15371,59367,4208244852,27021597767779661,63050395004738987,309.097870,39.790707,79.360954,...,,,,,,,,,,False
1,66902678,2064629919226999552,15371,59387,4208245421,27021597767780227,63050395004761024,308.850861,39.829388,79.277802,...,,,,,,,,,,False
2,66902096,2064573324440163456,15371,59367,4208256829,27021597767791601,63050395004755297,309.594055,40.293499,79.991142,...,,,,,,,,,,False
3,66902096,2064573324440163456,15371,59387,4208256829,27021597767791601,63050395004755297,309.594055,40.293499,79.991142,...,,,,,,,,,,False
4,66902096,2064573324440163456,15371,59392,4208256829,27021597767791601,63050395004755297,309.594055,40.293499,79.991142,...,,,,,,,,,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16646,63702277,1819807540607001472,100449,60123,5321044003,27021598877377235,63050394873272002,296.992432,15.482511,53.187588,...,,,,,,,,,,False
16647,64007290,1836429338917096704,102417,60128,5381644556,27021598937878993,63050394895212270,302.195618,26.686472,65.310837,...,,,,,,,,,,False
16648,64007290,1836429338917096704,102417,60129,5381644556,27021598937878993,63050394895212270,302.195618,26.686472,65.310837,...,,,,,,,,,,False
16649,64007023,1836410471114858624,102417,60129,5381663899,27021598937898324,63050394895197599,302.599670,26.807383,65.608597,...,,,,,,,,,,False


In [27]:
#check that every row has a unique spec file
print(len(match_tab['spec_file'].unique()))

#check that the length of the matched table is the same as the DA df
print(len(SDSS_DA_df_full))

16651
16651


In [28]:
#add a flag to show if the WD has SnowWhite measurements
SnowWhite_flags=[]
for i in range(len(match_tab)):
    if match_tab['logg_snowwhite'][i]>0:
        SnowWhite_flags.append(True)
    else:
        SnowWhite_flags.append(False)
match_tab['snowwhite_flag']=SnowWhite_flags

# Save the table as a .csv file and import back in if needed

In [29]:
#sort by gaia dr3 source id
match_tab=match_tab.sort_values(by=['gaia_dr3_source_id'])
#reset the indices
indices=np.arange(0,len(match_tab))
match_tab.set_index(indices,inplace=True)

In [30]:
#save to backup directory
match_tab.to_csv('csv/SDSSV_DA_df_nb01.csv',index=False)

In [31]:
#save overall result
match_tab.to_csv('csv/SDSSV_DA_df.csv',index=False)

In [32]:
#import back in the data, either from the backup file or from the latest version of the table
backup=False
if backup==True:
    match_tab=pd.read_csv('csv/SDSSV_DA_df_nb01.csv')
else:
    match_tab=pd.read_csv('csv/SDSSV_DA_df.csv')
display(match_tab)

Unnamed: 0,sdss_id,gaia_dr3_source_id,fieldid,mjd,catalogid21,catalogid25,catalogid31,ra,dec,l,...,e_logg_kepler,rv_kepler,e_rv_kepler,snr_kepler,mass_kepler,e_mass_kepler,radius_kepler,e_radius_kepler,kepler_flag,snowwhite_flag
0,114816794,74698071455360,15049,59193,4401953037,27021597960268717,63050396551223308,44.726467,0.511271,176.115112,...,0.145,0.0,0.0,0.0,0.000,0.000,0.000000,,True,True
1,114817212,152935195517952,104621,59884,4492129808,27021598049957823,63050396551224778,45.925846,0.886115,176.967087,...,0.018,16.0,5.0,41.0,0.573,0.009,0.013530,0.000300,True,True
2,114817969,288175125714560,104585,59885,4401953415,27021597960269086,63050396551227300,44.287430,0.774349,175.361862,...,0.022,26.0,7.0,31.0,0.579,0.011,0.013507,0.000365,True,True
3,114817969,288175125714560,15049,59193,4401953415,27021597960269086,63050396551227300,44.287430,0.774349,175.361862,...,0.022,26.0,7.0,31.0,0.579,0.011,0.013507,0.000365,True,True
4,114817994,294840915182080,104585,59885,4401953649,27021597960269316,63050396551227438,44.417908,0.850883,175.420166,...,,,,,,,,,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16646,114806414,6916382546251879552,101077,59845,4367485665,27021597926212580,63050396551106108,316.439819,-1.489261,48.305843,...,,,,,,,,,False,True
16647,114810228,6916820495476689792,101059,59797,5169691564,27021598726295377,63050396551152137,312.982452,-1.913672,45.894997,...,,,,,,,,,False,True
16648,114811145,6916929484567006976,101059,59797,5169762309,27021598726365798,63050396551163720,313.308960,-1.559500,46.424793,...,,,,,,,,,False,True
16649,114811773,6917009272175089920,101077,59845,4367464136,27021597926191174,63050396551171615,315.295898,-1.861153,47.265293,...,,,,,,,,,False,True


# Make smaller datasets and save as .csv


In [33]:
#make list of all unique object ids
SDSSV_obj_ids=match_tab['gaia_dr3_source_id'].unique()
np.savetxt('csv/SDSSV_obj_ids.txt', SDSSV_obj_ids.astype(str), fmt="%s")

#break up object ids into sets of 500
#save each set of ids
set_size=400
num_id_sets=int(len(SDSSV_obj_ids)/set_size)+1
print(num_id_sets)
np.savetxt('csv/SDSSV_num_id_sets.txt', np.array([num_id_sets]), fmt="%s")
id_SDSSV_dict={}
for i in tqdm(range(num_id_sets)):
    id_SDSSV_dict['set_'+str(i)]=SDSSV_obj_ids[set_size*i:set_size*i+set_size]
    #save list
    np.savetxt('csv/SDSSV_short_sets/SDSSV_obj_ids_'+str(i)+'.txt', id_SDSSV_dict['set_'+str(i)].astype(str), fmt="%s")
    

22


100%|█████████████████████████████████████████| 22/22 [00:00<00:00, 1732.11it/s]


In [34]:
#make a .csv for each set of ids
for i in range(num_id_sets):
    obj_ids=id_SDSSV_dict['set_'+str(i)]
    #search dataframe for objects in obj_id list
    short_set=match_tab[match_tab['gaia_dr3_source_id'].isin(obj_ids)]
    #save to overall result
    short_set.to_csv('csv/SDSSV_short_sets/set_'+str(i)+'.csv',index=False)
    #save to backup
    short_set.to_csv('csv/SDSSV_short_sets/set_'+str(i)+'_nb01.csv',index=False)

### Import back in if needed

In [35]:
#import back in the data, either from the backup file or from the latest version of the table
backup=False
if backup==True:
    #read in all object id lists
    SDSSV_num_id_sets=np.genfromtxt('csv/SDSSV_num_id_sets.txt',dtype=int)
    id_SDSSV_dict={}
    for i in tqdm(range(SDSSV_num_id_sets)):
        id_SDSSV_dict['set_'+str(i)]=np.genfromtxt('csv/SDSSV_short_sets/SDSSV_obj_ids_'+str(i)+'.txt',dtype=str)
    
    #read in all data tables
    SDSSV_dict={}
    size=0
    for i in tqdm(range(SDSSV_num_id_sets)):
        SDSSV_dict['set_'+str(i)]=pd.read_csv('csv/SDSSV_short_sets/set_'+str(i)+'_nb01.csv')
        size=size+len(SDSSV_dict['set_'+str(i)])
else:
    #read in all object id lists
    SDSSV_num_id_sets=np.genfromtxt('csv/SDSSV_num_id_sets.txt',dtype=int)
    id_SDSSV_dict={}
    for i in tqdm(range(SDSSV_num_id_sets)):
        id_SDSSV_dict['set_'+str(i)]=np.genfromtxt('csv/SDSSV_short_sets/SDSSV_obj_ids_'+str(i)+'.txt',dtype=str)
    
    #read in all data tables
    SDSSV_dict={}
    size=0
    for i in tqdm(range(SDSSV_num_id_sets)):
        SDSSV_dict['set_'+str(i)]=pd.read_csv('csv/SDSSV_short_sets/set_'+str(i)+'.csv')
        size=size+len(SDSSV_dict['set_'+str(i)])

print(size)
print(len(match_tab))

100%|█████████████████████████████████████████| 22/22 [00:00<00:00, 2008.77it/s]
100%|██████████████████████████████████████████| 22/22 [00:00<00:00, 154.67it/s]

16651
16651



