In [2]:
from astropy.io import fits
import pandas as pd

#to import SDSS-V data
import fsspec
import requests
import aiohttp

# This notebook gets the sample of all SDSS-V spectra flagged by SnowWhite as DA WDs

# Get Sample of SDSS-V Spectra

The data can be accessed from SAS: https://data.sdss5.org/sas/sdsswork/

You must use the common SDSS username and password

**LATEST link to data:** (has both plate and FPS program data)

https://data.sdss5.org/sas/sdsswork/bhm/boss/spectro/redux/v6_1_0/spectra


The spectra directory is organized as follows:

    - Plate number (ex. 15000/)
        - MJD (ex. 15000/59146/)
            - spectum fits file labeled by spec-PLATE-MJD-CATALOGID.fits (ex. spec-015000-59146-4375786564.fits)
            - the most recent spAll file also has a SPEC_FILE column which gives the name of the fits file

Download and open spAll-master.fits

    - This is a table that has information about objects observed in the plate and FPS programs

In [9]:
#to open the latest version of the spAll file from the SDSS-V database without downloading it
#for now, use the downloaded version
"""
username = 'sdss5'
password = 'panoPtic-5'

file='spAll-lite-v6_1_0.fits.gz'

url='https://data.sdss5.org/sas/sdsswork/bhm/boss/spectro/redux/v6_1_0/'+file

#this is a very large file, so use with
with fits.open(url, use_fsspec=True, fsspec_kwargs={"auth":aiohttp.BasicAuth(username, password)}) as master_tab:

    #get the file info
    #master_tab.info()

    #see the table columns
    #master_tab[1].header

    # turn the important columns into a pandas dataframe
    master_df=pd.DataFrame({'CATALOGID':master_tab[1].data['CATALOGID'].tolist(),
                        'SPEC_FILE':master_tab[1].data['SPEC_FILE'].tolist(), #file name of spectrum
                        'SN_MEDIAN_ALL':master_tab[1].data['SN_MEDIAN_ALL'].tolist(), #median S/N for all good pixels in all filters
                       'MJD':master_tab[1].data['MJD'].tolist(), #observation time
                       #'GAIA_BP':master_tab[1].data['GAIA_BP'].tolist(), #BP=blue photometer
                       #'GAIA_RP':master_tab[1].data['GAIA_RP'].tolist(), #RP=red photometer
                       #'GAIA_G':master_tab[1].data['GAIA_G'].tolist(), #G passband magnitude
                       #'PARALLAX':master_tab[1].data['PARALLAX'].tolist(), #Gaia parallax
                        #'XCSAO_RV':master_tab[1].data['XCSAO_RV'].tolist(), #pipeline measured RV
                        #'XCSAO_ERV':master_tab[1].data['XCSAO_ERV'].tolist() #pipeline measured RV error
                        #'XCSAO_RXC':master_tab[1].data['XCSAO_RXC'].tolist(), #quality of fit, takes into the acount both the SNR and the faithfulness of the template match
                        #'XCSAO_TEFF':master_tab[1].data['XCSAO_TEFF'].tolist(), #pipeline measured Teff
                        #'XCSAO_ETEFF':master_tab[1].data['XCSAO_ETEFF'].tolist(), #pipeline measured Teff error
                        #'XCSAO_LOGG':master_tab[1].data['XCSAO_LOGG'].tolist(),#pipeline measured logg
                        #'XCSAO_ELOGG':master_tab[1].data['XCSAO_ELOGG'].tolist(), ##pipeline measured logg error
                       })
display(master_df)
"""

'\nusername = \'sdss5\'\npassword = \'panoPtic-5\'\n\nfile=\'spAll-lite-v6_1_0.fits.gz\'\n\nurl=\'https://data.sdss5.org/sas/sdsswork/bhm/boss/spectro/redux/v6_1_0/\'+file\n\n#this is a very large file, so use with\nwith fits.open(url, use_fsspec=True, fsspec_kwargs={"auth":aiohttp.BasicAuth(username, password)}) as master_tab:\n\n    #get the file info\n    #master_tab.info()\n\n    #see the table columns\n    #master_tab[1].header\n\n    # turn the important columns into a pandas dataframe\n    master_df=pd.DataFrame({\'CATALOGID\':master_tab[1].data[\'CATALOGID\'].tolist(),\n                        \'SPEC_FILE\':master_tab[1].data[\'SPEC_FILE\'].tolist(), #file name of spectrum\n                        \'SN_MEDIAN_ALL\':master_tab[1].data[\'SN_MEDIAN_ALL\'].tolist(), #median S/N for all good pixels in all filters\n                       \'MJD\':master_tab[1].data[\'MJD\'].tolist(), #observation time\n                       #\'GAIA_BP\':master_tab[1].data[\'GAIA_BP\'].tolist(), #BP=b

In [9]:
#to open a version of the spallv6_1_0 file that I downloaded from the SDSS-V database earlier
#this version will be missing the latest observations


#this is a very large file, so use with to open the fits file
with fits.open('data/spAll-lite-v6_1_0.fits') as master_tab:

    #get the file info
    #master_tab.info()

    #see the table columns
    #master_tab[1].header

    # turn the important columns into a pandas dataframe
    master_df=pd.DataFrame({'CATALOGID':master_tab[1].data['CATALOGID'].tolist(),
                        'SPEC_FILE':master_tab[1].data['SPEC_FILE'].tolist(), #file name of spectrum
                        'SN_MEDIAN_ALL':master_tab[1].data['SN_MEDIAN_ALL'].tolist(), #median S/N for all good pixels in all filters
                       'MJD':master_tab[1].data['MJD'].tolist(), #observation time
                       #'GAIA_BP':master_tab[1].data['GAIA_BP'].tolist(), #BP=blue photometer
                       #'GAIA_RP':master_tab[1].data['GAIA_RP'].tolist(), #RP=red photometer
                       #'GAIA_G':master_tab[1].data['GAIA_G'].tolist(), #G passband magnitude
                       #'PARALLAX':master_tab[1].data['PARALLAX'].tolist(), #Gaia parallax
                        #'XCSAO_RV':master_tab[1].data['XCSAO_RV'].tolist(), #pipeline measured RV
                        #'XCSAO_ERV':master_tab[1].data['XCSAO_ERV'].tolist() #pipeline measured RV error
                        #'XCSAO_RXC':master_tab[1].data['XCSAO_RXC'].tolist(), #quality of fit, takes into the acount both the SNR and the faithfulness of the template match
                        #'XCSAO_TEFF':master_tab[1].data['XCSAO_TEFF'].tolist(), #pipeline measured Teff
                        #'XCSAO_ETEFF':master_tab[1].data['XCSAO_ETEFF'].tolist(), #pipeline measured Teff error
                        #'XCSAO_LOGG':master_tab[1].data['XCSAO_LOGG'].tolist(),#pipeline measured logg
                        #'XCSAO_ELOGG':master_tab[1].data['XCSAO_ELOGG'].tolist(), ##pipeline measured logg error
                       })
display(master_df)


Unnamed: 0,CATALOGID,SPEC_FILE,SN_MEDIAN_ALL,MJD
0,4375786564,spec-015000-59146-4375786564.fits,18.596743,59146
1,4375786575,spec-015000-59146-4375786575.fits,4.368863,59146
2,4375787016,spec-015000-59146-4375787016.fits,5.072113,59146
3,4375787024,spec-015000-59146-4375787024.fits,3.618558,59146
4,4375787075,spec-015000-59146-4375787075.fits,2.997865,59146
...,...,...,...,...
2674651,0,spec-112361-59940-u022430.5-060419.7.fits,0.578343,59940
2674652,0,spec-112361-59940-u022514.3-042418.4.fits,0.539469,59940
2674653,0,spec-112361-59940-u022535.2-043100.8.fits,0.509017,59940
2674654,0,spec-112361-59940-u022546.6-050550.5.fits,0.602519,59940


# Get the SDSS-V WD Classifications
Download the SnowWhite classifications (allStar-SnowWhite-0.3.0-v6_0_9-1.0.fits) from

https://data.sdss5.org/sas/sdsswork/mwm/spectro/astra/0.3.0/v6_0_9-1.0/summary/

In [11]:
#to open the latest version of the SnowWhite classifications from the SDSS-V database without downloading it
#for now, use the downloaded version
"""
username = 'sdss5'
password = 'panoPtic-5'

file='allStar-SnowWhite-0.3.0-v6_0_9-1.0.fits'

url='https://data.sdss5.org/sas/sdsswork/mwm/spectro/astra/0.3.0/v6_0_9-1.0/summary/'+file

#this is a very large file, so use with
with fits.open(url, use_fsspec=True, fsspec_kwargs={"auth":aiohttp.BasicAuth(username, password)}) as SWfile:

    #get the file info
    #SWfile.info()

    #see the table columns
    #SWfile[1].header

    # turn the important columns into a pandas dataframe
    SW_df=pd.DataFrame({'CATALOGID':SWfile[1].data['CAT_ID  '].tolist(), #SDSS-V catalog identifier  
                    'RA':SWfile[1].data['RA      '].tolist(), #SDSS-V catalog right ascension (J2000) [deg]   
                    'DEC':SWfile[1].data['DEC     '].tolist(), #SDSS-V catalog declination (J2000) [deg] 
                    'GAIA_DR3_SOURCE_ID':SWfile[1].data['GAIA_DR3_SOURCE_ID'].tolist(), #Gaia (DR3) source identifier
                    #'PLX':SWfile[1].data['PLX     '].tolist(), #Gaia parallax [mas]
                    #'GAIA_V_RAD':SWfile[1].data['GAIA_V_RAD'].tolist(), #Gaia radial velocity [km/s]
                    #'GAIA_E_V_RAD':SWfile[1].data['GAIA_E_V_RAD'].tolist(), #Gaia radial velocity error [km/s]
                    #'G_MAG':SWfile[1].data['G_MAG   '].tolist(), #Gaia mean apparent G magnitude [mag]
                    #'BP_MAG':SWfile[1].data['BP_MAG  '].tolist(), #Gaia mean apparent BP magnitude [mag]
                    #'RP_MAG':SWfile[1].data['RP_MAG  '].tolist(), #Gaia mean apparent RP magnitude [mag]
                    #'XCSAO_TEFF':SWfile[1].data['XCSAO_TEFF'].tolist(), #Effective temperature from XCSAO [K]
                    #'XCSAO_E_TEFF':SWfile[1].data['XCSAO_E_TEFF'].tolist(), #Error in effective temperature from XCSAO [K] 
                    #'XCSAO_LOGG':SWfile[1].data['XCSAO_LOGG'].tolist(), #Surface gravity from XCSAO   
                    #'XCSAO_E_LOGG':SWfile[1].data['XCSAO_E_LOGG'].tolist(), #Error in surface gravity from XCSAO 
                    #'XCSAO_V_RAD':SWfile[1].data['XCSAO_V_RAD'].tolist(), #Radial velocity from XCSAO [km/s]  
                    #'XCSAO_E_V_RAD':SWfile[1].data['XCSAO_E_V_RAD'].tolist(), #Error in radial velocity from XCSAO [km/s]
                    'SNR_SW':SWfile[1].data['SNR     '].tolist(), #Mean signal-to-noise ratio
                    'WD_TYPE':SWfile[1].data['WD_TYPE '].tolist(), #White dwarf type 
                    'TEFF':SWfile[1].data['TEFF    '].tolist(), #Stellar effective temperature [K]  
                    'E_TEFF':SWfile[1].data['E_TEFF  '].tolist(), #Error in stellar effective temperature [K] 
                    'LOGG':SWfile[1].data['LOGG    '].tolist(), #Surface gravity [log10(cm/s^2)]  
                    'E_LOGG':SWfile[1].data['E_LOGG  '].tolist(), #Error in surface gravity [log10(cm/s^2)] 
                    #'V_REL':SWfile[1].data['V_REL   '].tolist() #Relative velocity [km/s] 
                    #'CONDITIONED_ON_PARALLAX':SWfile[1].data['CONDITIONED_ON_PARALLAX'].tolist(), #Parallax used to constrain solution [mas] 
                    #'CONDITIONED_ON_PHOT_G_MEAN_MAG':SWfile[1].data['CONDITIONED_ON_PHOT_G_MEAN_MAG'].tolist() #G mag used to constrain solution
                   })
display(SW_df)
"""

'\nusername = \'sdss5\'\npassword = \'panoPtic-5\'\n\nfile=\'allStar-SnowWhite-0.3.0-v6_0_9-1.0.fits\'\n\nurl=\'https://data.sdss5.org/sas/sdsswork/mwm/spectro/astra/0.3.0/v6_0_9-1.0/summary/\'+file\n\n#this is a very large file, so use with\nwith fits.open(url, use_fsspec=True, fsspec_kwargs={"auth":aiohttp.BasicAuth(username, password)}) as SWfile:\n\n    #get the file info\n    #SWfile.info()\n\n    #see the table columns\n    #SWfile[1].header\n\n    # turn the important columns into a pandas dataframe\n    SW_df=pd.DataFrame({\'CATALOGID\':SWfile[1].data[\'CAT_ID  \'].tolist(), #SDSS-V catalog identifier  \n                    \'RA\':SWfile[1].data[\'RA      \'].tolist(), #SDSS-V catalog right ascension (J2000) [deg]   \n                    \'DEC\':SWfile[1].data[\'DEC     \'].tolist(), #SDSS-V catalog declination (J2000) [deg] \n                    \'GAIA_DR3_SOURCE_ID\':SWfile[1].data[\'GAIA_DR3_SOURCE_ID\'].tolist(), #Gaia (DR3) source identifier\n                    #\'PLX\':S

In [10]:
#open the WD classification files
SWfile=fits.open('data/allStar-SnowWhite-0.3.0-v6_0_9-1.0.fits')

#SWfile.info()

#SWfile[1].header

# turn the important columns into a pandas dataframe
SW_df=pd.DataFrame({'CATALOGID':SWfile[1].data['CAT_ID  '].tolist(), #SDSS-V catalog identifier  
                    'RA':SWfile[1].data['RA      '].tolist(), #SDSS-V catalog right ascension (J2000) [deg]   
                    'DEC':SWfile[1].data['DEC     '].tolist(), #SDSS-V catalog declination (J2000) [deg] 
                    'GAIA_DR3_SOURCE_ID':SWfile[1].data['GAIA_DR3_SOURCE_ID'].tolist(), #Gaia (DR3) source identifier
                    #'PLX':SWfile[1].data['PLX     '].tolist(), #Gaia parallax [mas]
                    #'GAIA_V_RAD':SWfile[1].data['GAIA_V_RAD'].tolist(), #Gaia radial velocity [km/s]
                    #'GAIA_E_V_RAD':SWfile[1].data['GAIA_E_V_RAD'].tolist(), #Gaia radial velocity error [km/s]
                    #'G_MAG':SWfile[1].data['G_MAG   '].tolist(), #Gaia mean apparent G magnitude [mag]
                    #'BP_MAG':SWfile[1].data['BP_MAG  '].tolist(), #Gaia mean apparent BP magnitude [mag]
                    #'RP_MAG':SWfile[1].data['RP_MAG  '].tolist(), #Gaia mean apparent RP magnitude [mag]
                    #'XCSAO_TEFF':SWfile[1].data['XCSAO_TEFF'].tolist(), #Effective temperature from XCSAO [K]
                    #'XCSAO_E_TEFF':SWfile[1].data['XCSAO_E_TEFF'].tolist(), #Error in effective temperature from XCSAO [K] 
                    #'XCSAO_LOGG':SWfile[1].data['XCSAO_LOGG'].tolist(), #Surface gravity from XCSAO   
                    #'XCSAO_E_LOGG':SWfile[1].data['XCSAO_E_LOGG'].tolist(), #Error in surface gravity from XCSAO 
                    #'XCSAO_V_RAD':SWfile[1].data['XCSAO_V_RAD'].tolist(), #Radial velocity from XCSAO [km/s]  
                    #'XCSAO_E_V_RAD':SWfile[1].data['XCSAO_E_V_RAD'].tolist(), #Error in radial velocity from XCSAO [km/s]
                    'SNR_SnowWhite':SWfile[1].data['SNR     '].tolist(), #Mean signal-to-noise ratio
                    'WD_TYPE':SWfile[1].data['WD_TYPE '].tolist(), #White dwarf type 
                    'Teff_SnowWhite':SWfile[1].data['TEFF    '].tolist(), #Stellar effective temperature [K]  
                    'E_Teff_SnowWhite':SWfile[1].data['E_TEFF  '].tolist(), #Error in stellar effective temperature [K] 
                    'Logg_SnowWhite':SWfile[1].data['LOGG    '].tolist(), #Surface gravity [log10(cm/s^2)]  
                    'E_Logg_SnowWhite':SWfile[1].data['E_LOGG  '].tolist(), #Error in surface gravity [log10(cm/s^2)] 
                    #'V_REL':SWfile[1].data['V_REL   '].tolist() #Relative velocity [km/s] 
                    #'CONDITIONED_ON_PARALLAX':SWfile[1].data['CONDITIONED_ON_PARALLAX'].tolist(), #Parallax used to constrain solution [mas] 
                    #'CONDITIONED_ON_PHOT_G_MEAN_MAG':SWfile[1].data['CONDITIONED_ON_PHOT_G_MEAN_MAG'].tolist() #G mag used to constrain solution
                   })
            
#display(SW_df)

# Merge the dataframes

In [11]:
#match the master_df to the SW_df on SDSS catalog ID to add the spectrum file name to the SW_df

#the merged dataframe is longer because multiple objects have the same CATALOGID in master_df
#but different spec files. These are multiple observations of the same WD (different MJD).

SDSSV_WD_df=pd.merge(left=SW_df,right=master_df,on='CATALOGID',how="inner")

# Select only DA WDs

In [12]:
SDSS_DA_df=SDSSV_WD_df.query("WD_TYPE=='DA'")

# Save the table of SDSS-V DA WDs as a .csv file and import back in if needed
Update the version depending on if downloaded new spAll file

In [13]:
#SDSS_DA_df.to_csv('csv/SDSS_DA_df00.csv',index=False)

In [14]:
SDSS_DA_df=pd.read_csv('csv/SDSS_DA_df00.csv')
display(SDSS_DA_df)

Unnamed: 0,CATALOGID,RA,DEC,GAIA_DR3_SOURCE_ID,SNR_SnowWhite,WD_TYPE,Teff_SnowWhite,E_Teff_SnowWhite,Logg_SnowWhite,E_Logg_SnowWhite,SPEC_FILE,SN_MEDIAN_ALL,MJD
0,27021597834828397,319.599915,67.870514,2222278922912824704,3.509845,DA,17801.755859,551.948730,7.374730,0.002764,spec-101748-59846-27021597834828397.fits,2.602669,59846
1,4326216000,16.303913,85.355789,573779490120147840,15.676339,DA,17813.910156,294.154114,8.106911,0.000441,spec-015194-59243-4326216000.fits,3.239547,59243
2,4326216000,16.303913,85.355789,573779490120147840,15.676339,DA,17813.910156,294.154114,8.106911,0.000441,spec-015350-59329-4326216000.fits,11.403949,59329
3,4546314280,121.032425,21.809814,671029850488515840,13.292489,DA,6697.394531,56.627350,7.616725,0.000887,spec-015051-59204-4546314280.fits,9.331862,59204
4,27021598146874635,206.471344,-1.762060,3658556359386995328,9.797779,DA,8522.503906,10.557338,7.295166,0.001763,spec-103296-59647-27021598146874635.fits,7.058509,59647
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13069,27021598149816737,141.786545,1.372557,3844028577830244608,20.629152,DA,14529.976562,65.001434,8.752006,0.000217,spec-101297-59662-27021598149816737.fits,13.991203,59662
13070,27021598085727572,100.447601,7.583184,3133656586395507200,76.639496,DA,16902.187500,230.000473,7.945325,0.000147,spec-103471-59913-27021598085727572.fits,6.399482,59913
13071,27021598085727572,100.447601,7.583184,3133656586395507200,76.639496,DA,16902.187500,230.000473,7.945325,0.000147,spec-103471-59973-27021598085727572.fits,4.549924,59973
13072,27021598085727572,100.447601,7.583184,3133656586395507200,76.639496,DA,16902.187500,230.000473,7.945325,0.000147,spec-103471-60004-27021598085727572.fits,0.484897,60004
