In [1]:
# Require packages
from astropy.io import fits
import os

# [Datamodel](https://data.sdss.org/datamodel/files/SPECTRO_REDUX/specObj.html)


# Dowload data

Download from [Optical Spectra Catalog Data](https://www.sdss.org/dr14/data_access/bulk/#OpticalSpectraPer-ObjectFiles)

It says that:

> Catalogs of parameters derived from the SDSS/BOSS/SEQUELS/eBOSS optical spectra and matched to photometric data are documented on the optical spectra data page. These can be directly downloaded from the links on that page, or via wget commands. For example, to download the redshifts and classifications of all SDSS spectra (4.5 GB)

```bash
wget --spider https://data.sdss.org/sas/dr14/sdss/spectro/redux/specObj-dr14.fits
```

## [Optical API Arguments](https://dr14.sdss.org/optical/spectrum/view/data/access):

You may download lite or full version of the spectrum directly by using the URL of the flat file on the SAS:

`https://dr14.sdss.org/sas/dr14/SURVEY/spectro/redux/RUN2D/spectra/SPEC/PLATE4/spec-PLATE4-MJD-FIBERID4.fit`

**SURVEY** Should be replaced by the appropriate survey (currently either 'sdss' for DR8, 'boss' for DR9 through DR12, or 'eboss' for DR13 and later)

**RUN2D** Should be replaced by the reduction number.

**PLATE4** Should be replaced by the zero-padded, 4-digit plate number.

**MJD** Should be replaced by the MJD number.

**FIBERID4** Should be replaced by the zero-padded, 4-digit fiber number.


## The FITS file

After download the fits file. It has at least 4 HDUs: [datamodel](https://data.sdss.org/datamodel/files/BOSS_SPECTRO_REDUX/RUN2D/spectra/PLATE4/spec.html)




In [2]:
if not os.path.isfile('specObj-dr14.fits'):
    ! wget https://data.sdss.org/sas/dr14/sdss/spectro/redux/specObj-dr14.fits

# Importing the data
It includes the Redshifts and classifications of all SDSS spectra

In [3]:
#Importing the data. This takes a while. 
sd = fits.open('specObj-dr14.fits')
table = sd[1]
datatable = table.data

# Header

The header includes the column names for the datatable. 

This displays the header and we get for each column (133 total colums) of the table. The zeroth is the survey and the 68th and 69th are CLASS and SUBCLASS.

> CLASS Best spectroscopic classification ("STAR", "GALAXY" or "QSO")

> SUBCLASS Best spectroscopic subclassification

Plate is 61, MJD is 63, Fiber ID is 64, survey is 0 and RUN2D is 28.



## [Optical Spectra Data Quality Flags](http://www.sdss.org/dr12/spectro/quality/)

- Plate Quality. “good”: a good science quality plate 5 in the fitfile

- Spectrum Quality. SPECPRIMARY: set to 1 if this is the best observation of a particular position on the sky; 0 otherwise. It is column 13 of the data

        - SN_MEDIAN and SN_MedianAll
        - ZWARNING_NOQSO. 
        
 #### [ZWARNING](http://www.sdss.org/dr12/algorithms/bitmasks/#ZWARNING)
        
        
> To test if the spectrum is “good” the primary indicator to use is zWarning, which should be 0 or 16 for the data to be good with no identified problems. 

Spectra with zWarning equal to zero have no known problems. ZWarning is number 89.





## SDSS SUBCLASS

From [The Optical Spectra Catalogs](http://www.sdss.org/dr14/spectro/catalogs/)

> STAR: 
identified with a stellar template, chosen among the following subclasses: O, OB, B6, B9, A0, A0p, F2, F5, F9, G0, G2, G5, K1, K3, K5, K7, M0V, M2V,M1, M2, M3, M4, M5, M6, M7, M8, L0, L1, L2, L3, L4, L5, L5.5, L9, T2, Carbon, Carbon_lines, CarbonWD, CV

In [4]:
sd[1].header

XTENSION= 'BINTABLE'           /Binary table written by MWRFITS v1.11           
BITPIX  =                    8 /Required value                                  
NAXIS   =                    2 /Required value                                  
NAXIS1  =                 1159 /Number of bytes per row                         
NAXIS2  =              4851200 /Number of rows                                  
PCOUNT  =                    0 /Normally 0 (no varying arrays)                  
GCOUNT  =                    1 /Required value                                  
TFIELDS =                  133 /Number of columns in table                      
COMMENT                                                                         
COMMENT  *** End of mandatory fields ***                                        
COMMENT                                                                         
COMMENT                                                                         
COMMENT  *** Column names **

In [5]:
%time typesob = set([ (i[68],i[69]) for i in datatable[0:1000]] )
typesob

CPU times: user 209 ms, sys: 12 ms, total: 221 ms
Wall time: 210 ms


{('GALAXY', ''),
 ('GALAXY', 'AGN'),
 ('GALAXY', 'AGN BROADLINE'),
 ('GALAXY', 'BROADLINE'),
 ('GALAXY', 'STARBURST'),
 ('GALAXY', 'STARFORMING'),
 ('GALAXY', 'STARFORMING BROADLINE'),
 ('QSO', ''),
 ('QSO', 'AGN BROADLINE'),
 ('QSO', 'BROADLINE'),
 ('QSO', 'STARBURST BROADLINE'),
 ('STAR', 'A0'),
 ('STAR', 'A0p'),
 ('STAR', 'B9'),
 ('STAR', 'CV'),
 ('STAR', 'CarbonWD'),
 ('STAR', 'F2'),
 ('STAR', 'F5'),
 ('STAR', 'F9'),
 ('STAR', 'G0'),
 ('STAR', 'G2'),
 ('STAR', 'G5'),
 ('STAR', 'K1'),
 ('STAR', 'K3'),
 ('STAR', 'K5'),
 ('STAR', 'K7'),
 ('STAR', 'L1'),
 ('STAR', 'L4'),
 ('STAR', 'L5'),
 ('STAR', 'L5.5'),
 ('STAR', 'L9'),
 ('STAR', 'M0'),
 ('STAR', 'M2'),
 ('STAR', 'M3'),
 ('STAR', 'M4'),
 ('STAR', 'M5'),
 ('STAR', 'M6'),
 ('STAR', 'M7'),
 ('STAR', 'M8'),
 ('STAR', 'OB'),
 ('STAR', 'T2'),
 ('STAR', 'WD')}

In [6]:
%time typesstar = np.unique([ i[69] for i in datatable[0:] if i[68] == 'STAR'], return_counts=True  )
typesstar

CPU times: user 9min 4s, sys: 1.18 s, total: 9min 5s
Wall time: 9min 4s


(array(['A0', 'A0 (19510)', 'A0IVn (25642)', 'A0p', 'A1III (225180)',
        'A1Iae (12953)', 'A1V (95418)', 'A1m (78209)', 'A2II (39866)',
        'A2Ia (14489)', 'A3Iae (223385)', 'A4 (G_165-39)', 'A4V (136729)',
        'A4V (97603)', 'A4p (G_37-26)', 'A5II (34578)', 'A5Ia (17378)',
        'A6IV (28527)', 'A8V (155514)', 'A9V (154660)', 'Am (78362)',
        'B0.5Iae (185859)', 'B0.5Ibe... (187459)', 'B0IVe (5394)',
        'B1Ve (212571)', 'B2.5V (175426)', 'B2.5Ve (187811)',
        'B2III (35468)', 'B2IV-V (176819)', 'B2Iaevar (41117)',
        'B2Ve (164284)', 'B2Vne (202904)', 'B2Vne (58343)', 'B3II (175156)',
        'B3Ib/II (51309)', 'B3V (29763)', 'B3Ve (25940)', 'B5 (338529)',
        'B5III (209419)', 'B5Ib (191243)', 'B5V (173087)', 'B6',
        'B6IIIpe (109387)', 'B6IV (174959)', 'B7IVe (209409)',
        'B8III (220575)', 'B8IV (171301)', 'B8Ib (208501)', 'B9',
        'B9 (105262)', 'B9.5V+... (37269)', 'B9III (15318)',
        'B9Vn (177756)', 'CV', 'CalciumWD', 

In [7]:
typesstarzip = zip(typesstar[0],typesstar[1])
typesstarzip

[('A0', 70339),
 ('A0 (19510)', 489),
 ('A0IVn (25642)', 913),
 ('A0p', 1769),
 ('A1III (225180)', 2621),
 ('A1Iae (12953)', 165),
 ('A1V (95418)', 2075),
 ('A1m (78209)', 649),
 ('A2II (39866)', 11276),
 ('A2Ia (14489)', 190),
 ('A3Iae (223385)', 256),
 ('A4 (G_165-39)', 2834),
 ('A4V (136729)', 3314),
 ('A4V (97603)', 6163),
 ('A4p (G_37-26)', 495),
 ('A5II (34578)', 1593),
 ('A5Ia (17378)', 362),
 ('A6IV (28527)', 1391),
 ('A8V (155514)', 2906),
 ('A9V (154660)', 4299),
 ('Am (78362)', 574),
 ('B0.5Iae (185859)', 101),
 ('B0.5Ibe... (187459)', 284),
 ('B0IVe (5394)', 228),
 ('B1Ve (212571)', 237),
 ('B2.5V (175426)', 257),
 ('B2.5Ve (187811)', 46),
 ('B2III (35468)', 215),
 ('B2IV-V (176819)', 1045),
 ('B2Iaevar (41117)', 161),
 ('B2Ve (164284)', 45),
 ('B2Vne (202904)', 37),
 ('B2Vne (58343)', 176),
 ('B3II (175156)', 579),
 ('B3Ib/II (51309)', 349),
 ('B3V (29763)', 572),
 ('B3Ve (25940)', 30),
 ('B5 (338529)', 2173),
 ('B5III (209419)', 582),
 ('B5Ib (191243)', 423),
 ('B5V (1730

In [110]:
parsenames = [ i.split('(')[0].replace('.','').replace(' ','').replace('/','-') for i in typesstar[0] ]
parsenames = zip(parsenames, typesstar[0])
parsenames

[('A0', 'A0'),
 ('A0', 'A0 (19510)'),
 ('A0IVn', 'A0IVn (25642)'),
 ('A0p', 'A0p'),
 ('A1III', 'A1III (225180)'),
 ('A1Iae', 'A1Iae (12953)'),
 ('A1V', 'A1V (95418)'),
 ('A1m', 'A1m (78209)'),
 ('A2II', 'A2II (39866)'),
 ('A2Ia', 'A2Ia (14489)'),
 ('A3Iae', 'A3Iae (223385)'),
 ('A4', 'A4 (G_165-39)'),
 ('A4V', 'A4V (136729)'),
 ('A4V', 'A4V (97603)'),
 ('A4p', 'A4p (G_37-26)'),
 ('A5II', 'A5II (34578)'),
 ('A5Ia', 'A5Ia (17378)'),
 ('A6IV', 'A6IV (28527)'),
 ('A8V', 'A8V (155514)'),
 ('A9V', 'A9V (154660)'),
 ('Am', 'Am (78362)'),
 ('B05Iae', 'B0.5Iae (185859)'),
 ('B05Ibe', 'B0.5Ibe... (187459)'),
 ('B0IVe', 'B0IVe (5394)'),
 ('B1Ve', 'B1Ve (212571)'),
 ('B25V', 'B2.5V (175426)'),
 ('B25Ve', 'B2.5Ve (187811)'),
 ('B2III', 'B2III (35468)'),
 ('B2IV-V', 'B2IV-V (176819)'),
 ('B2Iaevar', 'B2Iaevar (41117)'),
 ('B2Ve', 'B2Ve (164284)'),
 ('B2Vne', 'B2Vne (202904)'),
 ('B2Vne', 'B2Vne (58343)'),
 ('B3II', 'B3II (175156)'),
 ('B3Ib-II', 'B3Ib/II (51309)'),
 ('B3V', 'B3V (29763)'),
 ('B3Ve',

Now we can create a file with the list of urls to download the SUBCLASS data. 

In [111]:
def createurlfiles(limitperclass,listtypes):
    #listsoftype = typesstar
    for index,[name,sub] in enumerate(listtypes):
        timeslist =[] #Create a list of good candidates for each class.
        if not os.path.exists('list{subclass}.txt'.format(subclass=name)):
            print(name)
            if os.path.exists('list{subclass}.txt'.format(subclass=name)):
                nameoffile = 'list{subclass}index:{index}.txt'.format(subclass=name, index=index)
            else:
                nameoffile = 'list{subclass}.txt'.format(subclass=name)
            with open(nameoffile, 'w') as f:
                    for index,i in enumerate(datatable[0:]):
                        if len(timeslist) < limitperclass:
                            if 'STAR' == i[68] and sub == i[69] and i[5] =='good' and i[89] == 0:
                                obj = i
                                timeslist.append(index)
                                #Need this for boss and eboss url
                                if obj[0] == 'boss':
                                        survey = 'eboss'
                                else:
                                        survey = obj[0]
                                api='https://dr14.sdss.org/sas/dr14/{survey}/spectro/redux/{run2d}/spectra/{plate:04d}/spec-{plate:04d}-{mjd}-{fiber:04d}.fits'.format(survey=survey,run2d=obj[28],plate=obj[61],mjd=obj[63],fiber=obj[64])
                                f.write(i[69] + ',' + api+'\n')


In [112]:
%%time
# SUBCLASS to serach for and create a list file with url to downalod per subclass
limitclass = 100;
createurlfiles(limitclass,parsenames)

B3Ib-II
B3V
B3Ve
B5
B5III
B5Ib
B5V
B6
B6IIIpe
B6IV
B7IVe
B8III
B8IV
B8Ib
B9
B95V+
B9III
B9Vn
CV
CalciumWD
Carbon
CarbonWD
Carbon_lines
F0II
F0III
F0IV
F0Ib
F0V
F2
F2III
F2V
F3-F5V
F5
F5Ib
F6II
F6III
F6Iab:
F6V
F8Ibvar
F8V
F9
F9IV
G0
G05IV
G0Ib
G0Va
G1V
G2
G3Ib
G4V
G5
G5-G6IVw
G5III+
G5Iab:
G8V
G9IV
G9Ib
K0IIIa
K0IV
K0V
K1
K1III
K1IVa
K1V
K2III
K3
K3III
K3Iab:
K3Ib
K3V
K3p
K4III
K4Iab:
K5
K5III
K5Ve
K7
L0
L1
L2
L3
L4
L5
L55
L9
Ldwarf
M0
M0III
M0V
M1
M2
M2III
M2Iab:
M2V
M3
M3III
M4
M45:III
M4II
M5
M5III
M5Iab:
M6
M6III
M7
M7IIIevar
M8
M8IIIe
M9
O
O8-O9
O8e
O95Iae
OB
T2
WD
WDcooler
WDhotter
WDmagnetic
sd:F0
sdF3
CPU times: user 10h 9min 32s, sys: 24min 3s, total: 10h 33min 35s
Wall time: 10h 50min 7s


# [Stellar parameters from SSPP](https://data.sdss.org/datamodel/files/SSPP_REDUX/ssppOut.html)

This file contains the values of [Fe/H], log10 g, and Teff computed with various methods by the SSPP pipeline.



In [None]:
if not os.path.isfile('ssppOut-dr12.fits'):
    ! wget https://data.sdss.org/sas/dr14/sdss/sspp/ssppOut-dr12.fits
sspp = fits.open('ssppOut-dr12.fits')
data = sspp[1].data
types = set([ (i[26],i[27]) for i in data[0:10000] ])
types

In [None]:
print(types)
data[0][1:100]

In [None]:
# SUBCLASS to serach for and create a list file with url to downalod per subclass
listsoftype = ['A0','CV','O'];
for sub in listsoftype:
    with open('list{subclass}.txt'.format(subclass=sub), 'w') as f:
            for i in datatable[0:3000]:
                    if 'STAR' == i[68] and sub == i[69] and i[5] =='good':
                        obj = i
                        #Need this for boss and eboss url
                        if obj[0] == 'boss':
                                survey = 'eboss'
                        else:
                                survey = obj[0]
                                api='https://dr14.sdss.org/sas/dr14/{survey}/spectro/redux/{run2d}/spectra/{plate:04d}/spec-{plate:04d}-{mjd}-{fiber:04d}.fits'.format(survey=survey,run2d=obj[28],plate=obj[61],mjd=obj[63],fiber=obj[64])
                                f.write(i[69] + ',' + api+'\n')
