
Make spectral measurements of $F_{2500}$ for new spectra

---

In [1]:
import matplotlib.pyplot as plt
from astropy.io import fits
from astropy.io.fits import Column
from astropy.table import Table
import numpy as np
import pandas as pd
import gc

In [2]:
dat = pd.read_csv("anastasia/DR14_inCSC_wPlMjdFib.csv")
dat.head()

Unnamed: 0,SDSS_NAME,RA_1,DEC_1,PLATE,MJD,FIBERID,Z,usrid,separation_2,probability,...,flux_aper_w,flux_aper_lolim_w,flux_aper_hilim_w,flux_aper_avg_b,flux_aper_avg_lolim_b,flux_aper_avg_hilim_b,flux_aper_avg_w,flux_aper_avg_lolim_w,flux_aper_avg_hilim_w,Separation
0,000009.38+135618.4,0.039086,13.938452,6177,56268,386,2.239,97,0.180244,0.576511,...,,,,2.952932e-15,1.24334e-15,4.662524e-15,,,,0.180256
1,000015.47+005246.8,0.064497,0.879677,685,52203,439,1.8516,174,0.304549,0.770882,...,,,,4.659228e-14,3.623844e-14,5.694612e-14,,,,0.304533
2,000026.29+134604.6,0.109582,13.767969,750,52235,87,0.7676,303,0.203809,0.856049,...,,,,1.381749e-13,1.18679e-13,1.56524e-13,,,,0.203793
3,000102.74+023503.2,0.261452,2.584228,7852,56987,675,0.76489,680,0.390769,0.375171,...,,,,1.297644e-13,1.113988e-13,1.481301e-13,,,,0.39076
4,000104.46+024349.3,0.268606,2.730383,4277,55506,96,0.896,698,1.020935,0.815516,...,,,,7.312133e-14,3.489882e-14,1.113438e-13,,,,1.020933


In [3]:
#limit to Amy Rankine's redshift range
z = dat[["Z"]].values.flatten()
dat[((z>=1.56)&(z<=3.5))].to_csv("anastasia/DR14_inCSC_wPlMjdFib_amyzrange.csv")

In [4]:
dat = pd.read_csv("anastasia/DR14_inCSC_wPlMjdFib_amyzrange.csv")
dat.head()

Unnamed: 0.1,Unnamed: 0,SDSS_NAME,RA_1,DEC_1,PLATE,MJD,FIBERID,Z,usrid,separation_2,...,flux_aper_w,flux_aper_lolim_w,flux_aper_hilim_w,flux_aper_avg_b,flux_aper_avg_lolim_b,flux_aper_avg_hilim_b,flux_aper_avg_w,flux_aper_avg_lolim_w,flux_aper_avg_hilim_w,Separation
0,0,000009.38+135618.4,0.039086,13.938452,6177,56268,386,2.239,97,0.180244,...,,,,2.952932e-15,1.24334e-15,4.662524e-15,,,,0.180256
1,1,000015.47+005246.8,0.064497,0.879677,685,52203,439,1.8516,174,0.304549,...,,,,4.659228e-14,3.623844e-14,5.694612e-14,,,,0.304533
2,5,000106.87+023845.9,0.278661,2.64611,4296,55499,684,1.76,716,1.421888,...,,,,3.195623e-15,6.145429e-16,5.653795e-15,,,,1.421889
3,6,000130.57+130847.3,0.377384,13.146475,5649,55912,558,3.051,981,0.339238,...,,,,2.968747e-15,2.100959e-15,3.790861e-15,,,,0.33924
4,7,000130.63+233443.5,0.377629,23.578777,6513,56543,91,2.93,983,0.151793,...,,,,2.872255e-14,2.488519e-14,3.233418e-14,,,,0.151748


In [5]:
sdss_names = dat[["SDSS_NAME"]].values.flatten()
z     = dat[["Z"]].values.flatten()
plate = dat[["PLATE"]].values.flatten()
mjd   = dat[["MJD"]].values.flatten()
fiber = dat[["FIBERID"]].values.flatten()

In [6]:
spec_path = "/Users/trevormccaffrey/Desktop/spectra/aox_082621/lite/"

In [7]:
#Will want to save arrays of each QSO's wavelength, flux, and inverse variance
names   = []
spectra = []
wave    = []
ivar    = []
N = 0

for sdss_name, red, pl, mj, fib in zip(sdss_names, z, plate, mjd, fiber):
    try:
        hdul_spec = fits.open(spec_path+"%04d/spec-%04d-%05d-%04d.fits" % (pl,pl,mj,fib))
    except FileNotFoundError:
        print(spec_path+"%04d/spec-%04d-%05d-%04d.fits NOT FOUND" % (pl,pl,mj,fib))
        continue
        
        
    #Load in data from each FITS file
    hdul_spec_data = hdul_spec[1].data
    sdss_flux   = hdul_spec[1].data["flux"]
    sdss_loglam = hdul_spec[1].data["loglam"]
    sdss_wave   = 10.**(sdss_loglam) / (1+red)  #x-axis: wavelength
    sdss_ivar   = hdul_spec[1].data["ivar"]
    
    #Want the same wavelength range for each array
    """
    wavemask   = ((sdss_wave>=1443) & (sdss_wave<=2961))
    sdss_flux  = sdss_flux[wavemask]
    #sdss_flux /= np.median(sdss_flux) #normalize spectrum
    sdss_ivar  = sdss_ivar[wavemask]
    sdss_wave  = sdss_wave[wavemask]
    """
    #hdul_spec.flush()
    del hdul_spec_data
    hdul_spec.close()
    
    #if len(sdss_flux) >= 3121:
    names.append(sdss_name)
    spectra.append(sdss_flux)
    wave.append(sdss_wave)
    ivar.append(sdss_ivar)

    N+=1
    if N%1000==0: print(N)  
    #if N>3500: break
    
names   = np.array(names)
spectra = np.array(spectra, dtype=object)
wave    = np.array(wave, dtype=object)
ivar    = np.array(ivar, dtype=object)

1000


Keep getting a too many files open error.  Not sure why because I'm closing all of them and am following the exact same process as last time I did this.  This happens after ~4000 files, so for now just deal with DR14 and DR16 objects separately.

In [8]:
len(spectra)

1371

In [9]:
def get_f2500(wave, flux):
    arg2500 = np.abs(wave-2500).argmin()
    red2500 = (wave>2500).sum()
    
    #Case 1: A 10-pixel (~2Å) window exists in the spectrum
    if red2500>=5: 
        f2500 = np.nanmedian(flux[arg2500-5:arg2500+6])
        
    #Case 2: 2500Å is in the spectrum, but just barely
    elif 0<red2500<5:
        f2500 = np.nanmedian(flux[arg2500-(10-red2500):arg2500+(red2500+1)])
        
    #Case 3: 2500Å is red of the covered wavelength range-
    #        we'll fit the continuum and extrapolate to 2500Å
    else:
        fit_region = (wave>=2015.)
        m, b = np.polyfit(wave[fit_region], flux[fit_region], 1)
        x = np.linspace(2015, 2520, 2000)
        y = m*x + b
        f2500 = y[np.abs(x-2500.).argmin()]
        
    return f2500

In [10]:
%%capture
F2500 = []
for i in range(len(wave)):
#for i in range(100):
    f2500 = get_f2500(wave[i], spectra[i])
    F2500.append(f2500)
    """
    fig = plt.figure(figsize=(4,4))
    plt.plot(wave[i], spectra[i], zorder=1)
    plt.scatter(2500., f2500, color="r", s=100, zorder=2)
    try:
        plt.ylim(f2500-4, f2500+10)
    except ValueError:
        print("Bad Object")
    plt.tight_layout()
    plt.savefig("anastasia/plots/dr14/%s.png"%names[i])
    plt.show()
    """
F2500 = np.array(F2500)

I visually inspected all of these, and I'm happy with the quality of measurements.  The trickiest ones are those that have both very poor S/N and high enough redshifts such that 2500Å is either nearly or completely redshifted out of the spectrum.  Applying a S/N>5 cut should solve this.  How exactly to define S/N?  Something with spectrum inverse variance?

In [11]:
S2N = []
for i in range(len(spectra)):
    s2n_i = np.median(spectra[i] / (1 / (ivar[i] + 1.e-20)))
    S2N.append(s2n_i)
S2N = np.array(S2N)

In [12]:
(F2500<0).sum(), (S2N<10).sum()

(29, 686)

So how many "definitely bad" objects (F2500<0) are due simply to insufficient S/N?

In [13]:
((F2500<0)&(S2N>=5)).sum()

6

Most of them.  Measurements are imperfect for now, but I think are good enough to settle for.  Of course if we get/do reconstructions, this would be very easy and much more consistent.

In [14]:
dat["F2500"] = F2500
dat["Avg S2N"] = S2N
dat[((z>=1.56)&(z<=3.5))].to_csv("anastasia/DR14_inCSC_wPlMjdFib_amyzrange.csv")