## Importing and Defining

#### GRATING = 600ZD

In [None]:
import os
import numpy as np 
from astropy.io import fits 
from smooth_kevin import smoother
import py_specrebin
import matplotlib.pyplot as plt 
from matplotlib import rc
import py_specrebin
path_name = '.'

Caution: Execute the following cell only once per run. Do not modify the ```std_out``` or ```std_err``` variables. If they are modified by accident, please restart the kernel and run the notebook from the beginning.

In [None]:
# Saving the original streams for stdout and stderr. To be used for logging output later
import sys
std_out = sys.stdout; std_err = sys.stderr

In [None]:
# Common wavelength grids that spectra are rebinned onto, one per grating setting.
# np.arange(start, stop, step): stop is exclusive.
# NOTE(review): values are presumably in Angstroms -- confirm against the LAMBDA column.
new_wave_600 = np.arange(4000, 11000, .65) 
new_wave_1200 = np.arange(6000, 11000, .33) 

In [None]:
def _read_both_sides(fits_path, **open_kwargs):
    """Read SKYSPEC, LAMBDA and IVAR from HDUs 1 and 2 of one file and join them.

    The file is opened in a ``with`` block so the handle is always released,
    even if a column is missing. ``np.concatenate`` copies the data, so the
    returned arrays stay valid after the file is closed.

    Returns
    -------
    (flux, wave, ivar) : tuple of 1-D ndarrays (HDU 1 values followed by HDU 2).
    """
    with fits.open(fits_path, **open_kwargs) as hdul:
        first = hdul[1].data
        second = hdul[2].data
        flux = np.concatenate((first['SKYSPEC'][0], second['SKYSPEC'][0]))
        wave = np.concatenate((first['LAMBDA'][0], second['LAMBDA'][0]))
        ivar = np.concatenate((first['IVAR'][0], second['IVAR'][0]))
    return flux, wave, ivar


def get_original_data(file_names,mask_name):
    """Load flux, wavelength and inverse variance for every file of a mask.

    Files are read from ``<path_name>/data/<mask_name>/``. For each file the
    two data extensions (blue and red side) are joined into one array.
    If flux, wavelength and inverse variance are all zero everywhere, the
    matching ``serendip1`` file (same name with the 4th dot-separated field
    replaced by ``serendip1``) is read instead.

    Parameters
    ----------
    file_names : list of str
        File names inside the mask folder.
    mask_name : str
        Name of the mask (sub-folder of ``data``).

    Returns
    -------
    tot_flux, tot_wave, tot_ivar : lists of 1-D ndarrays, one entry per file.
    """
    tot_flux = []
    tot_wave = []
    tot_ivar = []

    data_dir = path_name + '/' + 'data/{0}'.format(mask_name)

    for file_name in file_names:
        star_flux, star_wave, star_ivar = _read_both_sides(
            data_dir + '/' + file_name, ignore_missing_end=True)

        # An extraction that is zero everywhere is treated as empty:
        # fall back to the serendipitous extraction for the same slit.
        if not (star_flux.any() or star_ivar.any() or star_wave.any()):
            parts = file_name.split(".")
            serendip_file_name = "{0}.{1}.{2}.serendip1.{3}.{4}".format(
                parts[0], parts[1], parts[2], parts[4], parts[5])
            star_flux, star_wave, star_ivar = _read_both_sides(
                data_dir + '/' + serendip_file_name)

        tot_flux.append(star_flux)
        tot_wave.append(star_wave)
        tot_ivar.append(star_ivar)

    return tot_flux, tot_wave, tot_ivar

In [None]:
def rebin(fluxes, waves, ivar, grating):
    """Rebin every spectrum onto the common wavelength grid of the given grating.

    Parameters
    ----------
    fluxes, waves, ivar : sequences of 1-D arrays
        Per-spectrum flux, wavelength and inverse variance (same ordering).
    grating : int
        600 or 1200; selects ``new_wave_600`` or ``new_wave_1200``.

    Returns
    -------
    rbflux : list of rebinned flux arrays
    new_wave : the wavelength grid used (shared by all spectra)
    rbivar : list of rebinned inverse-variance arrays

    Raises
    ------
    ValueError
        If ``grating`` is neither 600 nor 1200 (previously this surfaced as
        an UnboundLocalError on ``new_wave``).
    """
    if grating == 600:
        new_wave = new_wave_600
    elif grating == 1200:
        new_wave = new_wave_1200
    else:
        raise ValueError("Unsupported grating {0!r}; expected 600 or 1200".format(grating))

    rbflux = []
    rbivar = []

    for wave_i, flux_i, ivar_i in zip(waves, fluxes, ivar):
        new_flux, new_ivar = py_specrebin.rebinspec(wave_i, flux_i, new_wave, ivar=ivar_i)
        rbflux.append(new_flux)
        rbivar.append(new_ivar)

    return rbflux, new_wave, rbivar

In [None]:
def find_median(rebinned_flux_array):
    """Return the per-pixel median over all spectra, ignoring non-finite values.

    Parameters
    ----------
    rebinned_flux_array : sequence of 1-D arrays
        Spectra on a common grid. The length of the first spectrum sets the
        number of pixels.

    Returns
    -------
    median_vals : list
        One median per pixel. A pixel with no finite value in any spectrum
        yields NaN (numpy emits a RuntimeWarning in that case).
    """
    # Progress/diagnostic output: number of spectra entering the median.
    print(len(rebinned_flux_array))

    n_pixels = len(rebinned_flux_array[0])
    stacked = np.array([np.asarray(spectrum)[:n_pixels] for spectrum in rebinned_flux_array])

    # Turn +/-inf into NaN as well, so nanmedian skips every non-finite value.
    finite_only = np.where(np.isfinite(stacked), stacked, np.nan)

    return list(np.nanmedian(finite_only, axis=0))

In [None]:
def get_exclusions():
    """Parse ``ISM_EM_LINES.txt`` into a list of slits to exclude.

    Each non-blank line is expected to look like
    ``<mask>[_suffix]: <slit number> <object id> ...``.

    Returns
    -------
    all_data : list of dict
        One dict per line with keys ``'mask_name'`` (text before the first
        underscore of the mask field), ``'slit_number'`` (zero-padded to at
        least three characters) and ``'object_id'``.
    """
    filepath = 'ISM_EM_LINES.txt'
    all_data = []
    # The context manager closes the file even if a line fails to parse.
    with open(filepath) as fp:
        for line in fp:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            mask_field, remainder = line.split(':')[0], line.split(':')[1]
            tokens = remainder.split()
            all_data.append({
                'mask_name': mask_field.split('_')[0],
                'slit_number': tokens[0].zfill(3),
                'object_id': tokens[1],
            })
    return all_data

In [None]:
def get_files_to_include(folder):
    """Sort the ``spec1d`` files of a mask folder into three groups.

    Parameters
    ----------
    folder : str
        Sub-folder of ``data`` to scan (the mask name).

    Returns
    -------
    include, exclude, serendip : three sorted lists of file names
        * serendip -- files whose object-id field contains ``'serendip'``
        * exclude  -- files whose (mask, slit, object id) appears in the
                      list returned by ``get_exclusions()``
        * include  -- every other ``spec1d`` file

    Notes
    -----
    The serendip test takes precedence over the exclusion list. It is now
    applied even when the exclusion list is empty; previously it lived inside
    the loop over exclusions and was skipped in that case.
    """
    list_of_files_to_include = []
    list_of_files_to_exclude = []
    serendip_files = []

    all_file_names_in_folder = os.listdir('data/{}'.format(folder))
    print("The number of files in the folder is {0}".format(len(all_file_names_in_folder)))

    # A set of keys gives O(1) membership tests instead of a scan per file.
    excluded_keys = {
        (entry['mask_name'], entry['slit_number'], entry['object_id'])
        for entry in get_exclusions()
    }

    for file_name in all_file_names_in_folder:
        parts_of_file_name = file_name.split(".")
        if parts_of_file_name[0] != 'spec1d':
            continue  # skips hidden files such as .DS_Store

        mask_name = parts_of_file_name[1]
        slit_number = parts_of_file_name[2]
        object_id = parts_of_file_name[3]

        if 'serendip' in object_id:
            serendip_files.append(file_name)
        elif (mask_name, slit_number, object_id) in excluded_keys:
            list_of_files_to_exclude.append(file_name)
        else:
            list_of_files_to_include.append(file_name)

    print('The number of files left after exclusions is {0}'.format(len(list_of_files_to_include)))

    return sorted(list_of_files_to_include), sorted(list_of_files_to_exclude), sorted(serendip_files)


## Function to Save The Rebinned Data

In [None]:
#Sarthak's function as modified by Liv Gaunt
def exportToFits(rbflux, rbwave, rbivar, mask_name, file_names, incl_or_excl):
    """Write one FITS file per rebinned spectrum.

    Each file holds an empty primary HDU whose ``INCLUDE`` header card stores
    ``incl_or_excl``, followed by a binary table with the columns RBFLUX,
    RBWAVE and RBIVAR. ``rbwave`` is a single grid shared by all spectra.

    The destination sub-folder depends on ``incl_or_excl``:
    True -> ``<mask>_Included``, False -> ``<mask>_Excluded``,
    anything else -> ``<mask>_Serendip``.
    """
    # Choose the destination folder once; it is the same for every spectrum.
    if incl_or_excl == True:
        destination = '/SkySubData/{0}_Rebinned/{0}_Included'.format(mask_name)
    elif incl_or_excl == False:
        destination = '/SkySubData/{0}_Rebinned/{0}_Excluded'.format(mask_name)
    else:
        destination = '/SkySubData/{0}_Rebinned/{0}_Serendip'.format(mask_name)

    for position, flux in enumerate(rbflux):
        primary = fits.PrimaryHDU()  # empty primary HDU, header only
        primary.header['INCLUDE'] = (incl_or_excl, 'Include in median calc if T')

        table = fits.BinTableHDU.from_columns([
            fits.Column(name='RBFLUX', array=flux, format='E'),
            fits.Column(name='RBWAVE', array=rbwave, format='E'),
            fits.Column(name='RBIVAR', array=rbivar[position], format='E'),
        ])

        output = fits.HDUList([primary, table])
        output.writeto(path_name + destination + '/' + 'rebinned_{0}'.format(file_names[position]))


## Function to Save The Median

In [None]:
def exportToFitsMedian(median,mask_name):
    """Save the median spectrum of a mask as a gzipped FITS table.

    The output ``Median_of_<mask>.fits.gz`` is written under
    ``<path_name>/SkySubData/<mask>_Median/`` and contains an empty primary
    HDU plus a binary table with a single ``MEDIAN`` column.
    """
    primary = fits.PrimaryHDU()
    median_column = fits.Column(name='MEDIAN',array=median,format="E")
    table = fits.BinTableHDU.from_columns([median_column])

    destination = path_name + '/SkySubData/{0}_Median/Median_of_{0}.fits.gz'.format(mask_name)
    fits.HDUList([primary,table]).writeto(destination)

## Median Airglow Subtraction


In [None]:
def median_subtraction(slit_index,rebinned_flux,median_spectrum=None):
    """Subtract the median (sky) spectrum from one rebinned spectrum.

    Parameters
    ----------
    slit_index : int
        Position of the spectrum inside ``rebinned_flux``.
    rebinned_flux : sequence of 1-D arrays
        Rebinned spectra on a common grid.
    median_spectrum : sequence of float, optional
        Median to subtract. When omitted, the notebook-level variable
        ``median`` is used, so existing two-argument calls keep working.

    Returns
    -------
    new_flux : list
        ``spectrum - median`` where the spectrum is finite; non-finite
        pixels are passed through unchanged.
    """
    if median_spectrum is None:
        median_spectrum = median  # notebook global, set by the "Finding The Median" cell

    spectrum = np.asarray(rebinned_flux[slit_index])
    sky = np.asarray(median_spectrum)[:len(spectrum)]

    # errstate: the difference is also evaluated at non-finite pixels, where
    # it is discarded by np.where but could otherwise warn (e.g. inf - inf).
    with np.errstate(invalid='ignore'):
        subtracted = np.where(np.isfinite(spectrum), spectrum - sky, spectrum)

    return list(subtracted)

In [None]:
def get_slit_nums(files):
    """Extract the integer slit number from each file name.

    The slit number is the third dot-separated field of the file name
    (``spec1d.<mask>.<slit>.<object>...``).

    Parameters
    ----------
    files : list of str

    Returns
    -------
    slit_nums : list of int
        Same order as ``files``; empty when ``files`` is empty. A single
        file now yields its slit number (it used to be dropped by a
        ``len(files) > 1`` guard).
    """
    return [int(file_name.split(".")[2]) for file_name in files]

In [None]:
def find_slit_index(slit_nums,slit_num):
    """Return the position of the first occurrence of ``slit_num`` in ``slit_nums``.

    Raises ValueError (from ``list.index``) when the slit number is absent.
    """
    position = slit_nums.index(slit_num)
    return position

In [None]:
def exportToFitsSkySub(mask_name,slit_nums,rebinned_flux,incl_or_excl):
    
    '''
    Subtract the median sky from every slit and save each result as a
    gzipped FITS table with a single SKYSUB_SPECTRUM column.
    
    Parameters
    ----------
    mask_name : str, required
        Name of mask.
    slit_nums : list, required
        List of slit numbers, in the same order as rebinned_flux. 
        Use slit_nums for 'include' slits or 
        slit_nums_exclude for 'exclude' slits.
    rebinned_flux : list, required
        A list containing arrays of rebinned flux. 
        Use rbflux for 'include' slits or rbflux_exclude 
        for 'excluded' slits.
    incl_or_excl: bool, required
        Use True for 'include' slits or False for 'exclude' slits.
        Any other value writes nothing.

    '''
    
    # One output-path template per case; the write logic is shared.
    if incl_or_excl == True:
        destination = "/SkySubData/{0}_SkySub_Spectrum/{0}_Included/Slit_{1}_SkySub_Spectrum.fits.gz"
    elif incl_or_excl == False:
        destination = "/SkySubData/{0}_SkySub_Spectrum/{0}_Excluded/Slit_{1}_SkySub_Spectrum.fits.gz"
    else:
        return

    # enumerate gives the position of each slit directly; a list.index lookup
    # per slit is quadratic and returns the wrong spectrum for a repeated
    # slit number.
    for index, slit in enumerate(slit_nums):
        skysub_spectrum = median_subtraction(index,rebinned_flux)

        hdu1 = fits.PrimaryHDU()
        c1 = fits.Column(name="SKYSUB_SPECTRUM",array=skysub_spectrum,format="E")
        hdu2 = fits.BinTableHDU.from_columns([c1])
        hdul = fits.HDUList([hdu1,hdu2])
        hdul.writeto(path_name + destination.format(mask_name,slit))
    

## Define The Mask

In [None]:
mask_name = "M33D2A" #change to fit the appropriate mask 

## Define The Grating

In [None]:
grating = 600 #change between 600 and 1200

## Getting Files We Want to Include and Exclude

In [None]:
# Split the spec1d files of this mask into include / exclude / serendip groups.
list_of_files_to_include, list_of_files_to_exclude, list_of_serendip_files = get_files_to_include(mask_name)

# All three lists are returned already sorted by get_files_to_include.
# file_names          = slits used to build the median (airglow) spectrum
# file_names_exclude  = slits listed in the exclusion file (ISM emission lines)
# file_names_serendip = serendip files
# file_names_all      = included + excluded slits (serendip files are left out)

file_names = list_of_files_to_include
file_names_exclude = list_of_files_to_exclude
file_names_serendip = list_of_serendip_files
file_names_all = list_of_files_to_include + list_of_files_to_exclude

## Extracting The Wavelength, Flux, and Inverse Variance

Make sure to comment out the code in this section after you have rebinned and saved your data!!!

Then make sure to uncomment them whenever you're working with a new mask and want to rebin!!!

In [None]:
#getting data
#try getting and rebinning all files
flux, wave, ivar = get_original_data(file_names, mask_name) 

In [None]:
#rebinning the original data
rbflux, rbwave, rbivar = rebin(flux, wave, ivar, grating) # this takes about 4 minutes to run

In [None]:
#getting all excluded data
flux_exclude, wave_exclude, ivar_exclude = get_original_data(file_names_exclude, mask_name)

In [None]:
#rebinning the excluded data
rbflux_exclude, rbwave_exclude, rbivar_exclude = rebin(flux_exclude, wave_exclude, ivar_exclude, grating)

In [None]:
#getting all serendip data 
#NOTE: we will never use it but is good to just process it
flux_serendip, wave_serendip, ivar_serendip = get_original_data(list_of_serendip_files, mask_name)

In [None]:
#rebinning the serendip data
rbflux_serendip, rbwave_serendip, rbivar_serendip = rebin(flux_serendip, wave_serendip, ivar_serendip, grating)

## Making Directories

In [None]:
# Output folders live under <path_name>/SkySubData, the same root the export
# functions write to.
output_root = os.path.join(path_name, "SkySubData")

paths = [#make three folders to store the rebinned data, the median, and the spectra
        os.path.join(output_root, "{0}_Rebinned".format(mask_name)),
        os.path.join(output_root, "{0}_Median".format(mask_name)),
        os.path.join(output_root, "{0}_SkySub_Spectrum".format(mask_name)),

        #make sub-folders for rebinned data and sky-subtracted spectra
        os.path.join(output_root, "{0}_Rebinned".format(mask_name), "{0}_Excluded".format(mask_name)),
        os.path.join(output_root, "{0}_Rebinned".format(mask_name), "{0}_Included".format(mask_name)),
        os.path.join(output_root, "{0}_Rebinned".format(mask_name), "{0}_Serendip".format(mask_name)),
        os.path.join(output_root, "{0}_SkySub_Spectrum".format(mask_name), "{0}_Excluded".format(mask_name)),
        os.path.join(output_root, "{0}_SkySub_Spectrum".format(mask_name), "{0}_Included".format(mask_name))]

for path in paths:
    # exist_ok=True makes the cell safe to re-run; it still raises if the
    # path exists but is not a directory.
    os.makedirs(path, exist_ok=True)

## Save Rebin Data

In [None]:
# The last argument selects the output sub-folder inside exportToFits:
# True -> <mask>_Included, False -> <mask>_Excluded, None -> <mask>_Serendip.
exportToFits(rbflux, rbwave, rbivar, mask_name, file_names, True) 
exportToFits(rbflux_exclude, rbwave_exclude, rbivar_exclude, mask_name, file_names_exclude, False)
exportToFits(rbflux_serendip, rbwave_serendip, rbivar_serendip, mask_name, file_names_serendip, None)

## Finding The Median 

In [None]:
#taking the median
median = find_median(rbflux) #median length is 10770 (M33D2A)

## Saving Median As FITS

In [None]:
exportToFitsMedian(median,mask_name)

## Slits to Include and Exclude

In [None]:
slit_nums = get_slit_nums(file_names)
slit_nums_exclude = get_slit_nums(file_names_exclude)

all_slit_nums = get_slit_nums(file_names_all)

print("Slit # to INCLUDE in median calculation: {0}".format(slit_nums))
print("Slit # to EXCLUDE: {0}".format(slit_nums_exclude))

## Perform Airglow Subtraction and Save Spectrum as FITS File

In [None]:
exportToFitsSkySub(mask_name,slit_nums,rbflux,True) #saving 'included' slits as FITS files
exportToFitsSkySub(mask_name,slit_nums_exclude,rbflux_exclude,False) #saving 'excluded' slits as FITS files

## Moving Median

In [None]:
from scipy.ndimage import median_filter

def moving_median(a, size=325):
    
    '''
    Returns the moving median values of the array,
    using a window of a given size, centered at
    each point.
    
    Non-finite values (NaN, +/-inf) are first replaced by the value of the
    nearest finite pixel (ties go to the left neighbour); the median filter
    is then run with edge mode 'nearest'.
    
    The input array is NOT modified: the filling is done on a copy.
    (Earlier versions filled the caller's array in place and returned an
    all-NaN result for an input without any non-finite value.)
    
    Parameters
    ----------
    a : ndarray
        One dimensional flux array.
    size : int, optional
        The size of each segment for taking the median.
        
    Returns
    ----------
    median_arr : One dimensional array of moving median. All NaN when the
        input has no finite value at all.
    
    '''
    
    values = np.array(a)  # copy, so the caller's data stays untouched
    finite_bool = np.isfinite(values)

    if not finite_bool.any():
        # Nothing to anchor the fill on (also covers an empty input).
        return np.nan*np.ones(len(values))

    if not finite_bool.all():
        all_indices = np.arange(len(values))
        finite_indices = all_indices[finite_bool]
        nan_indices = all_indices[np.invert(finite_bool)]

        # Insertion position of each bad pixel among the finite pixels:
        # 0 -> left of every finite pixel, len(finite_indices) -> right of all.
        positions = np.searchsorted(finite_indices, nan_indices)
        last = len(finite_indices) - 1
        right_nearest = finite_indices[np.clip(positions, 0, last)]
        left_nearest = finite_indices[np.clip(positions - 1, 0, last)]

        # Use the right neighbour at the leading edge, or inside a gap when it
        # is strictly closer; otherwise use the left neighbour.
        in_gap = (positions > 0) & (positions <= last)
        right_is_nearer = (right_nearest - nan_indices) < (nan_indices - left_nearest)
        use_right = (positions == 0) | (in_gap & right_is_nearer)

        values[nan_indices] = values[np.where(use_right, right_nearest, left_nearest)]

    median_arr = median_filter(values, size, mode='nearest')
    
    return (median_arr)

## FITS Files for Marz 

In [None]:
def _variance_from_ivar(inv_var):
    '''Convert one inverse-variance array to variance; ivar == 0 maps to variance 0.'''
    ivar = np.asarray(inv_var)
    if not np.issubdtype(ivar.dtype, np.floating):
        ivar = ivar.astype(float)
    variance = np.zeros_like(ivar)
    # Only divide where ivar is non-zero; the remaining entries keep the 0 fill.
    np.divide(1, ivar, out=variance, where=(ivar != 0))
    return variance


def exportToFitsMarz(mask_name,slit_nums,grating,rebinned_flux,rebinned_ivar,median,min_wave,max_wave):
    
    '''
    Build the multi-extension FITS file read by Marz: INTENSITY, VARIANCE,
    SKY and WAVELENGTH image extensions (one row per slit, restricted to
    min_wave <= wavelength <= max_wave) plus a table of slit numbers.
    The file is written to <path_name>/SkySubData/<mask_name>_Marz.fits.
    
    Parameters
    ----------
    mask_name : str, required
        Name of mask.
    slit_nums : list, required
        List of slit number, in the same order as rebinned_flux. 
    grating: int, required
        600 or 1200
    rebinned_flux : list, required
        A list containing arrays of rebinned flux. 
    rebinned_ivar: list, required
        A list containing arrays of rebinned inverse 
        variance.
    median: list, required
        Median of all included slits. Sky background.
    min_wave: float or int, required
        Left limit of the wavelength cutout.
    max_wave: float or int, required
        Right limit of the wavelength cutout.

    Raises
    ------
    ValueError
        If grating is neither 600 nor 1200.

    '''
    
    # GETTING 'WAVELENGTH' 2D ARRAY
    if grating == 600:
        wavelength_array = new_wave_600
    elif grating == 1200:
        wavelength_array = new_wave_1200
    else:
        raise ValueError("Unsupported grating {0!r}; expected 600 or 1200".format(grating))
        
    wave_cutout_boolean = ((wavelength_array>=min_wave) & (wavelength_array<=max_wave))
    n_slits = len(slit_nums)
    
    array2d_wavelength = [wavelength_array[wave_cutout_boolean]] * n_slits #same grid repeated once per slit
    
    #GETTING 'INTENSITY' 2D ARRAY
    array2d_intensity = []
    for index in range(n_slits):
        # Sky subtraction, then removal of the moving-median baseline.
        skysub_spectrum = rebinned_flux[index] - median
        skysub_spectrum = skysub_spectrum - moving_median(skysub_spectrum, size=325)
        array2d_intensity.append(np.array(skysub_spectrum)[wave_cutout_boolean])
            
    #GETTING 'VARIANCE' 2D ARRAY
    array2d_variance = [_variance_from_ivar(inv_var)[wave_cutout_boolean] for inv_var in rebinned_ivar]
        
    #GETTING 'SKY BACKGROUND' 2D ARRAY
    array2d_sky = [np.array(median)[wave_cutout_boolean]] * n_slits #same sky repeated once per slit
#     array2d_sky = [np.zeros(len(wavelength_array))[wave_cutout_boolean]] * len(slit_nums)
    
    #WRITE TO FITS
    hdu0 = fits.PrimaryHDU()
    hdu1 = fits.ImageHDU(data=array2d_intensity,name="INTENSITY") #Image HDU containing intensity
    hdu2 = fits.ImageHDU(data=array2d_variance,name="VARIANCE") #Image HDU containing variance
    hdu3 = fits.ImageHDU(data=array2d_sky,name="SKY") #Image HDU containing sky background
    hdu4 = fits.ImageHDU(data=array2d_wavelength,name="WAVELENGTH") #Image HDU containing wavelength
    c1 = fits.Column(name="NAME",array = slit_nums,format="E") 
    hdu5 = fits.BinTableHDU.from_columns([c1]) #BinTable HDU labelling the slits; not needed for the calculation
    hdul = fits.HDUList([hdu0,hdu1,hdu2,hdu3,hdu4,hdu5])
    hdul.writeto(path_name + "/SkySubData/{0}_Marz.fits".format(mask_name))

In [None]:
exportToFitsMarz(mask_name,slit_nums_exclude,grating,rbflux_exclude,rbivar_exclude,median,6500,6800)