# SDSS Spectra Viewer

I designed this viewer to plot spectra from the SDSS database for my research with Scott Anderson. As of now, it does not access the SDSS database to gather the spectra for me, but it does load and display spectra from FITS files. In addition, I created tags, stored in a CSV file, for notable features of the spectra (different sorts of broad lines, and whether or not the object is a changing-look quasar candidate).

In [380]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import Title
from bokeh.layouts import gridplot, widgetbox
import pandas as pd
import astropy.io.fits as fits
import astropy.units as u
import numpy as np
from bokeh.models.widgets import RadioButtonGroup, CheckboxGroup, TextInput, Button
import glob

# Configure Bokeh to render its output inline in this notebook.
output_notebook()
# Switch off pandas' chained-assignment warning for the whole session.
pd.options.mode.chained_assignment = None

The code below first reads in the FITS files and extracts the flux data and the headers. It then creates two figure objects, each of which has all of its spectra plotted on it at the same time. To create the illusion of scrolling, I defined two arrays: the first contains spectra 1–13 and the second contains spectra 2–14. The callbacks near the bottom step through these arrays and make only the desired spectra visible. Because of cosmic rays and noise near the edges of the spectra, I suggest zooming in (the button second from the left) by drawing a box that just encloses the data when you first run the cell below; the scale will be much easier on the eyes if you do that. The checkboxes and text input modify a pandas dataframe, which is currently a sort of "test" dataframe with a length of 1. Clicking on the checkboxes modifies the dataframe immediately, whereas a change to the comments is applied only when you hit Enter. Any modifications can be saved at the end by selecting "Save as CSV."

In [381]:
def reader(pattern='data/*.fits', max_spectra=14):
    """ Reads in the data.

    Finds the FITS files matching ``pattern``, orders them by the ``MJD``
    value in their headers and loads the earliest ``max_spectra`` of them.

    Parameters
    ----------
    pattern : str
        Glob pattern of the FITS files to load.
    max_spectra : int or None
        Upper limit on the number of spectra loaded. ``None`` loads every
        matching file. Fewer matches than the limit is not an error.

    Returns
    -------
    fluxes : list
        Per spectrum, the 'flux' column of HDU 1 multiplied by 10**-17 and
        the unit 'erg cm-2 s-1 AA-1'.
    lambs : list
        Per spectrum, 10 ** (the 'loglam' column of HDU 1) in Angstrom.
    chron_names : list of str
        Per spectrum, 'PLATEID MJD FIBERID' built from the header, in the
        same (date-sorted) order as the two lists above.
    """
    files_list = glob.glob(pattern)
    file_headers = [fits.getheader(path) for path in files_list]
    table = pd.DataFrame({
        'file': files_list,
        'name': [' '.join(str(header[key]) for key in ('PLATEID', 'MJD', 'FIBERID'))
                 for header in file_headers],
        'dates': [header['MJD'] for header in file_headers],
    })
    # drop=True keeps the old index out of the columns; everything below
    # addresses columns by name, so the column order no longer matters.
    table = table.sort_values(by=['dates']).reset_index(drop=True)
    if max_spectra is not None:
        table = table.head(max_spectra)

    fluxes = []
    lambs = []
    chron_names = []
    for path, name in zip(table['file'], table['name']):
        # The arithmetic below produces new arrays while the file is still
        # open, so nothing refers back to the file once it is closed.
        with fits.open(path) as hdul:
            spectrum = hdul[1].data
            fluxes.append(spectrum['flux'] * 10**-17 * u.Unit('erg cm-2 s-1 AA-1'))
            lambs.append(10**spectrum['loglam'] * u.AA)
        chron_names.append(name)
    return fluxes, lambs, chron_names

def plotter():
    """ Builds the two side-by-side figures the spectra are drawn on.

    The second figure is created with the first figure's x and y range
    objects, so both figures always share the same ranges.

    Returns
    -------
    (spec1, spec2) : the two figures, in that order.
    """
    size = {'plot_width': 450, 'plot_height': 300}
    spec1 = figure(**size)
    spec2 = figure(x_range=spec1.x_range, y_range=spec1.y_range, **size)
    # Both panels carry identical axis labels.
    for panel in (spec1, spec2):
        panel.xaxis.axis_label = 'Wavelength (Å)'
        panel.yaxis.axis_label = 'Flux (erg cm^-2 s^-1 Å^-1)'
    return spec1, spec2

def set_visible(i):
    """ Shows pair ``i``: titles both figures and reveals their i-th lines.

    spec1 is titled with chron_names[i] and spec2 with chron_names[i + 1].
    """
    for panel, offset in ((spec1, 0), (spec2, 1)):
        panel.title.text = chron_names[i + offset]
    for renderers in (spectra_1, spectra_2):
        renderers[i].visible = True
    
def set_invisible(i):
    """ Hides the i-th line of both figures. """
    for renderers in (spectra_1, spectra_2):
        renderers[i].visible = False

fluxes, lambs, chron_names = reader()
spec1, spec2 = plotter()

# spectra_1[k] is spectrum k drawn on spec1; spectra_2[k] is spectrum k + 1
# drawn on spec2. Every line starts out hidden.
spectra_1 = []
spectra_2 = []

for i in range(len(fluxes) - 1):
    hidden_left = spec1.line(lambs[i], fluxes[i], line_width=0.5)
    hidden_right = spec2.line(lambs[i + 1], fluxes[i + 1], line_width=0.5)
    hidden_left.visible = False
    hidden_right.visible = False
    spectra_1.append(hidden_left)
    spectra_2.append(hidden_right)

# Dataframe to be modified within the widget.
# One row (index 0): three boolean tag columns plus a free-text comment.
data = {'CLQ Candidate': [False], 'Broad OIII': [False], 'Other broad lines': [False],
        'Comments': ['None']}
# Build the frame from `data` (the dict defined just above).
df = pd.DataFrame(data=data,
                  columns=['CLQ Candidate', 'Broad OIII', 'Other broad lines', 'Comments'])

# Widget preamble.
# `i` is the index of the pair on display; the Next/Previous callbacks
# rebind it through `global i`.
i = 0
# Give each figure a fresh, empty title for set_visible to fill in.
for panel in (spec1, spec2):
    panel.title = Title()
set_visible(i)

def output_app(doc):
    """ Outputs the interactive portion of the viewer.

    Creates the tag checkboxes, the comment box, the Previous/Next buttons
    and the save button, connects their callbacks to the module-level
    dataframe ``df`` and the module-level index ``i``, and adds everything
    to ``doc`` in a grid above the two figures.

    Parameters
    ----------
    doc : the document object this function is called with; only its
        ``add_root`` method is used.
    """
    tag_labels = ['CLQ Candidate', 'Broad OIII', 'Other broad lines']

    # Start from an empty string rather than None: the value is a string
    # property, and newer Bokeh releases reject None for it.
    comments = TextInput(value='', title="Comments")

    def text_callback(attr, old, new):
        """ Modifies the 'comments' column in the dataframe for this round of spectra. """
        # A single .loc assignment writes into df itself; the chained form
        # df['Comments'][0] = ... may write to a temporary copy instead.
        df.loc[0, 'Comments'] = new
    comments.on_change('value', text_callback)

    checkboxes = CheckboxGroup(labels=tag_labels)

    def checkbox_callback(attr, old, new):
        """ Modifies the three 'label' columns in the dataframe for this round of spectra. """
        # `new` is the list of ticked positions; a tag is True exactly when
        # its position is in that list.
        for position, label in enumerate(tag_labels):
            df.loc[0, label] = position in new
    # Listening on 'active' delivers the same list of ticked positions
    # that the on_click shortcut passed to its handler.
    checkboxes.on_change('active', checkbox_callback)

    next_plot = Button(label='Next', button_type='primary')

    def next_callback():
        """ Selects the next two plots to be displayed. """
        global i
        set_invisible(i)
        # Wrap from the last pair back to the first.
        i = (i + 1) % len(spectra_1)
        set_visible(i)
    next_plot.on_click(next_callback)

    previous_plot = Button(label='Previous', button_type='primary')

    def previous_callback():
        """ Selects the previous two plots to be displayed. """
        global i
        set_invisible(i)
        # Wrap from the first pair back to the last.
        i = (i - 1) % len(spectra_1)
        set_visible(i)
    previous_plot.on_click(previous_callback)

    savebutton = Button(label='Save as CSV', button_type='success')

    def save():
        """ Saves the data as a CSV file. """
        df.to_csv('spectra.csv')
    savebutton.on_click(save)

    doc.add_root(gridplot([[checkboxes, savebutton],
                           [comments, previous_plot, next_plot],
                           [spec1, spec2]]))

# Hand the app function to Bokeh for display in the notebook.
# NOTE(review): notebook_url is hard-coded to localhost:8888; presumably it has
# to match the address this notebook is served from — confirm before reuse.
show(output_app, notebook_url = "http://localhost:8888")