# Spectroscopic Classification

This notebook assigns Branch et al. 2006 style subtypes to SDSS spectra.

In [None]:
import sys
from pathlib import Path

import numpy as np
import pandas as pd
from astropy.table import Table
from matplotlib import pyplot as plt
from sndata.sdss import sako18spec
from sndata.csp import dr1

sys.path.insert(0, '../')
from phot_class.spectra import tabulate_spectral_properties, dust_map

# Make the SDSS spectroscopic data available before any tables are loaded
# (presumably downloads to local disk if missing -- confirm in sndata docs)
sako18spec.download_module_data()

# Directory that receives the figures saved by this notebook. Missing parent
# directories are created and an already existing directory is left alone.
fig_dir = Path('./notebook_figs/pew_measurements')
fig_dir.mkdir(parents=True, exist_ok=True)


We start by reading in measurements from external publications.

In [None]:
# Table 6 of the CSP DR1 release, converted to a DataFrame
# (presumably the Folatelli et al. 2013 pW measurements -- confirm)
csp_table_6 = dr1.load_table(6)
folatelli_13 = csp_table_6.to_pandas()
# pW7 and pW6 values for 24 objects, entered by hand as the Branch 2006 data.
# The three lists are positionally aligned: entry i of each list describes
# the same object.
# NOTE(review): '1998ag' may be a transcription error for SN 1998aq --
# confirm against the published table before relying on the object Ids.
branch_06 = pd.DataFrame({
    'obj_id': [
        '1981B', '1984A', '1986G', '1989B', '1990N', '1991M',
        '1991T', '1991bg', '1992A', '1994D', '1994ae', '1996X',
        '1997cn', '1998ag', '1998bu', '1999aw', '1999by', '1999ee',
        '2000cx', '2001ay', '2001el', '2002bf', '2002bo', '2002cx',
    ],
    'pw7': [
        127, 204, 126, 124, 88, 137,
        29, 92, 107, 96, 89, 87,
        101, 78, 94, 58, 95, 82,
        51, 150, 95, 171, 146, 18,
    ],
    'pw6': [
        17, 23, 33, 20, 12, 19,
        0, 49, 19, 19, 7, 17,
        45, 12, 16, 1, 46, 5,
        2, 8, 16, 10, 11, 0,
    ],
})


Next we read in spectroscopic measurements from the analysis pipeline. We include results determined using a range of resampling sizes.

In [None]:
def read_spec_results(path):
    """Read in spectroscopic measurements from the analysis pipeline

    The measurements are indexed by ``feat_name``, ``obj_id`` and ``date``
    and then joined against table 9 of the ``sako18spec`` data release.
    Before joining, rows of that table with a ``Type`` of ``'Gal'`` are
    dropped and only the first row for each ``CID`` is kept.

    Args:
        path (str, Path): The path of the ecsv file to read

    Returns:
        A Pandas DataFrame
    """

    spec_class = Table.read(path).to_pandas()
    spec_class = spec_class.set_index(['feat_name', 'obj_id', 'date'])

    # Build the summary table as a chain of non-mutating operations. Calling
    # the ``inplace=True`` variants on a boolean-filtered frame is what
    # triggers pandas' SettingWithCopyWarning.
    spec_summary = sako18spec.load_table(9).to_pandas()
    spec_summary = (
        spec_summary[spec_summary['Type'] != 'Gal']
        .drop_duplicates('CID')
        .rename(columns={'CID': 'obj_id', 'Date': 'date'})
        .set_index(['obj_id'])
    )

    # The summary table is indexed by ``obj_id`` only, so its columns are
    # attached to every (feature, date) row of the matching object
    return spec_class.join(spec_summary)


In [None]:
# NOTE(review): this absolute path points into one user's home directory;
# change it to the local copy of the pipeline results before running.
results_dir = Path('/Users/daniel/Github/Photometric-Classification/results/')

# One results file per run; presumably the trailing number in each file name
# is the number of sampling iterations (0, 2 and 5) -- confirm
n0_path = results_dir / 'spec_class/sdss_sako18spec_3_1_0.ecsv'
n2_path = results_dir / 'spec_class/sdss_sako18spec_3_1_2.ecsv'
n5_path = results_dir / 'spec_class/sdss_sako18spec_3_1_5.ecsv'
n0_spec, n2_spec, n5_spec = map(read_spec_results, (n0_path, n2_path, n5_path))


In [None]:
# Display the first few rows of the combined table read from the
# ``sdss_sako18spec_3_1_5.ecsv`` results file
n5_spec.head()

Finally, we plot the pseudo-equivalent widths of the two Si II features (λ5972 vs. λ6355) against each other.

In [None]:
def get_colors(pw6, pw7):
    """Get the color of each point based on its coordinates

    Colors are assigned by the following rules, applied in order so that
    a later rule overrides an earlier one:

        1. black by default
        2. blue  where ``pw6 > 30``
        3. red   where ``pw6 < 30`` and ``pw7 > 105``
        4. green where ``pw7 < 70``

    All comparisons are strict, so a point with ``pw6`` of exactly 30 is
    never colored blue or red. The regions presumably correspond to the
    Branch et al. 2006 subtypes -- confirm against the publication.

    Args:
        pw6 (array-like): EW measurements for feature 6
        pw7 (array-like): EW measurements for feature 7 (same length)

    Returns:
        A 2d array of RGBA values with shape ``(len(pw6), 4)``
    """

    # Work on plain arrays so that lists are accepted and so that the masks
    # below are combined by position rather than by pandas index label
    pw6 = np.asarray(pw6)
    pw7 = np.asarray(pw7)

    color = np.zeros((len(pw6), 4))
    color[:, 3] = 1  # Default to opaque black
    color[pw6 > 30] = 0, 0, 1, 1  # Blue
    color[(pw6 < 30) & (pw7 > 105)] = 1, 0, 0, 1  # Red
    color[pw7 < 70] = 0, 128 / 255, 0, 1  # Green

    return color

def subplot_published_classes(axis):
    """Overlay published pw7 vs pw6 measurements on an axis

    The ``branch_06`` values are drawn as hollow diamonds and the
    ``folatelli_13`` values as hollow triangles. Marker edges are colored
    with ``get_colors`` and both data sets are drawn at ``zorder=2``.

    Args:
        axis (Axis): A matplotlib axis
    """

    # (x values, y values, marker) for each published data set
    published = (
        (branch_06.pw7, branch_06.pw6, 'D'),
        (folatelli_13.pW7, folatelli_13.pW6, 'v'),
    )

    for x_vals, y_vals, marker in published:
        edge_colors = get_colors(y_vals, x_vals)
        axis.scatter(
            x_vals,
            y_vals,
            marker=marker,
            facecolor='none',
            edgecolor=edge_colors,
            zorder=2)

def plot_si_ratio(spec_data, plot_external_data=True):
    """Plot the pW6 vs pW7 silicon pEW measurements

    Only rows with a ``Type`` of ``'Ia'``, ``'Ia-pec'`` or ``'Ia?'`` are
    used. Three panels are drawn, one for each signal-to-noise cut
    (``pew / pew_samperr`` greater than 1, 3 and 5). Within a panel only
    index entries that pass the cut for both features are plotted.

    Args:
        spec_data     (DataFrame): Measurements from the analysis pipeline
        plot_external_data (bool): Whether to plot data from CSP and Branch 2006

    Returns:
        A matplotlib figure
        An array of matplotlib axes
    """

    fig, axes = plt.subplots(1, 3, figsize=(24, 8), sharex=True, sharey=True)

    is_ia = spec_data.Type.isin(['Ia', 'Ia-pec', 'Ia?'])
    si_data = spec_data[is_ia].loc[['pW6', 'pW7']]

    for snr, axis in zip((1, 3, 5), axes.flatten()):
        good_snr = si_data[si_data.pew / si_data.pew_samperr > snr]
        pw6_data = good_snr.loc['pW6']
        pw7_data = good_snr.loc['pW7']

        # Index.join performs a left join by default, which would keep every
        # pW6 entry and pad the missing pW7 values with NaN. Request the
        # intersection explicitly so both features cover the same entries.
        overlap = pw6_data.index.join(pw7_data.index, how='inner')
        pw6_data = pw6_data.reindex(overlap)
        pw7_data = pw7_data.reindex(overlap)

        pw6 = pw6_data.pew
        pw6_err = pw6_data.pew_samperr
        pw7 = pw7_data.pew
        pw7_err = pw7_data.pew_samperr
        color = get_colors(pw6, pw7)

        # Error bars sit at the bottom of the stack (zorder 0), below the
        # measured points (zorder 1)
        axis.errorbar(
            x=pw7,
            y=pw6,
            xerr=pw7_err,
            yerr=pw6_err,
            linestyle='',
            ecolor='grey',
            color='grey',
            alpha=.3,
            zorder=0)

        if plot_external_data:
            subplot_published_classes(axis)

        axis.scatter(pw7, pw6, color=color, zorder=1)
        axis.set_xlabel('Si ii λ6355')
        axis.set_title(f'SNR > {snr}')

    # The y axis is shared, so only the left-most panel is labeled
    axes[0].set_ylabel('Si ii λ5972')
    return fig, axes


In [None]:
# Figure for the ``n0_spec`` results
fig, axes = plot_si_ratio(n0_spec)
fig.suptitle('0 Sampling Iterations')

# Limits are set on the first panel only
axes[0].set_xlim(-10, 700)
axes[0].set_ylim(-10, 300)

plt.savefig(fig_dir / '0_iterations.pdf')
plt.show()


In [None]:
# Figure for the ``n2_spec`` results
fig, axes = plot_si_ratio(n2_spec)
fig.suptitle('2 Sampling Iterations')

# Limits are set on the first panel only
axes[0].set_xlim(-10, 500)
axes[0].set_ylim(-10, 200)

plt.savefig(fig_dir / '2_iterations.pdf')
plt.show()


In [None]:
# Figure for the ``n5_spec`` results
fig, axes = plot_si_ratio(n5_spec)
fig.suptitle('5 Sampling Iterations')

# Limits are set on the first panel only
axes[0].set_xlim(-10, 400)
axes[0].set_ylim(-10, 150)

plt.savefig(fig_dir / '5_iterations.pdf')
plt.show()
