# SNID Results

This notebook explores SDSS typing results from SNID.

In [None]:
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sn
from matplotlib import pyplot as plt
from astropy.table import Table

sys.path.insert(0, '../scripts')
from run_snid_typing import sdss_data_iter

# SNID outputs are read from the ``results`` directory that sits next to
# the directory this notebook is run from
results_dir = Path('.').resolve().parent.joinpath('results')

# Figures produced by this notebook are saved here (created on demand)
fig_dir = Path('notebook_figs', 'snid')
fig_dir.mkdir(parents=True, exist_ok=True)


We define some plotting functions ahead of time.

In [None]:
def plot_snid_template_num(data, filt=None, colname='Type'):
    """Plot distributions for the number of template matches

    Three panels are drawn: a histogram of ``nType``, a histogram of
    ``percType``, and a scatter plot of ``percType`` against ``nType``.
    Every classification found in ``colname`` is drawn as its own series.

    Args:
        data (DataFrame): DataFrame with ``nType``, ``percType`` and
            ``colname`` columns
        filt       (str): Only plot types with this argument in the name
        colname    (str): Use a column other than ``'Type'``
    """

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(9, 3))

    # Axis labels do not depend on the classification, so set them once
    ax1.set_xlabel('Number of Templates')
    ax1.set_ylabel('Number of Targets')
    ax2.set_xlabel('Percentage of Templates')
    ax2.set_ylabel('Number of Targets')
    ax3.set_xlabel('Number of Templates')
    ax3.set_ylabel('Percentage of Templates')

    count_bins = np.arange(0, 100, 5)

    # The compile functions in this notebook store ``percType`` on a
    # 0 - 100 scale, so the bins have to span percentages, not fractions
    percent_bins = np.arange(0, 101, 10)

    for classification, class_data in data.groupby(colname):
        if filt and filt not in classification:
            continue

        ax1.hist(class_data.nType, bins=count_bins, alpha=.5)
        ax2.hist(class_data.percType, bins=percent_bins, alpha=.5)

        # Negative zorder by group size draws the larger groups underneath
        ax3.scatter(
            class_data.nType, class_data.percType,
            label=classification, s=8, alpha=.5, zorder=-len(class_data))

    fig.legend(bbox_to_anchor=(1.1, 1.1))
    fig.tight_layout()


def plot_confusion_matrix(data, colname='Type'):
    """Draw a heatmap cross-tabulating SDSS and SNID classifications

    The SDSS classifications are taken from the ``master`` table of
    ``sndata.sdss.sako18`` and joined onto ``data`` using its index.

    Args:
        data (DataFrame): DataFrame with ``colname`` column
        colname    (str): Use a column other than ``'Type'``
    """

    from sndata.sdss import sako18

    # Build a lookup of SDSS classifications indexed by a numeric object ID
    sako_master = sako18.load_table('master')
    sako_master['objID'] = np.array(sako_master['CID'], dtype=float)
    id_and_class = sako_master[['objID', 'Classification']]
    sdss_labels = id_and_class.to_pandas(index='objID')

    merged = data.join(sdss_labels)
    sdss_column = merged['Classification']
    snid_column = merged[colname]
    counts = pd.crosstab(
        sdss_column, snid_column, rownames=['SDSS'], colnames=['SNID'])

    sn.heatmap(counts, annot=True, vmax=30, cmap="Blues", fmt='g')
    plt.tight_layout()
    

## SN Typing

Results for the top-level SNID types (`Ia`, `Ib`, `Ic`, `II`, and `NotSN`).


In [None]:
def read_peak_type(path):
    """Determine the best matching parent type in an SNID output file

    Args:
        path (str, Path): Path to read

    Returns:
        A tuple with:
          - The parent type having the most template matches
          - The ``ntemp`` value listed for that type
          - That value as a percentage (0 - 100) of the matches summed
            over all parent types
    """

    parent_types = ['Ia', 'Ib', 'Ic', 'II', 'NotSN']
    column_names = ['type', 'ntemp', 'fraction', 'slope', 'redshift',
                    'redshift_error', 'age', 'age_error']

    # Row offsets select the type summary block of the output file
    summary_table = Table.read(
        str(path), header_start=4, data_start=4,
        data_end=28, format='ascii.basic', names=column_names
    )
    summary = summary_table.to_pandas(index='type')

    # Only the parent types compete for the peak type, and the total
    # number of matched templates is the sum over those parent types
    parent_counts = summary.loc[parent_types].ntemp
    best_type = parent_counts.idxmax()
    best_count = summary.loc[best_type].ntemp
    all_matches = parent_counts.sum()

    return best_type, best_count, best_count / all_matches * 100


def compile_peak_types(snid_dir):
    """Collect the peak type from every SNID output file in a directory

    When an object has more than one output file, only the file with the
    smallest absolute phase is kept.

    Args:
        snid_dir (Path): Directory of SNID outputs

    Returns:
        A DataFrame indexed by object ID

    Raises:
        FileNotFoundError: If ``snid_dir`` does not exist
    """

    if not snid_dir.exists():
        raise FileNotFoundError(f'Results directory DNE: {snid_dir}')

    records = []
    for output_file in snid_dir.glob('*snid.output'):
        # File names begin with ``<object id>_<phase>_``
        id_str, phase_str, *_ = output_file.name.split('_')
        records.append(
            [int(id_str), float(phase_str), *read_peak_type(output_file)])

    column_names = ['objId', 'Phase', 'Type', 'nType', 'percType']
    compiled = pd.DataFrame(records, columns=column_names)

    # Sorting by distance from peak means the first row of each object
    # is the spectrum nearest peak. The helper column stays in the output.
    compiled['abs_phase'] = compiled['Phase'].abs()
    nearest_peak = (
        compiled
        .sort_values('abs_phase', ascending=True)
        .drop_duplicates(keep='first', subset='objId')
        .sort_values('objId', ascending=True)
    )

    return nearest_peak.set_index('objId')


In [None]:
# Peak types compiled from the ``type_rlap_5`` SNID run
types_rlap_5_path = results_dir.joinpath('snid', 'type_rlap_5')
types_rlap_5 = compile_peak_types(types_rlap_5_path)

plot_snid_template_num(types_rlap_5)
types_rlap_5['Type'].value_counts()


In [None]:
# Peak types compiled from the ``type_rlap_10`` SNID run
types_rlap_10_path = results_dir.joinpath('snid', 'type_rlap_10')
types_rlap_10 = compile_peak_types(types_rlap_10_path)

plot_snid_template_num(types_rlap_10)
types_rlap_10['Type'].value_counts()


In [None]:
# Start from the rlap 5 results and overwrite them in place with the
# rlap 10 results wherever the latter has values for the same object
final_types = types_rlap_5.copy()
final_types.update(types_rlap_10)

plot_snid_template_num(final_types)
final_types['Type'].value_counts()


In [None]:
# Compare the combined SNID types against the SDSS classifications
plot_confusion_matrix(final_types, colname='Type')
plt.savefig(fig_dir.joinpath('type_confusion_matrix.pdf'))


## SN Sub-Typing

SNID results for sub-categories.

In [None]:
def read_peak_subtype(path):
    """Return the best matching subtype from an SNID output file

    Args:
        path (str, Path): Path to read

    Returns:
        A tuple with:
          - The redshift listed for the best matching subtype
          - The name of the best matching subtype
          - The ``ntemp`` value listed for that subtype
          - That value as a percentage (0 - 100) of the ``ntemp`` value
            of the entry with the most matches overall

    Raises:
        AssertionError: If the best and second best subtypes have the
            same number of template matches
    """

    names = ['type', 'ntemp', 'fraction', 'slope', 'redshift',
             'redshift_error', 'age', 'age_error']

    data = Table.read(
        str(path), header_start=4, data_start=4,
        data_end=28, format='ascii.basic', names=names
    ).to_pandas(index='type')

    # The three largest entries are treated as the parent type followed by
    # its best and second best subtypes.
    # NOTE(review): this assumes the parent type always ranks first - confirm
    sn_type, subtype, second_subtype = data.ntemp.nlargest(3).index

    # Make sure the best subtype is not equally as "good" a match as the
    # second best subtype. Compare the template counts themselves; comparing
    # a count against the second subtype's *name* could never detect a tie.
    assert data.loc[subtype].ntemp != data.loc[second_subtype].ntemp, (
        f'Subtypes {subtype} and {second_subtype} are equally '
        f'good matches in {path}')

    subtype_record = data.loc[subtype]
    type_record = data.loc[sn_type]
    perc_temp = subtype_record.ntemp / type_record.ntemp * 100

    return subtype_record.redshift, subtype, subtype_record.ntemp, perc_temp


def compile_peak_subtypes(snid_dir):
    """Collect the peak subtype from every SNID output file in a directory

    When an object has more than one output file, only the file with the
    smallest absolute phase is kept.

    Args:
        snid_dir (Path): Directory of SNID outputs

    Returns:
        A DataFrame indexed by object ID

    Raises:
        FileNotFoundError: If ``snid_dir`` does not exist
    """

    if not snid_dir.exists():
        raise FileNotFoundError(f'Results directory DNE: {snid_dir}')

    records = []
    for output_file in snid_dir.glob('*snid.output'):
        # File names begin with ``<object id>_<phase>_``
        id_str, phase_str, *_ = output_file.name.split('_')
        subtype_summary = read_peak_subtype(output_file)
        records.append([int(id_str), float(phase_str), *subtype_summary])

    column_names = [
        'objId', 'Phase', 'redshift', 'Type', 'nType', 'percType']
    compiled = pd.DataFrame(records, columns=column_names)

    # Sorting by distance from peak means the first row of each object
    # is the spectrum nearest peak; the helper column is then discarded
    compiled['abs_phase'] = compiled['Phase'].abs()
    nearest_peak = (
        compiled
        .sort_values('abs_phase', ascending=True)
        .drop_duplicates(keep='first', subset='objId')
        .drop('abs_phase', axis='columns')
        .sort_values('objId')
    )

    return nearest_peak.set_index('objId')


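We consider results with a minimum rlap of 5 and with a minimum rlap of 10. We also consider the combination of the two, where the rlap 5 results are overwritten by the rlap 10 results for every object that has them.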

In [None]:
# Peak subtypes compiled from the ``subtype_rlap_5`` SNID run
subtypes_rlap_5_path = results_dir.joinpath('snid', 'subtype_rlap_5')
subtypes_rlap_5 = compile_peak_subtypes(subtypes_rlap_5_path)

# Restrict the plot to classifications containing ``Ia``
plot_snid_template_num(subtypes_rlap_5, filt='Ia')
subtypes_rlap_5['Type'].value_counts()


In [None]:
# Peak subtypes compiled from the ``subtype_rlap_10`` SNID run
subtypes_rlap_10_path = results_dir.joinpath('snid', 'subtype_rlap_10')
subtypes_rlap_10 = compile_peak_subtypes(subtypes_rlap_10_path)

# Restrict the plot to classifications containing ``Ia``
plot_snid_template_num(subtypes_rlap_10, filt='Ia')
subtypes_rlap_10['Type'].value_counts()


In [None]:
# Start from the rlap 5 results and overwrite them in place with the
# rlap 10 results wherever the latter has values for the same object
combined_subtypes = subtypes_rlap_5.copy()
combined_subtypes.update(subtypes_rlap_10)

# Restrict the plot to classifications containing ``Ia``
plot_snid_template_num(combined_subtypes, filt='Ia')
combined_subtypes['Type'].value_counts()


In [None]:
# Open a larger figure before drawing the subtype confusion matrix
plt.figure(figsize=(8, 6))
plot_confusion_matrix(combined_subtypes, colname='Type')
plt.savefig(fig_dir.joinpath('subtype_confusion_matrix.pdf'))
