# SNID Results

This notebook explores SDSS typing results from SNID.

In [None]:
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sn
from matplotlib import pyplot as plt
from astropy.table import Table

# Put the relative ``../scripts`` directory first on the import path so the
# ``run_snid_typing`` module can be imported from this notebook
sys.path.insert(0, '../scripts')
from run_snid_typing import sdss_data_iter

# ``results`` directory located in the parent of the current working directory
results_dir = Path('.').resolve().parent / 'results'


We define some plotting functions ahead of time.

In [None]:
def plot_snid_template_num(data, filt=None, colname='type'):
    """Plot distributions for the number of template matches

    Three panels are drawn side by side: a histogram of ``ntemp``, a
    histogram of ``perc_temp``, and a scatter plot of ``perc_temp`` against
    ``ntemp``. Each distinct value found in the ``colname`` column is drawn
    as its own series.

    Args:
        data (DataFrame): DataFrame with ``ntemp`` and ``perc_temp`` columns
            in addition to the column named by ``colname``
        filt       (str): Only plot types with this argument in the name
        colname    (str): Use a column other than ``type``
    """

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(9, 3))

    # Axis labels do not depend on the classification, so they are set once
    # here. This also keeps the axes labeled when ``filt`` matches nothing.
    ax1.set_xlabel('Number of Templates')
    ax1.set_ylabel('Number of Targets')
    ax2.set_xlabel('Percentage of Templates')
    ax2.set_ylabel('Number of Targets')
    ax3.set_xlabel('Number of Templates')
    ax3.set_ylabel('Percentage of Templates')

    # Shared bin edges so the histograms of every class line up
    ntemp_bins = np.arange(0, 100, 5)
    perc_temp_bins = np.arange(0, 1.1, .1)

    for classification, class_data in data.groupby(colname):
        if filt and filt not in classification:
            continue

        ax1.hist(class_data.ntemp, bins=ntemp_bins, alpha=.5)
        ax2.hist(class_data.perc_temp, bins=perc_temp_bins, alpha=.5)

        # A lower zorder is drawn first, so classes with many members sit
        # underneath classes that only have a few points
        ax3.scatter(
            class_data.ntemp, class_data.perc_temp,
            label=classification, s=8, alpha=.5, zorder=-len(class_data))

    # Only the scatter series carry labels, so they define the legend entries
    fig.legend(bbox_to_anchor=(1.1, 1.1))
    fig.tight_layout()
    
def plot_confusion_matrix(data, colname='type'):
    """Draw a heatmap of SDSS classifications against SNID classifications

    Args:
        data (DataFrame): SNID results holding the column named by
            ``colname``; it is joined to the SDSS table on its index
        colname    (str): Column of SNID classifications to compare
    """

    from sndata.sdss import sako18

    # Keep only the targets whose ``PeakMJDSALT2zspec`` entry is not masked
    sdss_master = sako18.load_table('master')
    has_peak = ~sdss_master['PeakMJDSALT2zspec'].mask
    sdss_labels = sdss_master[has_peak][['CID', 'Classification']]
    sdss_labels = sdss_labels.to_pandas(index='CID')

    # ``join`` defaults to a left join on the index, so every selected SDSS
    # target is retained even when ``data`` has no entry for it
    merged = sdss_labels.join(data[colname])

    counts = pd.crosstab(
        index=merged['Classification'],
        columns=merged[colname],
        rownames=['SDSS'],
        colnames=['SNID'])

    sn.heatmap(counts, annot=True, vmax=30, cmap="Blues")
    plt.show()


## SN Typing

SNID results for the top-level types (`Ia`, `Ib`, `Ic`, `II`, and `NotSN`).


In [None]:
# Top level (parent) SNID types. Kept as a list because it is used for
# list-like label selection with ``DataFrame.loc``.
TYPES = ['Ia', 'Ib', 'Ic', 'II', 'NotSN']

def read_peak_type(path):
    """Determine the best matched top level type in an SNID output file

    Args:
        path (str, Path): Path of the SNID output file to read

    Returns:
        - The entry of ``TYPES`` with the most matched templates
        - The number of templates matched to that type
        - That number divided by the templates matched to all ``TYPES``
    """

    column_names = [
        'type', 'ntemp', 'fraction', 'slope',
        'redshift', 'redshift_error', 'age', 'age_error',
    ]

    summary = Table.read(
        str(path),
        format='ascii.basic',
        names=column_names,
        header_start=4,
        data_start=4,
        data_end=28,
    )
    summary = summary.to_pandas(index='type')

    # Only the parent types (see TYPES global) are considered. The total
    # number of matched templates is the sum of matches over those types.
    parent_rows = summary.loc[TYPES]
    best_type = parent_rows.ntemp.idxmax()
    num_matched = summary.loc[best_type].ntemp
    frac_matched = num_matched / parent_rows.ntemp.sum()

    return best_type, num_matched, frac_matched


def compile_peak_types(results_dir):
    """Collect the peak type of every SNID output file in a directory

    Files matching ``*snid.output`` are read. The object ID and phase are
    taken from the first two underscore separated parts of each file name.
    For each object only the spectrum with the smallest absolute phase is
    kept.

    Args:
        results_dir (Path): Directory of SNID outputs

    Returns:
        A DataFrame indexed by object ID
    """

    records = []
    for output_file in results_dir.glob('*snid.output'):
        obj_id, phase = output_file.name.split('_')[:2]
        best_type, num_matched, frac_matched = read_peak_type(output_file)
        records.append(
            (obj_id, float(phase), best_type, frac_matched, num_matched))

    table = pd.DataFrame(
        records,
        columns=['obj_id', 'phase', 'type', 'perc_temp', 'ntemp'])

    # Sorting by distance from peak puts the nearest spectrum of each
    # object first, so dropping later duplicates keeps exactly that one
    table['abs_phase'] = table['phase'].abs()
    nearest = table.sort_values(by='abs_phase')
    nearest = nearest.drop_duplicates(subset='obj_id', keep='first')

    nearest['obj_id'] = nearest['obj_id'].astype('str')
    return nearest.set_index('obj_id')


In [None]:
# Compile peak types from the SNID outputs stored under ``type_rlap_5``
# (the directory name suggests a minimum rlap of 5 - confirm against the run)
types_rlap_5_dir = results_dir / 'snid' / 'type_rlap_5'
types_rlap_5 = compile_peak_types(types_rlap_5_dir)

# Plot the template match distributions, then tally the targets per type
plot_snid_template_num(types_rlap_5)
types_rlap_5.type.value_counts()


In [None]:
# Compile peak types from the SNID outputs stored under ``type_rlap_10``
# (the directory name suggests a minimum rlap of 10 - confirm against the run)
types_rlap_10_dir = results_dir / 'snid' / 'type_rlap_10'
types_rlap_10 = compile_peak_types(types_rlap_10_dir)

# Plot the template match distributions, then tally the targets per type
plot_snid_template_num(types_rlap_10)
types_rlap_10.type.value_counts()


In [None]:
# Start from a copy of the ``types_rlap_5`` table and overwrite it in place
# with the non-NA ``types_rlap_10`` values for object IDs found in both
# tables. ``update`` aligns on the index and does not add new rows, so
# objects that only appear in ``types_rlap_10`` are not included.
combined_types = types_rlap_5.copy()
combined_types.update(types_rlap_10)

# Plot the template match distributions, then tally the targets per type
plot_snid_template_num(combined_types)
combined_types.type.value_counts()


In [None]:
# Compare the combined SNID types against the SDSS classifications
plot_confusion_matrix(combined_types, 'type')


## SN Sub-Typing

SNID results for sub-categories.

In [None]:
def read_peak_subtype(path):
    """Determine the best matched sub-type in an SNID output file

    The three rows with the most matched templates are taken to be, in
    order, the parent type, the peak sub-type, and the runner-up sub-type.

    NOTE(review): this assumes the second ranked row is a sub-type of the
    first ranked row. A second parent type with many matches could take that
    slot instead - confirm against the SNID output layout.

    Args:
        path (str, Path): Path of the SNID output file to read

    Returns:
        - The name of the peak sub-type
        - The number of templates matched to the peak sub-type
        - That number divided by the number of templates matched to the
          top ranked row

    Raises:
        AssertionError: If the runner-up has as many matched templates as
            the peak sub-type, i.e. the peak sub-type is ambiguous
    """

    names = ['type', 'ntemp', 'fraction', 'slope', 'redshift',
             'redshift_error', 'age', 'age_error']

    data = Table.read(
        str(path), header_start=4, data_start=4,
        data_end=28, format='ascii.basic', names=names
    ).to_pandas(index='type')

    sn_type, subtype, second_subtype = data.ntemp.nlargest(3).index
    ntemp = data.loc[subtype].ntemp

    # The peak sub-type has to strictly beat the runner-up. The template
    # counts are compared here: comparing a count against the runner-up's
    # label can never be equal, so such a check would always pass.
    assert ntemp != data.loc[second_subtype].ntemp, (
        'Ambiguous peak sub-type in {}: {} and {} matched the same number '
        'of templates'.format(path, subtype, second_subtype))

    perc_temp = ntemp / data.loc[sn_type].ntemp

    return subtype, ntemp, perc_temp


def compile_peak_subtypes(results_dir):
    """Collect the peak sub-type of every SNID output file in a directory

    Files matching ``*snid.output`` are read. The object ID and phase are
    taken from the first two underscore separated parts of each file name.
    For each object only the spectrum with the smallest absolute phase is
    kept.

    Args:
        results_dir (Path): Directory of SNID outputs

    Returns:
        A DataFrame indexed by object ID
    """

    records = []
    for output_file in results_dir.glob('*snid.output'):
        obj_id, phase = output_file.name.split('_')[:2]
        best_subtype, num_matched, frac_matched = read_peak_subtype(output_file)
        records.append(
            (obj_id, float(phase), best_subtype, num_matched, frac_matched))

    table = pd.DataFrame(
        records,
        columns=['obj_id', 'phase', 'type', 'ntemp', 'perc_temp'])

    # Sorting by distance from peak puts the nearest spectrum of each
    # object first, so dropping later duplicates keeps exactly that one
    table['abs_phase'] = table['phase'].abs()
    nearest = table.sort_values(by='abs_phase')
    nearest = nearest.drop_duplicates(subset='obj_id', keep='first')

    nearest['obj_id'] = nearest['obj_id'].astype('str')
    return nearest.set_index('obj_id')


We consider results with a minimum rlap of 10 and 5. We also consider the combination of the results.

In [None]:
# Compile peak sub-types from the SNID outputs stored under ``subtype_rlap_5``
subtypes_rlap_5_path = results_dir / 'snid' / 'subtype_rlap_5'
subtypes_rlap_5 = compile_peak_subtypes(subtypes_rlap_5_path)

# Plot only the classes with ``Ia`` in their name, then tally every sub-type
plot_snid_template_num(subtypes_rlap_5, 'Ia')
subtypes_rlap_5.type.value_counts()


In [None]:
# Compile peak sub-types from the SNID outputs stored under ``subtype_rlap_10``
subtypes_rlap_10_path = results_dir / 'snid' / 'subtype_rlap_10'
subtypes_rlap_10 = compile_peak_subtypes(subtypes_rlap_10_path)

# Plot only the classes with ``Ia`` in their name, then tally every sub-type
plot_snid_template_num(subtypes_rlap_10, 'Ia')
subtypes_rlap_10.type.value_counts()


In [None]:
# Start from a copy of the ``subtypes_rlap_5`` table and overwrite it in
# place with the non-NA ``subtypes_rlap_10`` values for object IDs found in
# both tables. ``update`` aligns on the index and does not add new rows.
combined_subtypes = subtypes_rlap_5.copy()
combined_subtypes.update(subtypes_rlap_10)

# Plot only the classes with ``Ia`` in their name, then tally every sub-type
plot_snid_template_num(combined_subtypes, 'Ia')
combined_subtypes.type.value_counts()
