# SNID Results

This notebook explores typing results from running SNID on SDSS spectra.

In [None]:
import sys
import warnings
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sn
from astropy.table import Table
from matplotlib import pyplot as plt
from sndata.sdss import sako18spec
from sndata._utils import convert_to_jd

sys.path.insert(0, '../scripts')
from run_snid_typing import compile_peak_types, compile_peak_subtypes


In [None]:
# Pipeline outputs are read from ``results/snid`` in the parent of the
# working directory
working_dir = Path('.').resolve()
results_dir = working_dir.parent / 'results'
snid_dir = results_dir / 'snid'

# Figures produced by this notebook are written here (created on demand)
fig_dir = Path('./notebook_figs/snid')
fig_dir.mkdir(parents=True, exist_ok=True)


## SN Typing

Results for SNID top-level types (`Ia`, `Ib`, `Ic`, `II`, and `NotSN`). We consider results with a minimum rlap of 10 and with a minimum rlap of 5. We also consider the combination of the two sets of results.


In [None]:
def plot_snid_template_num(
        data, filt=None, type_col='Type', num_col='nType', perc_col='percType'):
    """Plot distributions for the number of template matches

    Draws three panels: a histogram of ``num_col``, a histogram of
    ``perc_col``, and a scatter plot of ``perc_col`` against ``num_col``.
    Every classification found in ``type_col`` is drawn as its own series.

    Args:
        data (DataFrame): Table with the ``type_col``, ``num_col``, and
            ``perc_col`` columns
        filt       (str): Only plot classifications containing this substring
        type_col   (str): Name of the column to group classifications by
        num_col    (str): Name of the column plotted as "Number of Templates"
        perc_col   (str): Name of the column plotted as
            "Percentage of Templates" (binned between 0 and 1)
    """

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(9, 3))

    # Labels and bin edges do not depend on the plotted groups, so they are
    # defined once. This also keeps the axes labeled when ``filt`` excludes
    # every classification.
    ax1.set_xlabel('Number of Templates')
    ax1.set_ylabel('Number of Targets')
    ax2.set_xlabel('Percentage of Templates')
    ax2.set_ylabel('Number of Targets')
    ax3.set_xlabel('Number of Templates')
    ax3.set_ylabel('Percentage of Templates')

    num_bins = np.arange(0, 100, 5)
    perc_bins = np.arange(0, 1.1, .1)

    for classification, class_data in data.groupby(type_col):
        if filt and filt not in classification:
            continue

        ax1.hist(class_data[num_col], bins=num_bins, alpha=.5)
        ax2.hist(class_data[perc_col], bins=perc_bins, alpha=.5)

        # Larger groups get a lower zorder so they are drawn underneath
        # smaller ones. Only the scatter series carries a legend label.
        ax3.scatter(
            class_data[num_col], class_data[perc_col],
            label=classification, s=8, alpha=.5, zorder=-len(class_data))

    fig.legend(bbox_to_anchor=(1.1, 1.1))
    fig.tight_layout()
    

In [None]:
# Top-level classifications compiled from the ``type_rlap_5`` outputs
types_rlap_5_path = snid_dir / 'type_rlap_5'
types_rlap_5 = compile_peak_types(types_rlap_5_path)

plot_snid_template_num(data=types_rlap_5)

# Number of targets assigned to each type
types_rlap_5['Type'].value_counts()


In [None]:
# Top-level classifications compiled from the ``type_rlap_10`` outputs
types_rlap_10_path = snid_dir / 'type_rlap_10'
types_rlap_10 = compile_peak_types(types_rlap_10_path)

plot_snid_template_num(data=types_rlap_10)

# Number of targets assigned to each type
types_rlap_10['Type'].value_counts()


In [None]:
# Start from the rlap 5 table, then overwrite in place with the non-NA
# rlap 10 values wherever the two tables share an index and column
final_types = types_rlap_5.copy()
final_types.update(other=types_rlap_10)

plot_snid_template_num(data=final_types)

# Number of targets assigned to each type after combining both runs
final_types['Type'].value_counts()


## SN Sub-Typing

SNID results for sub-categories.

In [None]:
# Sub-type classifications compiled from the ``subtype_rlap_5`` outputs
subtypes_rlap_5_path = snid_dir / 'subtype_rlap_5'
subtypes_rlap_5 = compile_peak_subtypes(subtypes_rlap_5_path)

# Only sub-types with 'Ia' in their name are plotted
plot_snid_template_num(
    data=subtypes_rlap_5,
    filt='Ia',
    perc_col='percSubType',
    num_col='nSubType',
    type_col='subType')

# Number of targets assigned to each sub-type
subtypes_rlap_5['subType'].value_counts()


In [None]:
# Sub-type classifications compiled from the ``subtype_rlap_10`` outputs
subtypes_rlap_10_path = snid_dir / 'subtype_rlap_10'
subtypes_rlap_10 = compile_peak_subtypes(subtypes_rlap_10_path)

# Only sub-types with 'Ia' in their name are plotted
plot_snid_template_num(
    data=subtypes_rlap_10,
    filt='Ia',
    perc_col='percSubType',
    num_col='nSubType',
    type_col='subType')

# Number of targets assigned to each sub-type
subtypes_rlap_10['subType'].value_counts()


In [None]:
# Start from the rlap 5 table, then overwrite in place with the non-NA
# rlap 10 values wherever the two tables share an index and column
final_subtypes = subtypes_rlap_5.copy()
final_subtypes.update(other=subtypes_rlap_10)

# Only sub-types with 'Ia' in their name are plotted
plot_snid_template_num(
    data=final_subtypes,
    filt='Ia',
    perc_col='percSubType',
    num_col='nSubType',
    type_col='subType')

# Number of targets assigned to each sub-type after combining both runs
final_subtypes['subType'].value_counts()


## Comparison with SDSS

### Confusion Matrix

In [None]:
def plot_confusion_matrix(data, xcol='Type', ycol='SDSSClass', xlabel='This Work', ylabel='SDSS'):
    """Plot a confusion matrix comparing SDSS and SNID classifications

    The matrix is drawn as an annotated heatmap on the current matplotlib
    axes.

    Args:
        data (DataFrame): Table with the ``xcol`` and ``ycol`` columns
        xcol       (str): Column of classifications tabulated along the columns
        ycol       (str): Column of classifications tabulated along the rows
        xlabel     (str): Name given to the column axis of the matrix
        ylabel     (str): Name given to the row axis of the matrix

    Returns:
        DataFrame of cross-tabulated counts used in the plot
    """

    confusion_matrix = pd.crosstab(
        data[ycol], data[xcol],
        rownames=[ylabel], colnames=[xlabel])

    # ``vmax`` caps the color scale at 30; ``fmt='g'`` prints the annotated
    # counts without scientific notation.
    sn.heatmap(confusion_matrix, annot=True, vmax=30, cmap="Blues", fmt='g')
    plt.tight_layout()

    # Returned so the function honors its documented contract
    return confusion_matrix


In [None]:
# SDSS class labels mapped onto the top-level type names used by this work
sdss_class_to_type = {
    'SNIa': 'Ia',
    'SNIa?': 'Ia',
    'SNIb': 'Ib',
    'SNIc': 'Ic',
    'SNII': 'II',
    'AGN': 'NotSN',
}

# SNID sub-types collapsed onto their top-level type
subtype_to_type = {
    'Ia-norm': 'Ia',
    'Ia-91bg': 'Ia',
    'Ia-91T': 'Ia',
    'Ia-pec': 'Ia',
    'Ia-csm': 'Ia',

    'Ib-norm': 'Ib',
    'Ib-pec': 'Ib',

    'Ic-norm': 'Ic',
    'Ic-broad': 'Ic',

    'IIb': 'II',
    'IIn': 'II',
    'IIL': 'II',
    'IIP': 'II',

    'Gal': 'NotSN',
    'AGN': 'NotSN',
}

summary_path = snid_dir / 'summary.csv'
subtype_summary = pd.read_csv(summary_path, index_col='objId')

# Values without an entry in the mapping are left unchanged
subtype_summary['SDSSClass'] = subtype_summary['SDSSClass'].replace(sdss_class_to_type)
subtype_summary['Type'] = subtype_summary['subType'].replace(subtype_to_type)


In [None]:
# Display the summary table, including the ``SDSSClass`` and ``Type`` columns
subtype_summary

In [None]:
# Top-level types: SNID ``Type`` along the columns, ``SDSSClass`` along the rows
plot_confusion_matrix(data=subtype_summary)

type_matrix_path = fig_dir / 'type_confusion_matrix.pdf'
plt.savefig(type_matrix_path)


In [None]:
# A new, larger figure is opened first; the sub-type matrix is drawn on it
plt.figure(figsize=(8, 6))
plot_confusion_matrix(data=subtype_summary, xcol='subType')

subtype_matrix_path = fig_dir / 'subtype_confusion_matrix.pdf'
plt.savefig(subtype_matrix_path)


### Spectra

We inspect spectra with classifications that disagree.

In [None]:
# Holds the loaded master table so repeated t0 look-ups only load it once
_SDSS_MASTER_TABLE_CACHE = {}


def _load_sdss_master_table():
    """Return the ``sako18spec`` master table as a DataFrame indexed by ``CID``

    The table is loaded on first use and reused on later calls.
    """

    if 'master' not in _SDSS_MASTER_TABLE_CACHE:
        # Warnings raised while loading / converting the table are suppressed
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            _SDSS_MASTER_TABLE_CACHE['master'] = (
                sako18spec.load_table('master').to_pandas(index='CID'))

    return _SDSS_MASTER_TABLE_CACHE['master']


def get_sdss_t0(obj_id):
    """Get the t0 value for a SDSS target

    Args:
        obj_id (str): The object identifier

    Returns:
        The time of B-band maximum: the ``PeakMJDSALT2zspec`` value of the
        target after conversion with ``convert_to_jd``

    Raises:
        ValueError: If the object is not in the master table, or if its
            peak time is NaN
    """

    sdss_master_table = _load_sdss_master_table()

    # Unknown object ID
    if obj_id not in sdss_master_table.index:
        raise ValueError(f't0 not available for <{obj_id}>')

    t0_mjd = sdss_master_table.loc[obj_id, 'PeakMJDSALT2zspec']

    # Known object Id with unknown peak time
    if np.isnan(t0_mjd):
        raise ValueError(f't0 not available for <{obj_id}>')

    return convert_to_jd(t0_mjd)


def _sdss_date_to_jd(observed_date):
    """Convert SDSS Spectra observation dates from string format to JD

    Accepts a single date string or an array of date strings; the conversion
    is applied element-wise. The date is interpreted as midnight UTC.

    Args:
        observed_date (str): Date string with format ``%Y-%m-%d``

    Returns:
        Observed date in JD as a float (an array of floats for array input)
    """

    # An explicit UTC offset makes the parsed datetime timezone aware, so
    # ``timestamp()`` does not depend on the local timezone of the machine.
    date_with_timezone = observed_date + '+0000'
    date = datetime.strptime(date_with_timezone, '%Y-%m-%d%z')

    unix_time = date.timestamp()
    january_1_1970_in_julian = 2440587.5
    day_in_seconds = 24 * 60 * 60
    date_in_jd = (unix_time / day_in_seconds) + january_1_1970_in_julian

    return date_in_jd


# ``otypes`` is given explicitly: without it ``np.vectorize`` has to call the
# function on the first element to infer the output type and therefore raises
# a ValueError for empty input. With it, empty input yields an empty array.
convert_sdss_date_to_jd = np.vectorize(_sdss_date_to_jd, otypes=[float])


In [None]:
# Select the targets whose SDSS class differs from the type found in this work
classes_differ = subtype_summary['SDSSClass'].ne(subtype_summary['Type'])
disagreement = subtype_summary.loc[classes_differ].sort_values(by=['SDSSClass', 'Type'])

print(f'{len(disagreement)} objects are in disagreement')
disagreement


In [None]:
def plot_dis_spectra(disagreement_df):
    """Plot one spectrum per target whose classification disagrees with SDSS

    For every row, the spectrum observed at the phase recorded in the row's
    ``Phase`` value is drawn in its own figure between 4000 and 9000.

    Args:
        disagreement_df (DataFrame): Data frame indexed by object Id with
            ``Phase``, ``SDSSClass``, and ``Type`` values
    """

    # Wavelength window considered by SNID; cropping to it also helps the
    # automatic axis scaling
    min_wave, max_wave = 4000, 9000

    for obj_id, row in disagreement_df.iterrows():
        target_id = str(obj_id)

        # Fetch all spectra of the target, dropping those flagged as 'Gal'
        spectra = sako18spec.get_data_for_id(target_id)
        spectra = spectra[spectra['type'] != 'Gal']

        # Phase of each spectrum relative to t0, rounded to one decimal
        t0 = get_sdss_t0(target_id)
        observed_jd = convert_sdss_date_to_jd(spectra['date'])
        spectra['phase'] = np.round(observed_jd - t0, 1)

        # Keep the spectrum at the phase used by SNID, inside the window
        at_snid_phase = spectra['phase'] == row.Phase
        in_window = (
            (spectra['wavelength'] > min_wave)
            & (spectra['wavelength'] < max_wave))
        selected = spectra[at_snid_phase & in_window]

        plt.plot(selected['wavelength'], selected['flux'])
        plt.title(f'{obj_id} - SDSS: {row.SDSSClass} - SNID: {row.Type}')
        plt.xlim(min_wave, max_wave)
        plt.show()


In [None]:
# Plot the spectrum of every target listed in ``disagreement``
plot_dis_spectra(disagreement)
