# Apply Photometric Classification To Fit Results

This notebook applies the photometric classification method from González-Gaitán et al. 2014 to SDSS, DES, and CSP light-curve fits. Results are then used to analyze the properties of peculiar supernovae.

In [None]:
from os import path

import numpy as np
import pandas as pd
from astropy.table import Table, join
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.palettes import Category20
from matplotlib import pyplot as plt
from scipy import optimize
from sklearn.utils import resample


## Load Data

We begin by reading in SNCosmo fit results for SDSS, DES, and CSP. This includes fits that use all observed bands, along with those that use exclusively the restframe blue and restframe red filters.

In [None]:
def get_fit_results(survey, model):
    """Read the SNCosmo fit tables for one survey / model combination

    Three CSV files are read from ``../sncosmo_fits/{survey}_results/``:
    ``{model}_all.csv``, ``{model}_blue.csv`` and ``{model}_red.csv``.
    Rows containing any missing value are dropped from each table.

    Args:
        survey (str): The name of the survey
        model  (str): The name of the fitted model

    Returns:
        A DataFrame of fits in all bands
        A DataFrame of fits in blue bands
        A DataFrame of fits in red bands
    """

    # SDSS tables are indexed by their second column, every other survey by the first
    if survey == 'sdss':
        index_col = 1
    else:
        index_col = 0

    # The doubled braces survive the f-string and are filled in per band below
    path_pattern = f'../sncosmo_fits/{survey}_results/{model}_{{}}.csv'

    fits_per_band = tuple(
        pd.read_csv(path_pattern.format(band), index_col=index_col).dropna()
        for band in ('all', 'blue', 'red')
    )
    return fits_per_band


# For every survey load the (all, blue, red) fit tables of both models:
# 'salt_2_4' is stored under the *_snia_* names, 'nugent' under the *_91bg_* names.

# SDSS
sdss_snia_all, sdss_snia_blue, sdss_snia_red = get_fit_results(survey='sdss', model='salt_2_4')
sdss_91bg_all, sdss_91bg_blue, sdss_91bg_red = get_fit_results(survey='sdss', model='nugent')

# DES
des_snia_all, des_snia_blue, des_snia_red = get_fit_results(survey='des', model='salt_2_4')
des_91bg_all, des_91bg_blue, des_91bg_red = get_fit_results(survey='des', model='nugent')

# CSP
csp_snia_all, csp_snia_blue, csp_snia_red = get_fit_results(survey='csp', model='salt_2_4')
csp_91bg_all, csp_91bg_blue, csp_91bg_red = get_fit_results(survey='csp', model='nugent')


Next, we calculate the difference in normalized chi-squared values and plot the results. The resulting plot is similar to Figure 3 in the 2014 paper.

In [None]:
def calc_chisquared_diff(snia_blue, snia_red, bg_blue, bg_red):
    """Difference in normalized chi-squared between the Salt and 91bg fits

    For each model the blue and red fit tables are joined on their index and
    the chi-squared of each band is divided by its degrees of freedom. The
    91bg value is then subtracted from the Salt value, band by band. Targets
    for which either difference is undefined are dropped.

    Args:
        snia_blue (DataFrame): Salt fit results in the restframe blue
        snia_red  (DataFrame): Salt fit results in the restframe red
        bg_blue   (DataFrame): 91bg fit results in the restframe blue
        bg_red    (DataFrame): 91bg fit results in the restframe red

    Returns:
        A DataFrame with columns 'blue' and 'red'
    """

    def normalized_chi(blue_fits, red_fits):
        """Return the (blue, red) chi-squared per degree of freedom"""

        both_bands = blue_fits.join(red_fits, lsuffix='_blue', rsuffix='_red')
        blue_norm = both_bands['chi_blue'] / both_bands['dof_blue']
        red_norm = both_bands['chi_red'] / both_bands['dof_red']
        return blue_norm, red_norm

    snia_blue_norm, snia_red_norm = normalized_chi(snia_blue, snia_red)
    bg_blue_norm, bg_red_norm = normalized_chi(bg_blue, bg_red)

    # Series subtraction aligns on the index; unmatched targets become NaN
    differences = pd.DataFrame({
        'blue': snia_blue_norm - bg_blue_norm,
        'red': snia_red_norm - bg_red_norm
    })

    return differences.dropna()


# One chi-squared difference table per survey (Salt minus 91bg, blue and red bands)
sdss_chi_diff = calc_chisquared_diff(
    snia_blue=sdss_snia_blue, snia_red=sdss_snia_red,
    bg_blue=sdss_91bg_blue, bg_red=sdss_91bg_red)

des_chi_diff = calc_chisquared_diff(
    snia_blue=des_snia_blue, snia_red=des_snia_red,
    bg_blue=des_91bg_blue, bg_red=des_91bg_red)

csp_chi_diff = calc_chisquared_diff(
    snia_blue=csp_snia_blue, snia_red=csp_snia_red,
    bg_blue=csp_91bg_blue, bg_red=csp_91bg_red)

# Preview the CSP table as the cell output
csp_chi_diff.head()


In [None]:
# Scatter the chi-squared differences of every survey on one set of axes
for survey_name, chi_diff in (('SDSS', sdss_chi_diff), ('DES', des_chi_diff), ('CSP', csp_chi_diff)):
    plt.scatter(chi_diff['blue'], chi_diff['red'], label=survey_name)

# Dashed reference lines through the origin (visual guide only)
plt.axhline(0, linestyle='--', color='black', alpha=.6)
plt.axvline(0, linestyle='--', color='black', alpha=.6)
plt.xlabel(r'$\chi^2_{blue}(Ia) - \chi^2_{blue}(91bg)$', fontsize=14)
plt.ylabel(r'$\chi^2_{red}(Ia) - \chi^2_{red}(91bg)$', fontsize=14)

# The scatter labels are only rendered once a legend is drawn
plt.legend()
plt.show()


We expect 91bg-like supernovae to fall in the upper-right quadrant of the figure. Here we have drawn dashed lines intersecting at (0, 0) for visual reference only. A new set of lines will be generated later on to indicate a more carefully chosen set of boundaries to isolate 91bg-like supernovae.

# Optimize FOM

We use a figure of merit (FOM) parameter as an optimization parameter for training our classification. The FOM is defined as:

$$FOM = \frac{N_{true}}{N_{tot}} \times \frac{N_{true}}{N_{true} + N_{false}}$$

where $N_{true}$ is the number of correctly identified objects of a given type (e.g. 91bg-like objects), $N_{tot}$ is the total input number of that type, and $N_{false}$ is the number of objects falsely classified as that type. In order to calculate this parameter, we will need to construct a dataframe containing the known classifications of spectroscopically observed targets along with the chi-squared differences from above.


In [None]:
# Spectroscopic classifications of the CSP targets; only the name and type columns are used
csp_spec_class = Table.read('./csp_classifications.txt', format='ascii')
csp_spec_class.keep_columns(['SN', 'Type'])
csp_spec_class.rename_column('Type', 'type')

# Remove rows without data or targets with inconclusive classifications
# NOTE(review): the first two rows are dropped unconditionally - presumably
# they are not real targets; confirm against the input file.
csp_spec_class.remove_rows([0, 1])
is_data = np.logical_not(csp_spec_class['type'] == '        ')
csp_spec_class = csp_spec_class[is_data]

# These are CSP classifications, so they are matched against the CSP
# chi-squared differences. The join is made on the supernova name rather than
# on the positional row number, and only targets that have both a
# classification and chi-squared values are kept.
# NOTE(review): assumes the 'SN' names use the same format as the index of
# the CSP fit tables - TODO confirm.
classification_data = (
    csp_spec_class.to_pandas()
    .set_index('SN')
    .join(csp_chi_diff, how='inner')
)

classification_data.head()


Next we write a few functions to determine the optimal chi-squared boundaries based on the FOM.

In [None]:
def calc_fom(dataframe, blue_cutoff, red_cutoff):
    """Calculate the figure of merit for classifying 91bg-like objects

    FOM = (num_true / num_tot) X (num_true / (num_true + num_false))

    An object is classified as 91bg-like when both its 'blue' and its 'red'
    value lie strictly above the corresponding cutoff. Missing values never
    pass a cutoff.

    Args:
        dataframe (DataFrame): Table with a 'type' column holding the known
            classification and 'blue' / 'red' columns holding the
            chi-squared differences
        blue_cutoff   (float): Lower boundary on the 'blue' column
        red_cutoff    (float): Lower boundary on the 'red' column

    Returns:
        The figure of merit value as a NumPy float in the range [0, 1].
        The value is 0 when the table holds no 91bg objects or when no
        object passes the cutoffs.
    """

    is_91bg_true = dataframe['type'] == '91bg'
    is_91bg_classified = (
        (dataframe['blue'] > blue_cutoff)
        & (dataframe['red'] > red_cutoff)
    )

    # TODO: How to handle multiple types?
    num_tot = int(is_91bg_true.sum())

    # Only objects selected by the cutoffs enter num_true / num_false;
    # correctly rejected objects are not part of the figure of merit.
    num_true = int((is_91bg_true & is_91bg_classified).sum())
    num_false = int((~is_91bg_true & is_91bg_classified).sum())
    num_selected = num_true + num_false

    # A NumPy float is returned in every branch so that callers that invert
    # the result get ``inf`` instead of a ZeroDivisionError.
    if num_tot == 0 or num_selected == 0:
        return np.float64(0.0)

    efficiency = num_true / num_tot
    purity = num_true / num_selected
    return np.float64(efficiency * purity)


# Figure of merit for the naive boundaries at the origin
initial_fom = calc_fom(classification_data, 0, 0)
print(f'FOM at (0, 0): {initial_fom}\n')


def inverse_fom(args):
    """Objective for the minimizer: reciprocal of the FOM at cutoffs ``args``"""

    return 1 / calc_fom(classification_data, *args)


# Maximise the FOM by minimising its reciprocal, starting from (0, 0)
# NOTE(review): the FOM is built from object counts, so it only changes in
# steps as the cutoffs move - confirm the minimizer actually leaves the
# starting point.
result = optimize.minimize(inverse_fom, [0, 0])
print('Optimization results:')
print(result)


In [None]:
# Scatter the chi-squared differences of every survey on one set of axes
survey_chi_diffs = {'SDSS': sdss_chi_diff, 'DES': des_chi_diff, 'CSP': csp_chi_diff}
for survey_name, chi_diff in survey_chi_diffs.items():
    plt.scatter(chi_diff['blue'], chi_diff['red'], label=survey_name)

# Black lines mark the origin, red lines the cutoffs found by the optimization
reference_style = dict(linestyle='--', color='black', alpha=.6)
cutoff_style = dict(linestyle='--', color='red', alpha=.6)
plt.axvline(0, **reference_style)
plt.axhline(0, **reference_style)
plt.axvline(result.x[0], **cutoff_style)
plt.axhline(result.x[1], label=f'FOM = {1 / result.fun}', **cutoff_style)

plt.xlabel(r'$\chi^2_{blue}(Ia) - \chi^2_{blue}(91bg)$', fontsize=14)
plt.ylabel(r'$\chi^2_{red}(Ia) - \chi^2_{red}(91bg)$', fontsize=14)

plt.legend()
plt.show()


## Bootstrap

Now that we have a figure of merit optimization, we can bootstrap our data to determine our final classification parameters.

In [None]:
# configure bootstrap
n_iterations = 1000
n_size = int(len(classification_data) * 0.50)
bootstrap_seed = 0  # fixed seed so that re-running the notebook reproduces the results
bootstrap_rng = np.random.RandomState(bootstrap_seed)

# run bootstrap
fom_values = []
classification_params = []
for i in range(n_iterations):
    # draw a random sample of the classified targets
    sample_data = resample(classification_data, n_samples=n_size, random_state=bootstrap_rng)

    # The objective must be evaluated on the resampled data; evaluating it on
    # the full table would make every iteration return the same result.
    def inverse_fom(args, data=sample_data):
        """Reciprocal of the FOM of the current sample at cutoffs ``args``"""

        return 1 / calc_fom(data, *args)

    result = optimize.minimize(inverse_fom, [0, 0])

    # Store the optimised FOM and the (blue, red) cutoffs of this iteration
    fom_values.append(1 / result.fun)
    classification_params.append(result.x)


In [None]:
def calc_confidence_intervals(alpha=0.95, stats=None):
    """Percentile confidence interval of a set of values, clipped to [0, 1]

    The interval is bounded by the ``(1 - alpha) / 2`` and
    ``alpha + (1 - alpha) / 2`` percentiles of ``stats``. Because the result
    is clipped to the range [0, 1] it is only meaningful for quantities that
    are confined to that range.

    Args:
        alpha      (float): Confidence level as a fraction (e.g. 0.95)
        stats (array-like): The values to compute the interval for

    Returns:
        The lower bound of the interval
        The upper bound of the interval

    Raises:
        ValueError: If ``stats`` is missing or empty
    """

    if stats is None or len(stats) == 0:
        raise ValueError('stats must contain at least one value')

    p = ((1 - alpha) / 2) * 100
    lower = max(0.0, float(np.percentile(stats, p)))

    p = (alpha + ((1 - alpha) / 2)) * 100
    upper = min(1.0, float(np.percentile(stats, p)))

    return lower, upper


confidence = 0.95
average_fom = np.average(fom_values)
fom_interval = calc_confidence_intervals(confidence, fom_values)

print(f'Average FOM: {average_fom}')
print(f'{confidence*100:.1f} confidence interval {fom_interval[0]*100:.1f} and {fom_interval[1]*100:.1f}')

# One row per bootstrap iteration; column 0 is the blue cutoff, column 1 the red cutoff
param_samples = np.asarray(classification_params)

# Average over the iterations (axis 0) to get one (blue, red) pair
average_params = np.average(param_samples, axis=0)

# The cutoffs are chi-squared differences, not fractions, so their intervals
# are taken as plain percentiles without clipping to [0, 1] and are printed
# without scaling to percent.
interval_percentiles = [(1 - confidence) / 2 * 100, (1 + confidence) / 2 * 100]
blue_param_interval = np.percentile(param_samples[:, 0], interval_percentiles)
red_param_interval = np.percentile(param_samples[:, 1], interval_percentiles)

print(f'Average classification params: {average_params}')
print(f'{confidence*100:.1f} confidence interval blue param {blue_param_interval[0]:.3f} and {blue_param_interval[1]:.3f}')
print(f'{confidence*100:.1f} confidence interval red param {red_param_interval[0]:.3f} and {red_param_interval[1]:.3f}')


In [None]:
# Scatter the chi-squared differences of every survey on one set of axes
for survey_name, chi_diff in (('SDSS', sdss_chi_diff), ('DES', des_chi_diff), ('CSP', csp_chi_diff)):
    plt.scatter(chi_diff['blue'], chi_diff['red'], label=survey_name)

# Black lines mark the origin, red lines the bootstrap-averaged cutoffs
plt.axvline(0, linestyle='--', color='black', alpha=.6)
plt.axhline(0, linestyle='--', color='black', alpha=.6)
plt.axvline(average_params[0], linestyle='--', color='red', alpha=.6)
# The red lines show averaged cutoffs, so the legend reports the averaged FOM
# rather than the FOM of whichever optimization happened to run last.
plt.axhline(average_params[1], linestyle='--', color='red', alpha=.6, label=f'FOM = {average_fom}')
plt.xlabel(r'$\chi^2_{blue}(Ia) - \chi^2_{blue}(91bg)$', fontsize=14)
plt.ylabel(r'$\chi^2_{red}(Ia) - \chi^2_{red}(91bg)$', fontsize=14)

plt.legend()
plt.show()
