# Apply Photometric Classification To Fit Results

This notebook applies the photometric classification method from González-Gaitán et al. 2014 to SDSS, DES, and CSP light-curve fits. Results are then used to analyze the properties of peculiar supernovae.

In [None]:
import numpy as np
from astropy.table import Table
from matplotlib import pyplot as plt
from scipy import optimize
from sklearn.utils import resample
from sndata.csp import dr1


## Load Data

We begin by reading in spectroscopic classifications for supernovae observed by CSP DR1. We join these classifications with our own classification parameters.

In [None]:
# Photometric classification results, indexed by object id
photo_class = Table.read('../results/csp_dr3_simple_fit_class.ecsv').to_pandas(index='obj_id')

# Spectroscopic types from table 1 of the CSP DR1 data release, indexed by
# supernova name. Missing types are labelled 'Unknown' without mutating in place.
spec_class = dr1.load_table(1)['SN', 'Type'].to_pandas(index='SN').fillna('Unknown')

# The left join keeps every photometrically classified object. Objects with no
# spectroscopic match come out of the join with a missing ``Type``, so the
# 'Unknown' label has to be applied again after joining.
classification = photo_class.join(spec_class)
classification['Type'] = classification['Type'].fillna('Unknown')
classification['Type']

This gives us enough information to replicate the classification figure from Gonzalez-Gaitan+ 14. Note that we draw dashed lines intersecting at (0, 0) for visual reference only.

In [None]:
def plot_classification(class_data):
    """Scatter plot of the classification coordinates, grouped by type

    One scatter series is drawn per distinct value of ``Type``. Dashed lines
    through zero are added on both axes for visual reference.

    Args:
        class_data (DataFrame): Table with ``x``, ``y`` and ``Type`` columns

    Returns:
        The matplotlib figure
    """

    fig, axis = plt.subplots(figsize=(10, 10))

    # dict.fromkeys removes duplicates while keeping the order of first
    # appearance. Iterating over a ``set`` of strings instead would give a
    # different legend / color order from one kernel session to the next.
    for sn_type in dict.fromkeys(class_data['Type']):
        is_type = class_data['Type'] == sn_type
        axis.scatter(class_data['x'][is_type],
                     class_data['y'][is_type],
                     label=sn_type)

    axis.axhline(0, linestyle='--', color='black', alpha=.6)
    axis.axvline(0, linestyle='--', color='black', alpha=.6)
    axis.set_xlabel(r'$\chi^2_{blue}(Ia) - \chi^2_{blue}(91bg)$', fontsize=14)
    axis.set_ylabel(r'$\chi^2_{red}(Ia) - \chi^2_{red}(91bg)$', fontsize=14)
    axis.set_xlim(-6000, 6000)
    axis.set_ylim(-6000, 6000)

    axis.legend(bbox_to_anchor=(1, 1))
    return fig


In [None]:
# Draw the classification scatter plot for the joined table built above
plot_classification(classification)
plt.show()


# Optimize FOM

We use a figure of merit (FOM) value as an optimization parameter for training our classification. The FOM is defined as:

$$FOM = \frac{N_{true}}{N_{tot}} * \frac{N_{true}}{N_{true} + N_{false}}$$

where $N_{true}$ is the number of correctly identified objects of a given type (e.g. 91bg-like objects), $N_{tot}$ is the total input number of that type and $N_{false}$ is the number of objects of other types that were incorrectly classified as that type.


Next, we write a few functions to determine the optimal chi-squared boundaries based on the FOM. We include functions for two types of classification boundaries. The first calculates the FOM using a vertical and a horizontal boundary. The second uses a single boundary at an angle.

In [None]:
def calc_rect_fom(dataframe, blue_cutoff, red_cutoff):
    """Calculate the rectangular figure of merit for a set of classifications

    An object is classified as 91bg-like when its ``x`` value is above
    ``blue_cutoff`` and its ``y`` value is above ``red_cutoff``. The figure of
    merit is

        FOM = (N_true / N_tot) * (N_true / (N_true + N_false))

    where N_true counts 91bg objects classified as 91bg, N_tot counts all 91bg
    objects and N_false counts other objects classified as 91bg.

    args:
        dataframe (DataFrame): Table with ``x``, ``y`` and ``Type`` columns
        blue_cutoff   (float): Lower boundary on ``x`` for a 91bg classification
        red_cutoff    (float): Lower boundary on ``y`` for a 91bg classification

    Returns:
        The figure of merit value as a numpy float (0 if no 91bg objects
        exist or none are recovered)
    """

    # TODO: How to handle multiple types? Only the exact label '91bg' counts.
    is_91bg_true = np.asarray(dataframe['Type'] == '91bg')
    is_91bg_classified = np.asarray(
        (dataframe['x'] > blue_cutoff)
        & (dataframe['y'] > red_cutoff)
    )

    num_tot = np.count_nonzero(is_91bg_true)
    # Only true positives count as "true"; comparing the two masks with ``==``
    # would also count correctly rejected objects and inflate the FOM.
    num_true = np.count_nonzero(is_91bg_true & is_91bg_classified)
    num_false = np.count_nonzero(~is_91bg_true & is_91bg_classified)

    # A numpy float is returned on purpose: callers that invert the FOM get
    # ``inf`` (with a warning) for a zero value instead of ZeroDivisionError.
    if num_tot == 0 or num_true == 0:
        return np.float64(0)

    efficiency = num_true / num_tot
    purity = num_true / (num_true + num_false)
    return np.float64(efficiency * purity)


def calc_diagonal_fom(dataframe, m, b):
    """Calculate the diagonal figure of merit for a set of classifications

    An object is classified as 91bg-like when it lies on or above the line
    ``y = m * x + b``. The figure of merit is

        FOM = (N_true / N_tot) * (N_true / (N_true + N_false))

    where N_true counts 91bg objects classified as 91bg, N_tot counts all 91bg
    objects and N_false counts other objects classified as 91bg.

    args:
        dataframe (DataFrame): Table with ``x``, ``y`` and ``Type`` columns
        m             (float): Slope of the classification boundary
        b             (float): Intercept of the classification boundary

    Returns:
        The figure of merit value as a numpy float (0 if no 91bg objects
        exist or none are recovered)
    """

    # TODO: How to handle multiple types? Only the exact label '91bg' counts.
    is_91bg_true = np.asarray(dataframe['Type'] == '91bg')
    boundary = m * dataframe['x'] + b
    is_91bg_classified = np.asarray(dataframe['y'] >= boundary)

    num_tot = np.count_nonzero(is_91bg_true)
    # Only true positives count as "true"; comparing the two masks with ``==``
    # would also count correctly rejected objects and inflate the FOM.
    num_true = np.count_nonzero(is_91bg_true & is_91bg_classified)
    num_false = np.count_nonzero(~is_91bg_true & is_91bg_classified)

    # A numpy float is returned on purpose: callers that invert the FOM get
    # ``inf`` (with a warning) for a zero value instead of ZeroDivisionError.
    if num_tot == 0 or num_true == 0:
        return np.float64(0)

    efficiency = num_true / num_tot
    purity = num_true / (num_true + num_false)
    return np.float64(efficiency * purity)


In [None]:
def _maximize_fom(inverse_func, start):
    """Minimize ``inverse_func`` from ``start`` and print the outcome

    The ``fun`` entry of the result is replaced by its reciprocal so that it
    holds the FOM itself instead of 1 / FOM.
    """

    outcome = optimize.minimize(inverse_func, start)
    outcome['fun'] = 1 / outcome['fun']
    print('Optimization results:')
    print(outcome)
    return outcome


# NOTE(review): the FOM depends on its parameters only through boolean
# comparisons, so it is piecewise constant. The default method of
# optimize.minimize is gradient based and may stop at the starting point;
# a grid search may be more appropriate. To be confirmed against the output.

# Vertical + horizontal boundary
print('Rectangular FOM:')
initial_fom = calc_rect_fom(classification, 0, 0)
print(f'FOM at (0, 0): {initial_fom}\n')

inverse_fom = lambda args: 1 / calc_rect_fom(classification, *args)
rect_result = _maximize_fom(inverse_fom, [.1, 0])

# Single boundary with free slope and intercept
print('\n\nAngled FOM:')
initial_fom = calc_diagonal_fom(classification, -1, 0)
print(f'FOM at (-1, 0): {initial_fom}\n')

inverse_fom = lambda args: 1 / calc_diagonal_fom(classification, *args)
angl_result = _maximize_fom(inverse_fom, [-1, 0])

# Single boundary with the slope fixed at -1, only the intercept is free
print('\n\nAngled FOM at 45 degrees:')
initial_fom = calc_diagonal_fom(classification, -1, 0)
print(f'FOM at (-1, 0): {initial_fom}\n')

inverse_fom = lambda b: 1 / calc_diagonal_fom(classification, -1, b)
angl_45_result = _maximize_fom(inverse_fom, 0)


In [None]:
plt.figure(figsize=(10, 10))

# Horizontal extent used to draw the angled boundaries across the data
a = max(np.abs(classification['x']))
line_points = np.array([-a, a])

for sn_type in set(classification['Type']):
    i = classification['Type'] == sn_type
    plt.scatter(classification['x'].loc[i], 
                classification['y'].loc[i], 
                label=sn_type)

# Reference lines through (0, 0). These were previously drawn at the optimized
# cutoffs, where they were hidden underneath the red lines below.
plt.axvline(0, linestyle='--', color='black', alpha=.6)
plt.axhline(0, linestyle='--', color='black', alpha=.6)

# Rectangular FOM
plt.axvline(rect_result.x[0], linestyle='--', color='red', alpha=.6)
plt.axhline(rect_result.x[1], linestyle='--', color='red', alpha=.6,
            label=f'FOM = {rect_result.fun:.4f}')

# Angled FOM: x[0] is the slope and x[1] the intercept
angl_line = angl_result.x[0] * line_points + angl_result.x[1]
plt.plot(line_points, angl_line, linestyle='-', color='orange', alpha=.6,
         label=f'FOM = {angl_result.fun:.4f}')

# Angled FOM at 45 degrees: the slope is fixed at -1 and the only fitted
# parameter is the intercept, which lives in angl_45_result (not angl_result)
angl_45_line = -line_points + angl_45_result.x[0]
plt.plot(line_points, angl_45_line, linestyle='-.', color='green', alpha=.6,
         label=f'FOM = {angl_45_result.fun:.4f}')

plt.xlabel(r'$\chi^2_{blue}(Ia) - \chi^2_{blue}(91bg)$', fontsize=14)
plt.ylabel(r'$\chi^2_{red}(Ia) - \chi^2_{red}(91bg)$', fontsize=14)
plt.xlim(-6000, 6000)
plt.ylim(-6000, 6000)

plt.legend(bbox_to_anchor=(1, 1))
plt.show()


## Bootstrap

Now that we have a figure of merit optimization, we can bootstrap our data to determine our final classification parameters.

In [None]:
# configure bootstrap
n_iterations = 1000
n_size = int(len(classification) * 0.50)
bootstrap_rng = np.random.RandomState(0)  # fixed seed so a re-run reproduces the numbers

# run bootstrap
fom_values = []
classification_params = []
for i in range(n_iterations):
    # Draw a sample (with replacement) and optimize the cutoffs on that sample.
    # The objective must use ``sample_data``: evaluating it on the full table
    # would make every iteration return the same result.
    sample_data = resample(classification, n_samples=n_size, random_state=bootstrap_rng)
    inverse_fom = lambda args: 1 / calc_rect_fom(sample_data, *args)
    result = optimize.minimize(inverse_fom, [0, 0])

    fom_values.append(1 / result.fun)
    classification_params.append(result.x)

# Transpose so that row 0 holds the blue cutoffs and row 1 the red cutoffs
classification_params = np.array(classification_params).T

In [None]:
def calc_confidence_intervals(alpha, stats, lower_bound=0, upper_bound=1.0):
    """Calculate a central percentile interval for a set of values

    Note that ``alpha`` is the confidence level itself (e.g. 0.95), not the
    significance level. By default the interval is clipped to [0, 1], which
    suits fractions such as the figure of merit. Pass ``None`` for a bound to
    disable clipping on that side for quantities that are not fractions.

    Args:
        alpha       (float): Confidence level between 0 and 1
        stats  (array-like): The values to calculate the interval for
        lower_bound (float): Smallest allowed lower limit (``None`` = no limit)
        upper_bound (float): Largest allowed upper limit (``None`` = no limit)

    Returns:
        A tuple with the lower and upper limit of the interval
    """

    p = ((1 - alpha) / 2) * 100
    lower = np.percentile(stats, p)
    if lower_bound is not None:
        lower = max(lower_bound, lower)

    p = (alpha + ((1 - alpha) / 2)) * 100
    upper = np.percentile(stats, p)
    if upper_bound is not None:
        upper = min(upper_bound, upper)

    return lower, upper


confidence = 0.95
alpha = confidence  # kept because the printed labels below use this name
average_fom = np.average(fom_values)
fom_interval = calc_confidence_intervals(confidence, fom_values)

print(f'Average FOM: {average_fom}')
print(f'{alpha * 100:.1f} confidence interval: [{fom_interval[0] * 100:.1f} '
      f', {fom_interval[1] * 100:.1f}]')

# The cutoffs are chi-squared differences, not fractions, so their intervals
# are taken straight from the percentiles: no clipping to [0, 1] and no
# conversion to percent.
interval_percentiles = [(1 - confidence) / 2 * 100, (1 + confidence) / 2 * 100]
average_params = np.average(classification_params, axis=1)
blue_param_interval = tuple(np.percentile(classification_params[0], interval_percentiles))
red_param_interval = tuple(np.percentile(classification_params[1], interval_percentiles))

print(f'Average classification params: {average_params}')
print(f'{alpha * 100:.1f} confidence interval for blue '
      f'param: [{blue_param_interval[0]:.1f} '
      f', {blue_param_interval[1]:.1f}]')

print(f'{alpha * 100:.1f} confidence interval for red '
      f'param: [{red_param_interval[0]:.1f} '
      f', {red_param_interval[1]:.1f}]')


In [None]:
# NOTE(review): sdss_chi_diff, des_chi_diff and csp_chi_diff are not defined in
# any cell visible in this notebook - confirm where they are meant to be loaded,
# otherwise this cell fails on a fresh kernel.
plt.scatter(sdss_chi_diff['blue'], sdss_chi_diff['red'], label='SDSS')
plt.scatter(des_chi_diff['blue'], des_chi_diff['red'], label='DES')
plt.scatter(csp_chi_diff['blue'], csp_chi_diff['red'], label='CSP')

# Reference lines through (0, 0)
plt.axvline(0, linestyle='--', color='black', alpha=.6)
plt.axhline(0, linestyle='--', color='black', alpha=.6)

# Bootstrap-averaged cutoffs. The label reports the bootstrap-averaged FOM to
# match; ``result`` only holds whichever bootstrap iteration happened to run last.
plt.axvline(average_params[0], linestyle='--', color='red', alpha=.6)
plt.axhline(average_params[1], linestyle='--', color='red', alpha=.6,
            label=f'FOM = {average_fom}')

plt.xlabel(r'$\chi^2_{blue}(Ia) - \chi^2_{blue}(91bg)$', fontsize=14)
plt.ylabel(r'$\chi^2_{red}(Ia) - \chi^2_{red}(91bg)$', fontsize=14)

plt.legend()
plt.show()
