# Compare fit results to published values

This notebook compares fit results from SNCosmo with the fit results published by SDSS and DES. For this notebook to work for either survey, the analysis pipeline must have been run for that survey.

#### Table of Contents:
1. <a href='#document_setup'>Document Setup</a>: Download published data, define file paths, and define some functions for plotting / reading data
1. <a href='#sdss'>SDSS</a>: Compare fit results for the Sloan Digital Sky Survey
1. <a href='#des'>DES</a>: Compare fit results for the Dark Energy Survey


In [None]:
import sys
from pathlib import Path

import numpy as np
import sncosmo
from astropy.table import Column, Table
from bokeh.io import output_notebook, show
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from matplotlib import pyplot as plt
from sndata.des import sn3yr
from sndata.sdss import sako18

sys.path.insert(0, '../')
from phot_class import models


## Document Setup <a id='document_setup'></a>

We start by downloading the published data to the local machine and instantiating various light-curve models.

In [None]:
# Make sure published data is locally available
# (both calls are made unconditionally every time this cell runs)
sako18.download_module_data()
sn3yr.download_module_data()

# Define models
# NOTE(review): register_sources presumably registers the project's custom
# sources (e.g. 'sn91bg') with sncosmo so that get_source can find them
# below - confirm against phot_class.models
models.register_sources(force=True)
# Two versions of the Salt2 source plus the 91bg source. None of these
# models are referenced again in the visible part of this notebook.
salt_2_4 = sncosmo.Model(source=sncosmo.get_source('salt2', version='2.4'))
salt_2_0 = sncosmo.Model(source=sncosmo.get_source('salt2', version='2.0'))
sn_91bg = sncosmo.Model(source=sncosmo.get_source('sn91bg'))


Next we define the necessary file paths along with a function for reading in data from the outputs of the analysis pipeline.

In [None]:
# Output directory for figures
# The path is relative to the current working directory and is created
# (including any missing parents) if it does not already exist
fig_dir = Path('./notebook_figs')
fig_dir.mkdir(exist_ok=True, parents=True)

# Define the path of results from our analysis pipeline
# Kept as a plain string; read_pipeline_salt2_fits wraps it in a Path and
# resolves it at call time
pipeline_out_dir = '../phot_class_results'


def read_pipeline_salt2_fits(survey, release, fit_func='simple_fit'):
    """Load the pipeline's Salt2 fit results for a survey / data release

    Results are read from ``<survey>_<release>_<fit_func>_fits.ecsv`` in the
    directory given by the global ``pipeline_out_dir``. Only rows where
    ``source`` is ``'salt2'`` and ``band_set`` is ``'all'`` are returned.

    Args:
        survey   (str): The name of the survey
        release  (str): The name of the survey's data release
        fit_func (str): The name of the fit function used by the pipeline

    Returns:
        A pandas DataFrame of the selected fit results
    """

    results_dir = Path(pipeline_out_dir).resolve()
    fits_table = Table.read(
        results_dir / f'{survey}_{release}_{fit_func}_fits.ecsv')

    # Store object ids as unicode strings (at most 100 characters)
    fits_table['obj_id'] = Column(fits_table['obj_id'], dtype='U100')

    # Index by (source, band_set, obj_id) so the salt2 fits to all
    # available data can be selected with a single lookup
    fits_frame = fits_table.to_pandas().set_index(
        ['source', 'band_set', 'obj_id'])
    return fits_frame.loc['salt2', 'all']


Since we will be creating the same plots for multiple data sets, we create some generic plotting functions. We create a static version of the plot along with an interactive version.

In [None]:
# Set up bokeh plotting
# output_notebook() is called once here, before any figure is shown
output_notebook()
# Tool names shared by every interactive figure; stored as a list so that
# create_interactive_figure can append further tools to it
_basic_tools = "save,pan,box_zoom,reset,wheel_zoom".split(',')


def create_static_figure(fit_data):
    """Plot published vs. pipeline fit values on a 2 x 3 matplotlib grid

    The first four panels compare 't0', 'x0', 'x1', and 'c' (with error
    bars and a y = x reference line). The last two panels compare chisq and
    the reduced chisq (chisq / ndof).

    Published values should be in columns with the above mentioned parameter
    names (plus ``<name>_err``, ``chisq`` and ``ndof``). Pipeline values
    should be columns with the same name plus a "_pipe" suffix.

    Args:
        fit_data (DataFrame): The data to plot

    Returns:
         The 2 x 3 array of matplotlib axes that were drawn on. The figure
         created here stays the current pyplot figure, so callers can
         follow up with ``plt.savefig`` / ``plt.show``.
    """

    _, axes = plt.subplots(2, 3, figsize=(18, 12))

    # Start by plotting parameter values (things that have errors)
    for axis, value in zip(axes.flatten(), ['t0', 'x0', 'x1', 'c']):
        published = fit_data[value]
        pipeline = fit_data[f'{value}_pipe']
        axis.set_title(value)

        # Plot a reference line at y = x spanning the full range of both
        # the published and the pipeline values
        line = [np.min([published, pipeline]), np.max([published, pipeline])]
        axis.plot(line, line, linestyle='--', color='grey', alpha=.7)

        axis.errorbar(published,
                      pipeline,
                      xerr=fit_data[f'{value}_err'],
                      yerr=fit_data[f'{value}_err_pipe'],
                      linestyle='',
                      alpha=.3)

        axis.scatter(published, pipeline, alpha=.3, s=5)

        # Use identical limits on both axes so the reference line is the
        # panel diagonal
        axis.set_xlim(*line)
        axis.set_ylim(*line)

    # Now plot the stuff without errors
    axes[1, 1].scatter(fit_data['chisq'], fit_data['chisq_pipe'], alpha=.3,
                       s=10)
    axes[1, 1].set_title('chisq')

    reduced_chisq_survey = fit_data['chisq'] / fit_data['ndof']
    reduced_chisq_pipeline = fit_data['chisq_pipe'] / fit_data['ndof_pipe']
    axes[1, 2].scatter(reduced_chisq_survey, reduced_chisq_pipeline, alpha=.3,
                       s=10)
    # Title the reduced chisq panel itself; previously this call targeted
    # axes[1, 1] and overwrote the 'chisq' title
    axes[1, 2].set_title('chisq_norm')

    axes[0, 0].set_ylabel('Pipeline Fit')
    axes[1, 0].set_xlabel('Published Value')
    axes[1, 0].set_ylabel('Pipeline Fit')
    axes[1, 1].set_xlabel('Published Value')
    axes[1, 2].set_xlabel('Published Value')
    return axes


def create_interactive_figure(fit_data):
    """Return a bokeh grid comparing 'x0', 'x1', 'c', and chisq
    of the published and pipeline fit values.

    Published values should be in columns with the above mentioned parameter
    names. Pipeline values should be columns with the same name plus a "_pipe"
    suffix. Columns ``ndof`` and ``ndof_pipe`` are also required. Note that,
    unlike the static figure, 't0' is not plotted here.

    Args:
        fit_data (DataFrame): The data to plot

    Returns:
         A bokeh grid layout holding one figure per compared value
    """

    # Work on a copy so the caller's DataFrame is not modified
    plot_data = fit_data.copy()
    # Reduced chisq columns are added to the data source but are not given
    # a panel of their own
    plot_data['chisq_norm'] = plot_data['chisq'] / plot_data['ndof']
    plot_data['chisq_norm_pipe'] = (
            plot_data['chisq_pipe'] / plot_data['ndof_pipe'])

    source = ColumnDataSource(data=plot_data)
    hover = HoverTool(tooltips=[
        ("obj_id", "@obj_id"),
        ("class", "@class"),
        ("z", "@z"),
        ('z_fit', '@fit_z'),
        ('chisq', '@chisq'),
        ('ndof', '@ndof'),
    ])

    figures = []
    for value in ('x0', 'x1', 'c', 'chisq'):
        x_key, y_key = value, f'{value}_pipe'
        fig = figure(
            tools=_basic_tools + [hover, 'box_select', 'lasso_select'],
            title=value)

        fig.circle(x_key, y_key, source=source, size=4, alpha=.5)

        # Reference line at y = x. Span the range of both the published and
        # pipeline values (as the static figure does) so the line also
        # reaches points whose pipeline value lies outside the published
        # range.
        line_start = min(plot_data[x_key].min(), plot_data[y_key].min())
        line_end = max(plot_data[x_key].max(), plot_data[y_key].max())
        fig.line([line_start, line_end],
                 [line_start, line_end],
                 line_width=2)

        fig.xaxis.axis_label = 'Published Value'
        # Survey-neutral label matching the static figure; this function is
        # called for more than one survey
        fig.yaxis.axis_label = 'Pipeline Fit'
        figures.append(fig)

    return gridplot(figures, ncols=2, plot_width=350, plot_height=350)


## SDSS <a id='sdss'></a>

We read in published fit results for SDSS from the `"master"` table of the Sako et al. 2018 data release. We then join this data with results from the analysis pipeline.

In [None]:
# Get SDSS published data
# Keep only the object id and the published SALT2 fit columns carrying the
# "SALT2zspec" suffix
master_table = sako18.load_table('master')
sdss_published = master_table[
    'CID',
    'PeakMJDSALT2zspec',
    'PeakMJDderrSALT2zspec',
    'x0SALT2zspec',
    'x0errSALT2zspec',
    'x1SALT2zspec',
    'x1errSALT2zspec',
    'cSALT2zspec',
    'cerrSALT2zspec',
    'chi2SALT2zspec',
    'ndofSALT2zspec'
]

# Convert to pandas and rename for consistency
sdss_published = sdss_published.to_pandas()
# Drop every object that is missing any of the selected published values
sdss_published.dropna(inplace=True)
# NOTE(review): the DES cell adds 2400000.5 to the published peak MJD before
# comparing against the pipeline; no such offset is applied here - confirm
# that the published and pipeline t0 values for SDSS share a time system
# `index=str` additionally converts the row labels to strings
sdss_published.rename(index=str, inplace=True, columns={
    'CID': 'obj_id',
    'PeakMJDSALT2zspec': 't0',
    'PeakMJDderrSALT2zspec': 't0_err',
    'x0SALT2zspec': 'x0',
    'x0errSALT2zspec': 'x0_err',
    'x1SALT2zspec': 'x1',
    'x1errSALT2zspec': 'x1_err',
    'cSALT2zspec': 'c',
    'cerrSALT2zspec': 'c_err',
    'chi2SALT2zspec': 'chisq',
    'ndofSALT2zspec': 'ndof'})

# Read in fit results from the pipeline
sdss_pipeline = read_pipeline_salt2_fits('sdss', 'sako18')
sdss_pipeline.dropna(inplace=True)

# Join the two data frames together
# Both frames are joined on obj_id; pipeline columns whose names collide
# with published columns receive the "_pipe" suffix expected by the
# plotting functions
sdss_published.set_index('obj_id', inplace=True)
sdss_combined = sdss_published.join(sdss_pipeline, rsuffix='_pipe')
# Published objects without a matching pipeline fit have missing pipeline
# values after the join and are removed here
sdss_combined.dropna(inplace=True)
sdss_combined.head()


In [None]:
# Draw the static comparison for SDSS, save it as a PDF in the figure
# directory, and then display it
plot_axes = create_static_figure(sdss_combined)
plt.savefig(fig_dir / 'sdss_compare_published.pdf')
plt.show()


In [None]:
# Build the interactive bokeh comparison for SDSS and display it
plot_grid = create_interactive_figure(sdss_combined)
show(plot_grid)


Note the disagreement of the `x0` values. Having no explanation for this disagreement, we settle on characterizing the slope of the relationship and move on.

In [None]:
# Fit a straight line to pipeline x0 as a function of published x0.
# np.polyfit with degree 1 returns the coefficients highest power first,
# i.e. [slope, intercept]; the result is displayed as the cell output.
x = sdss_combined['x0']
y = sdss_combined['x0_pipe']
np.polyfit(x, y, 1)


## DES <a id='des'></a>

In [None]:
# Get DES published data
# Keep only the object id and the published fit columns used below
des_published = sn3yr.load_table('SALT2mu_DES+LOWZ_C11.FITRES')[
    'CIDint',
    'PKMJD',
    'PKMJDERR',
    'x0',
    'x0ERR',
    'x1',
    'x1ERR',
    'c',
    'cERR',
    'FITCHI2',
    'NDOF'
]

# Convert object ids to a uniform length
# Each id is formatted to a width of 8 characters, padded with zeros
# NOTE(review): presumably this matches the obj_id format written by the
# pipeline - verify, since the join below relies on exact string equality
des_published['obj_id'] = [f'{cid:08}' for cid in des_published['CIDint']]
des_published.remove_column('CIDint')

# Convert published t0 from MJD to JD
des_published['PKMJD'] += 2400000.5

# Rename columns to match pipeline naming convention
# `index=str` additionally converts the row labels to strings. The 'CIDint'
# entry below matches nothing because that column was already removed above
# (obj_id was created directly), and the 'x0', 'x1' and 'c' entries map each
# name to itself.
des_published = des_published.to_pandas()
des_published.rename(index=str, inplace=True, columns={
    'CIDint': 'obj_id',
    'PKMJD': 't0',
    'PKMJDERR': 't0_err',
    'x0': 'x0',
    'x0ERR': 'x0_err',
    'x1': 'x1',
    'x1ERR': 'x1_err',
    'c': 'c',
    'cERR': 'c_err',
    'FITCHI2': 'chisq',
    'NDOF': 'ndof'})

des_published.set_index('obj_id', inplace=True)

# Join the published and pipeline results into a single data frame
# Pipeline columns whose names collide with published columns receive the
# "_pipe" suffix expected by the plotting functions; rows with any missing
# value after the join are dropped
des_pipeline = read_pipeline_salt2_fits('des', 'sn3yr')
combined_des = des_published.join(des_pipeline, rsuffix='_pipe')
combined_des.dropna(inplace=True)


In [None]:
# Draw the static comparison for DES
plot_axes = create_static_figure(combined_des)

# Tighten the layout *before* saving so that the adjusted spacing is part
# of the saved PDF and not only of the figure shown in the notebook
plt.tight_layout()
plt.savefig(fig_dir / 'des_compare_published.pdf')
plt.show()


In [None]:
# Build the interactive bokeh comparison for DES and display it
plot_grid = create_interactive_figure(combined_des)
show(plot_grid)
