# Load the dataset

In [1]:
run manifold_twins

In [2]:
%matplotlib ipympl

In [3]:
# Default settings for matplotlib figures
#import matplotlib as mpl

# Choose how big to make figures. This will scale the text size.
# mpl.rcParams['figure.figsize'] = (5, 4)

# Set the DPI. This will change how big things appear in Jupyter lab
# mpl.rcParams['figure.dpi'] = 120

In [4]:
# a = ManifoldTwinsAnalysis(idr='HICKORY', verbosity=1)
a = ManifoldTwinsAnalysis(idr='CASCAD', verbosity=1)
# a = ManifoldTwinsAnalysis(idr='CASCAD', verbosity=1, bin_velocity=2000, max_count=100)

Loading dataset...
IDR:          CASCAD
Phase range: [-5.0, 5.0]
Center phase: 0.0
Bin velocity: 1000.0
Cutting SNe:  ['PTF12ecm', 'PTF11mty']


  return getattr(self.data, op)(other)
100%|██████████| 438/438 [00:19<00:00, 22.34it/s]


# Run the analysis

## Interpolate the spectra

In [5]:
# Choose how much of the model to refit. For a full analysis, use 'refit' mode.
# If you already ran everything with the exact same configuration, 'cached' will
# load the previous result. If anything changed, 'cached_uncertainty' can be used
# to keep the uncertainty model from the last fit (which takes a long time to fit,
# and is very stable) but refit everything else. Don't use cached_uncertainty for
# the final analysis, it is only for debugging!

# fit_type = 'refit'              # Refit everything
fit_type = 'cached'             # Use a full cached model that was previously fit.
# fit_type = 'cached_uncertainty' # Use only the uncertainty from a cached model.

In [6]:
print("Modeling maximum spectra with fit type '%s'" % fit_type)

if fit_type == 'refit':
    a.model_maximum_spectra(use_cache=False)
elif fit_type == 'cached':
    a.model_maximum_spectra(use_cache=True)
elif fit_type == 'cached_uncertainty':
    a.model_maximum_spectra(use_cached_model_uncertainty=True)
else:
    print("Unknown fit type! Can't handle")
    
print("Done!")

Modeling maximum spectra with fit type 'cached'
Using saved interpolation result
Done!


## Read between the lines

In [7]:
a.read_between_the_lines(fiducial_rv=2.8)

print("Done!")

Loaded cached stan model
Masking 24/202 targets whose interpolation uncertainty power is more than 0.100 of the intrinsic power.
Done!


## Do embedding

In [8]:
a.do_embedding()

## Apply standardization

In [9]:
a.fit_gp()
# a.apply_polynomial_standardization(1)

a.plot_gp()
# a.plot_gp(show_mask=True)

Fitting GP hyperparameters...
Fit result:
      fun: -59.26127018578216
 hess_inv: array([[ 4.98245421e-03,  3.23904835e-06, -6.30946395e-04,
        -3.05735506e-02],
       [ 3.23904835e-06,  1.61370479e-04,  2.64752292e-05,
         1.04339917e-02],
       [-6.30946395e-04,  2.64752292e-05,  1.17696783e-03,
         4.34280822e-02],
       [-3.05735506e-02,  1.04339917e-02,  4.34280822e-02,
         3.55195095e+00]])
      jac: array([-2.86102295e-06, -9.05990601e-06,  2.38418579e-06, -5.72204590e-06])
  message: 'Optimization terminated successfully.'
     nfev: 132
      nit: 15
     njev: 22
   status: 0
  success: True
        x: array([-0.046595  ,  0.07682295,  0.10195705,  2.69551498])
Finite difference covariance step sizes: [1.60e-04 2.00e-05 4.00e-05 2.56e-03]
Fit uncertainty: [0.07023724 0.01255326 0.03414024 1.8833214 ]
Fit NMAD:        0.08971590237637203
Fit std:         0.10109691316372353


FigureCanvasNbAgg()

## Calculate SALT2 Hubble Residuals

In [10]:
a.calculate_salt_hubble_residuals()

Pass 0, MB=-19.071, alpha=0.127, beta=2.983
  -> new intrinsic_dispersion=0.107
Pass 1, MB=-19.070, alpha=0.127, beta=2.984
  -> new intrinsic_dispersion=0.107
Pass 2, MB=-19.070, alpha=0.127, beta=2.984
  -> new intrinsic_dispersion=0.107
Pass 3, MB=-19.070, alpha=0.127, beta=2.984
  -> new intrinsic_dispersion=0.107
Pass 4, MB=-19.070, alpha=0.127, beta=2.984
  -> new intrinsic_dispersion=0.107
SALT2 Hubble fit: 
    MB:    -19.070499921667995
    alpha: 0.12740083248355008
    beta:  2.9837638512608176
    σ_int: 0.10720876144031752
    RMS:   0.15081021570900285
    NMAD:  0.11006123624560227
    WRMS:  0.1499260868916875


## Load host galaxy data

In [13]:
a.load_host_data()

# Interpolation plots

## Examples of interpolations

In [14]:
def plot_same_night(idx, save=False):
    night_flux = a.flux[a.target_map == idx]
    phases = a.salt_phases[a.target_map == idx]
    model = a.interpolation_result['maximum_flux'][idx]
    model_err = a.interpolation_result['maximum_fluxerr'][idx]
    plt.figure()
    for flux, phase in zip(night_flux, phases):
        plt.plot(a.wave, flux, label='%.2f days' % phase)
    plt.plot(a.wave, model, c='k', ls='--', label='Model')
    plt.fill_between(a.wave, model - model_err, model + model_err, facecolor='k', alpha=0.3)
    plt.legend()
    plt.title(a.targets[idx])
    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Flux')
    
    if save:
        plt.savefig('./figures/interpolation_model_%s.pdf' % a.targets[idx])
    
    # plt.figure()
    # shift_frac = (a.interpolation_result['shift_fluxerr'] / a.interpolation_result['shift_flux'])[a.target_map == idx]
    # plt.plot(a.wave, shift_frac.T)
    # orig_frac = (a.fluxerr / a.flux)[a.target_map == idx]
    # plt.plot(a.wave, orig_frac.T, ls='--')
    
    
    phase_slope = a.interpolation_result['phase_slope']
    phase_quadratic = a.interpolation_result['phase_quadratic']
    gray_offsets = a.interpolation_result['gray_offsets'][a.target_map == idx]
    model_diffs = a.interpolation_result['model_diffs'][a.target_map == idx]
    
    plt.figure()
    for i, (flux, phase, gray_offset, model_diff) in enumerate(zip(night_flux, phases, gray_offsets, model_diffs)):
        plt.plot(a.wave, -2.5*np.log10(flux / model), label='Data %.2f days' % phase, c='C%d' % i)
    for i, (flux, phase, gray_offset, model_diff) in enumerate(zip(night_flux, phases, gray_offsets, model_diffs)):
        plt.plot(a.wave, model_diff, label='Model %.2f days' % phase, c='C%d' % i, ls='--')
    plt.legend(ncol=2, loc=1)
    plt.title(a.targets[idx])
    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Difference from maximum light (mag)')
    
    if save:
        plt.savefig('./figures/interpolation_difference_%s.pdf' % a.targets[idx])
    
    plt.figure()
    for i, (flux, phase, gray_offset, model_diff) in enumerate(zip(night_flux, phases, gray_offsets, model_diffs)):
        plt.plot(a.wave, -2.5*np.log10(flux / model) - model_diff, label='Residuals %.2f days' % phase, c='C%d' % i)
    plt.legend()
    plt.title(a.targets[idx])
    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Interpolation residuals (mag)')
    
    if save:
        plt.savefig('./figures/interpolation_residuals_%s.pdf' % a.targets[idx])
    
from ipywidgets import interact
interact(plot_same_night, idx=(0, len(a.targets)-1))

interactive(children=(IntSlider(value=100, description='idx', max=201), Checkbox(value=False, description='sav…

<function __main__.plot_same_night(idx, save=False)>

In [133]:
plot_targets = ['PTF13ayw', 'SN2004gc']
for plot_target in plot_targets:
    target_names = np.array([i.name for i in a.targets])
    plot_idx = np.where(target_names == plot_target)[0][0]

    plot_same_night(plot_idx, save=True)



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()

## Interpolation model

In [142]:
phase_slope = a.interpolation_result['phase_slope']
phase_quadratic = a.interpolation_result['phase_quadratic']
phase_slope_x1 = a.interpolation_result['phase_slope_x1']
phase_quadratic_x1 = a.interpolation_result['phase_quadratic_x1']

def evaluate_phase_difference(phase, x1=0):
    phase_difference = (
        phase_slope * phase
        + phase_quadratic * phase * phase
        + phase_slope_x1 * x1 * phase
        + phase_quadratic_x1 * x1 * phase * phase
    )
    
    return phase_difference

# Look at change in phase for the same x1
max_phase = a.phase_width
min_phase = -a.phase_width
num_phases = 10
phases = np.linspace(min_phase, max_phase, num_phases)

plt.figure(figsize=(8, 5))
norm = plt.Normalize(vmin=min_phase, vmax=max_phase)
cmap = plt.cm.Spectral_r
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array(phases)

for phase in phases:
    plt.plot(a.wave, evaluate_phase_difference(phase), c=cmap(norm(phase)))
    
plt.colorbar(sm, label='Phase (days)')

# plt.xlim(-5.2, 5.2)
plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Difference relative to maximum light (mag)')
plt.gca().invert_yaxis()
plt.tight_layout()
plt.savefig('./figures/interpolation_phase_difference.pdf')


def plot_x1_difference(phase):
    # Look at change in phase for the same x1
    min_x1 = -2
    max_x1 = 2
    num_x1s = 10
    x1s = np.linspace(min_x1, max_x1, num_x1s)

    plt.figure(figsize=(6, 4))
    norm = plt.Normalize(vmin=min_x1, vmax=max_x1)
    cmap = plt.cm.Spectral_r
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array(x1s)

    for x1 in x1s:
        plt.plot(a.wave, evaluate_phase_difference(phase, x1) - evaluate_phase_difference(phase, 0), c=cmap(norm(x1)))

    plt.colorbar(sm, label='SALT2 $x_1$')

    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Difference relative to $x_1=0$ (mag)')
    plt.title('Difference in interpolation at %+d days' % phase)
    # plt.gca().invert_yaxis()
    plt.ylim(0.4, -0.4)
    plt.tight_layout()
    plt.savefig('./figures/interpolation_x1_difference_phase_%d.pdf' % phase)
    
for phase in [-5, -3, -1, 1, 3, 5]:
    plot_x1_difference(phase)



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()

## Gray dispersion

In [153]:
print(a.interpolation_result['gray_dispersion_scale'])

0.02659528944914929


In [182]:
plt.figure()
plt.scatter(a.salt_phases, a.interpolation_result['gray_offsets'], s=3, label='Individual spectra')
math.plot_binned_mean(a.salt_phases, a.interpolation_result['gray_offsets'], c='C2', lw=2, label='Binned mean')
math.plot_binned_rms(a.salt_phases, a.interpolation_result['gray_offsets'], c='C3', lw=2, label='Binned RMS')
plt.xlabel('SALT2 Phase (days)')
plt.ylabel('Gray offset (mag)')
plt.legend()
plt.savefig('./figures/gray_offset_vs_phase.pdf')



FigureCanvasNbAgg()

In [192]:
minloc = np.argmin(a.interpolation_result['gray_offsets'])
print(a.spectra[minloc-5:minloc+5])
print(a.interpolation_result['gray_offsets'][minloc-3:minloc+3])

[ModifiedSpectrum(target="PTF09dnp", name="PTF09dnp_P004498")
 ModifiedSpectrum(target="PTF09fox", name="PTF09fox_M004455")
 ModifiedSpectrum(target="PTF09fox", name="PTF09fox_M002596")
 ModifiedSpectrum(target="PTF09fox", name="PTF09fox_P000163")
 ModifiedSpectrum(target="PTF09fox", name="PTF09fox_P002071")
 ModifiedSpectrum(target="PTF09fox", name="PTF09fox_P002093")
 ModifiedSpectrum(target="PTF09foz", name="PTF09foz_M004182")
 ModifiedSpectrum(target="PTF09foz", name="PTF09foz_M002288")
 ModifiedSpectrum(target="PTF09foz", name="PTF09foz_P000521")
 ModifiedSpectrum(target="PTF10hmv", name="PTF10hmv_M002117")]
[ 0.03453738  0.06016092  0.04791748 -0.17384663 -0.04604238  0.01668082]


In [191]:
plt.figure()
plt.plot(a.wave, -2.5*np.log10(a.spectra[minloc].flux / a.spectra[minloc-1].flux))



FigureCanvasNbAgg()

[<matplotlib.lines.Line2D at 0x7f5f7b69bcc0>]

## Interpolation uncertainty

In [45]:
try:
    coefs = a.interpolation_result['phase_dispersion_coefficients']
except KeyError:
    coefs = a.stan_data['phase_dispersion_coefficients']
num_phase_coefficients = len(coefs)

def evaluate_phase_dispersion(phase):
    phase_scale = np.abs((num_phase_coefficients / 2) * (phase / a.phase_width))
    full_bins = int(np.floor(phase_scale))
    remainder = phase_scale - full_bins
    
    phase_coefficients = np.zeros(num_phase_coefficients)
    
    for j in range(full_bins + 1):
        if j == full_bins:
            weight = remainder
        else:
            weight = 1
            
        if weight == 0:
            break
            
        if phase > 0:
            phase_bin = num_phase_coefficients // 2 + j
        else:
            phase_bin = num_phase_coefficients // 2 - 1 - j
            
        phase_coefficients[phase_bin] = weight
        
    fractional_dispersion = phase_coefficients.dot(coefs)
    
    # Convert to magnitudes
    mag_dispersion = frac_to_mag(fractional_dispersion)
    
    return mag_dispersion

phases = np.linspace(-a.phase_width, a.phase_width, 1 + num_phase_coefficients)

eval_coefs = np.array([evaluate_phase_dispersion(phase) for phase in phases])

# Uncertainties for different wavelengths
plt.figure()
num_wave = 10
for i in range(num_wave):
    min_wave = a.wave[0]
    max_wave = a.wave[-1]
    wave_range = max_wave - min_wave
    target_wave = min_wave + wave_range * i / (num_wave - 1)
    idx = np.argmin(np.abs(a.wave - target_wave))
    use_wave = a.wave[idx]
    color = plt.cm.rainbow((use_wave - min_wave) / wave_range)
    plt.plot(phases, eval_coefs[:, idx], label='%d $\AA$' % use_wave, c=color)
    
plt.xlim(-5.2, 5.2)
plt.xlabel('Phase (days)')
plt.ylabel('Interpolation uncertainty (mag)')
plt.legend()
plt.savefig('./figures/interpolation_uncertainty_phase.pdf')

plt.figure()
for i in range(len(phases)):
    plt.plot(a.wave, eval_coefs[i], label='%.2f days' % phases[i])
plt.legend()
plt.xlabel('Wavelength $(\AA$)')
plt.ylabel('Interpolation uncertainty (mag)')
plt.savefig('./figures/interpolation_uncertainty_wavelength.pdf')

FigureCanvasNbAgg()

FigureCanvasNbAgg()

## Model accuracy

In [226]:
max_flux = a.interpolation_result['maximum_flux']
max_fluxerr = a.interpolation_result['maximum_fluxerr']

max_magerr = frac_to_mag(max_fluxerr / max_flux)

rbtl_dispersion = frac_to_mag(a.rbtl_result['fractional_dispersion'])

def plot_uncertainties(show_rbtl=False):
    plt.figure(figsize=(8, 5))
    offset = 29
    
    # Make sure that we include the worst offender.
    max_loc = np.argmax(np.sum(max_magerr**2, axis=1))
    start = max_loc % offset
    
    for idx in range(start, len(a.targets), offset):
        plt.plot(a.wave, max_magerr[idx], label=a.targets[idx].name)
    plt.legend(ncol=2)
    
    plt.xlabel('Wavelength ($\AA$)')
    
    if show_rbtl:
        plt.plot(a.wave, rbtl_dispersion, label='Supernova intrinsic dispersion', c='k', lw=2, ls='--')
        plt.ylabel('Dispersion (mag)')
        path = './figures/interpolation_uncertainty_rbtl.pdf'
    else:
        plt.ylabel('Uncertainty on $f_{max}$ (mag)')
        path = './figures/interpolation_uncertainty_norbtl.pdf'
        
    plt.legend(ncol=2)
    plt.tight_layout()
    plt.savefig(path)
        
plot_uncertainties(False)
plot_uncertainties(True)


plt.figure(figsize=(8, 5))
for idx in range(len(a.targets)):
    if idx == 0:
        label = 'Individual uncertainties of $f_{max}$'
    else:
        label = ''
    plt.plot(a.wave, max_magerr[idx], label=label, alpha=0.02, c='C0')
plt.plot(a.wave, rbtl_dispersion, label='Supernova intrinsic dispersion', lw=2, ls='--', c='k')
plt.plot(a.wave, np.median(max_magerr, axis=0), label='Median uncertainty on $f_{max}$', lw=2, ls='--', c='C0')
plt.plot(a.wave, np.max(max_magerr, axis=0), label='Maximum uncertainty on $f_{max}$', c='C1')
plt.legend()
plt.ylabel('Dispersion (magnitude)')
plt.xlabel('Wavelength ($\AA$)')
plt.tight_layout()
plt.savefig('./figures/interpolation_uncertainty_median.pdf')

plt.figure(figsize=(8, 5))
plt.plot(a.wave, rbtl_dispersion, label='Supernova intrinsic dispersion', lw=2, ls='--', c='k')
plt.plot(a.wave, np.min(max_magerr, axis=0), label='Lowest uncertainty on $f_{max}$')
for percentile in (25, 50, 75):
    plt.plot(a.wave, np.percentile(max_magerr, percentile, axis=0), label='%dth percentile uncertainty on $f_{max}$' % percentile)
plt.plot(a.wave, np.max(max_magerr, axis=0), label='Highest uncertainty on $f_{max}$')
plt.legend(ncol=2)
plt.ylabel('Dispersion (magnitude)')
plt.xlabel('Wavelength ($\AA$)')
plt.tight_layout()
plt.savefig('./figures/interpolation_uncertainty_percentile.pdf')



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()

## Contribution to the total interpolation uncertainty from various sources

In [44]:
len(a.salt_x1)

202

In [145]:
targets = []
diffs = []
phases_1 = []
phases_2 = []
x1s = []
gray_differences = []

gray_offsets = a.interpolation_result['gray_offsets']

center_specs = a.spectra[a.center_mask]
center_gray_offsets = gray_offsets[a.center_mask]
for target_idx in range(len(a.targets)):
    near_max_spec = center_specs[target_idx]
    
    target_mask = (a.target_map == target_idx) & (~a.center_mask)
    target_specs = a.spectra[target_mask]
    target_gray_offsets = gray_offsets[target_mask]
    
    for spec_idx, target_spec in enumerate(target_specs):
        phase_diff = target_spec.phase - near_max_spec.phase
        # if np.abs(phase_diff) < 1:
            # continue

        targets.append(a.targets[target_idx])
        diff = -2.5*np.log10(target_spec.flux / near_max_spec.flux)
        diffs.append(diff)
        phases_1.append(near_max_spec.phase)
        phases_2.append(target_spec.phase)
        x1s.append(a.salt_x1[target_idx])
        
        gray_differences.append(target_gray_offsets[spec_idx] - center_gray_offsets[target_idx])

targets = np.array(targets)
diffs = np.array(diffs)
phases_1 = np.array(phases_1)
phases_2 = np.array(phases_2)
x1s = np.array(x1s)
gray_differences = np.array(gray_differences)

phase_diffs = phases_2 - phases_1

def plot_diffs(diffs):
    sel_mask = np.zeros(len(diffs), dtype=bool)
    sel_mask[4::50] = True
    sel_mask[np.abs(phase_diffs) < 1] = False
    
    print(np.min(x1s[sel_mask]))
    print(np.max(x1s[sel_mask]))
    
    plt.figure(figsize=(8, 5))
    
    for use_idx in np.where(sel_mask)[0]:
        target = targets[use_idx]
        phase_1 = phases_1[use_idx]
        phase_2 = phases_2[use_idx]
        
        if phase_1 > phase_2:
            phase_1, phase_2 = phase_2, phase_1
        
        label = '%s, %.1f to %.1f days' % (target, phase_1, phase_2)
        
        plt.plot(a.wave, diffs[use_idx] / phase_diffs[use_idx], alpha=0.5, label=label)
        
    plt.legend(ncol=2)

    plt.ylim(-0.25, 0.25)
    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Magnitudes per day')
    plt.tight_layout()

plot_diffs(diffs)
plt.savefig('./figures/raw_phase_difference.pdf')

residuals_no_x1 = []
residuals_x1 = []
for diff, phase_1, phase_2, x1 in zip(diffs, phases_1, phases_2, x1s):
    model_no_x1 = evaluate_phase_difference(phase_2, 0) - evaluate_phase_difference(phase_1, 0)
    model_x1 = evaluate_phase_difference(phase_2, x1) - evaluate_phase_difference(phase_1, x1)
    residuals_no_x1.append(diff - model_no_x1)
    residuals_x1.append(diff - model_x1)
    
residuals_no_x1 = np.array(residuals_no_x1)
residuals_x1 = np.array(residuals_x1)

residuals_gray_no_x1 = residuals_no_x1 - gray_differences[:, None]
residuals_gray_x1 = residuals_x1 - gray_differences[:, None]

plot_diffs(residuals_gray_no_x1)
plt.savefig('./figures/corr_phase_difference_no_x1.pdf')

plot_diffs(residuals_gray_x1)
plt.savefig('./figures/corr_phase_difference_x1.pdf')

def print_interpolation_residuals(min_days, max_days):
    cut = (np.abs(phase_diffs) < max_days) & (np.abs(phase_diffs) > min_days)

    def do_print(label, vals, cut):
        cut_vals = vals[cut]
        print('%20s: std=%.3f, NMAD=%.3f' % (label, math.rms(cut_vals), math.nmad(cut_vals)))

    print("Interpolation of %.1f-%.1f days:" % (min_days, max_days))
    do_print('Raw', diffs, cut)    
    do_print('Phase', residuals_no_x1, cut)    
    do_print('Phase + x1', residuals_x1, cut)    
    do_print('Phase + gray', residuals_gray_no_x1, cut)    
    do_print('Phase + x1 + gray', residuals_gray_x1, cut)    
    print("")
    
print_interpolation_residuals(0., 1.5)
print_interpolation_residuals(1.5, 2.5)
print_interpolation_residuals(2.5, 5.5)
print_interpolation_residuals(5.5, 10.5)

-2.32898376265
1.22872789967




FigureCanvasNbAgg()

-2.32898376265
1.22872789967




FigureCanvasNbAgg()

-2.32898376265
1.22872789967




FigureCanvasNbAgg()

Interpolation of 0.0-1.5 days:
                 Raw: std=0.050, NMAD=0.037
               Phase: std=0.045, NMAD=0.036
          Phase + x1: std=0.045, NMAD=0.037
        Phase + gray: std=0.033, NMAD=0.015
   Phase + x1 + gray: std=0.032, NMAD=0.015

Interpolation of 1.5-2.5 days:
                 Raw: std=0.116, NMAD=0.084
               Phase: std=0.078, NMAD=0.060
          Phase + x1: std=0.076, NMAD=0.057
        Phase + gray: std=0.061, NMAD=0.044
   Phase + x1 + gray: std=0.059, NMAD=0.041

Interpolation of 2.5-5.5 days:
                 Raw: std=0.180, NMAD=0.126
               Phase: std=0.094, NMAD=0.072
          Phase + x1: std=0.086, NMAD=0.068
        Phase + gray: std=0.082, NMAD=0.057
   Phase + x1 + gray: std=0.074, NMAD=0.050

Interpolation of 5.5-10.5 days:
                 Raw: std=0.282, NMAD=0.196
               Phase: std=0.143, NMAD=0.097
          Phase + x1: std=0.142, NMAD=0.103
        Phase + gray: std=0.133, NMAD=0.082
   Phase + x1 + gray: std=0.131, NMA

In [55]:
np.mean(a.flux / a.fluxerr)

129.74659428623275

# Reading between the lines plots

## Show spectra before and after

In [229]:
plt.figure(figsize=(8, 5))
plt.plot(a.wave, a.maximum_flux[a.interp_mask][0], alpha=1, lw=0.5, label='Individual spectra')
plt.plot(a.wave, a.maximum_flux[a.interp_mask][1:].T, lw=0.5)
plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Flux (arbitrary units)')
plt.legend()
plt.tight_layout()
plt.savefig('./figures/spectra_at_maximum.pdf')

plt.figure(figsize=(8, 5))
plt.plot(a.wave, a.scale_flux[a.interp_mask][0], alpha=1, lw=0.5, label='Individual spectra')
plt.plot(a.wave, a.scale_flux[a.interp_mask][1:].T, alpha=1., lw=0.5)
plt.plot(a.wave, a.mean_flux, c='k', lw=2, ls='--', label='Mean spectrum')
plt.legend()
plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Flux (arbitrary units)')
plt.tight_layout()
plt.savefig('./figures/scale_spectra.pdf')

plt.figure(figsize=(8, 5))
fractional_dispersion = a.rbtl_result['fractional_dispersion']
plt.plot(a.wave, a.mean_flux, c='k', lw=2, ls='--', label='Mean spectrum')
plt.fill_between(a.wave, a.mean_flux * (1 - fractional_dispersion), a.mean_flux * (1 + fractional_dispersion), label='Supernova intrinsic dispersion', alpha=0.5)
plt.legend()
plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Flux (arbitrary units)')
plt.tight_layout()
plt.savefig('./figures/scale_spectra_model.pdf')

plt.figure(figsize=(8, 5))
intrinsic_dispersion = frac_to_mag(a.rbtl_result['fractional_dispersion'])
plt.plot(a.wave, intrinsic_dispersion, c='k', lw=2, label='Supernova intrinsic dispersion')
plt.legend()
plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Intrinsic dispersion (mag)')
plt.tight_layout()
plt.savefig('./figures/rbtl_intrinsic_dispersion.pdf')



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()

## Magnitudes

In [271]:
plt.figure(figsize=spec_figsize)

plt.scatter(a.redshifts[a.interp_mask], a.mags[a.interp_mask], s=15, c='C3', label='Supernovae rejected by cuts')
plt.scatter(a.redshifts[a.good_mag_mask], a.mags[a.good_mag_mask], s=15, c='C0', label='Supernovae passing cuts')

z_range = np.linspace(0.001, 0.09, 100)
pec_vel_disp = 0.00217 / z_range
plt.fill_between(z_range, -pec_vel_disp, pec_vel_disp, alpha=0.2, label='Peculiar velocity dispersion')
plt.axvline(0.02, lw=1, ls='--', c='k', label='Redshift cutoff')

plt.xlim(0.001, 0.09)
plt.ylim(-1, 1.5)
plt.xlabel('Redshift')
plt.ylabel('RBTL measured magnitude')
plt.legend()
plt.tight_layout()
plt.savefig('./figures/rbtl_magnitude.pdf')



FigureCanvasNbAgg()

In [272]:
plt.figure(figsize=spec_figsize)

plt.scatter(a.redshifts[a.good_mag_mask], a.mags[a.good_mag_mask], s=15, c='C0', label='Supernovae passing cuts')
plt.xlabel('Redshift')
plt.ylabel('RBTL measured magnitude')


z_range = np.linspace(0.001, 0.09, 100)
pec_vel_disp = 0.00217 / z_range
plt.fill_between(z_range, -pec_vel_disp, pec_vel_disp, alpha=0.2, label='Peculiar velocity dispersion')

plt.legend()
plt.tight_layout()
plt.xlim(0.01, 0.09)
plt.ylim(-0.5, 0.5)
plt.savefig('./figures/rbtl_magnitude_cut.pdf')



FigureCanvasNbAgg()

In [273]:
print("Raw RBTL mag std:  %.3f mag" % np.std(a.mags[a.good_mag_mask]))
print("Raw RBTL mag NMAD: %.3f mag" % math.nmad(a.mags[a.good_mag_mask]))

Raw RBTL mag std:  0.123 mag
Raw RBTL mag NMAD: 0.112 mag


In [294]:
plt.figure(figsize=spec_figsize)

plt.scatter(a.redshifts[a.good_mag_mask], a.corr_mags[a.good_mag_mask], s=15, c='C0', label='Supernovae passing cuts')
plt.xlabel('Redshift')
plt.ylabel('RBTL measured magnitude')


z_range = np.linspace(0.001, 0.09, 100)
pec_vel_disp = 0.00217 / z_range
plt.fill_between(z_range, -pec_vel_disp, pec_vel_disp, alpha=0.2, label='Peculiar velocity dispersion')

plt.legend()
plt.tight_layout()
plt.xlim(0.01, 0.09)
plt.ylim(-0.5, 0.5)
plt.savefig('./figures/rbtl_magnitude_cut.pdf')



FigureCanvasNbAgg()

# Manifold learning plots

## Reconstruction uncertainty

In [739]:
# Note: the total variance isn't defined for Isomap. The variances of the transfomed components
# do map onto the variance of real components though. We provide a very rough estimate of the
# measurement variance for comparison purposes... not sure how much it can be trusted...

num_show = 5

# Do an initial embedding with as many components as possible to get the full variance.
a.do_embedding(n_components=None)
variances = np.var(a.trans[a.interp_mask], axis=0)

ref_var = np.sum(variances[:10])

plot_ref = variances[0]

print(variances[:10] / plot_ref)
print(variances[:10] / np.cumsum(variances[:10]))

plt.figure()
plt.scatter(np.arange(num_show), variances[:num_show] / plot_ref, label='Contributed variance of each component')
plt.axhline(0.1 * ref_var / plot_ref, label='Approximate measurement variance cut', ls='--', c='C3')
plt.axhline(np.mean(a.interp_power_fraction[a.interp_mask]) * ref_var / plot_ref, label='Approximate mean measurement variance', ls='--', c='C2')
plt.ylim(0, None)
plt.xlabel('Component number')
plt.ylabel('Relative variance (Normalized to Component 1)')
plt.xticks(np.arange(num_show), np.arange(num_show) + 1)
plt.legend()
plt.tight_layout()

plt.savefig('./figures/isomap_component_variance.pdf')

[1.         0.62284067 0.30493579 0.10871492 0.0968748  0.07069073
 0.06639156 0.04895932 0.04520822 0.03151806]
[1.         0.38379656 0.15818006 0.05338344 0.04540936 0.032073
 0.02924161 0.02110855 0.01911863 0.01315371]




FigureCanvasNbAgg()

## Twin reconstruction

In [12]:
# Plot where twins and non-twins end up for different number of components.
# We also make a summary plot.
confused_fraction = []

plot_components = np.arange(1, 6)
for n_components in plot_components:
    a.do_embedding(n_components=n_components)
    leakage_matrix = a.plot_twin_distances(figsize=(5, 4))
    if n_components == 1:
        title = '1 Component + Color'
    else:
        title = '%d Components + Color' % n_components
    plt.title(title)
    plt.xlabel('Recovered twinness percentile in the embedded space')
    plt.tight_layout()
    plt.savefig('./figures/twins_recovery_%d_components.pdf' % n_components)
    
    confused_fraction.append(leakage_matrix[3, 0] + leakage_matrix[3, 1])

plt.figure()
plt.scatter(np.arange(len(confused_fraction)) + 1, confused_fraction)
plt.xticks(plot_components, plot_components)
plt.ylim(0, 0.1)
plt.xlabel('Number of components (in addition to color)')
plt.ylabel('Fraction of non-twins confused as twins')
plt.savefig('./figures/twins_confusion.pdf')

FigureCanvasNbAgg()

Unnamed: 0,To Best 10% of spectral twinness,To 10-20%,To 20-50%,To Worst 50% of spectral twinness
From Best 10% of spectral twinness,0.299492,0.271574,0.421954,0.00698
From 10-20%,0.16254,0.186667,0.55746,0.093333
From 20-50%,0.112381,0.105185,0.419259,0.363175
From Worst 50% of spectral twinness,0.040249,0.045201,0.152489,0.762062


FigureCanvasNbAgg()

Unnamed: 0,To Best 10% of spectral twinness,To 10-20%,To 20-50%,To Worst 50% of spectral twinness
From Best 10% of spectral twinness,0.592005,0.29632,0.111675,0.0
From 10-20%,0.187302,0.312381,0.496508,0.00381
From 20-50%,0.055873,0.111111,0.59746,0.235556
From Worst 50% of spectral twinness,0.010665,0.011554,0.119858,0.857796


FigureCanvasNbAgg()

Unnamed: 0,To Best 10% of spectral twinness,To 10-20%,To 20-50%,To Worst 50% of spectral twinness
From Best 10% of spectral twinness,0.727157,0.236041,0.036802,0.0
From 10-20%,0.19873,0.426667,0.374603,0.0
From 20-50%,0.023069,0.106878,0.703069,0.166984
From Worst 50% of spectral twinness,0.001016,0.003301,0.095861,0.899822


FigureCanvasNbAgg()

Unnamed: 0,To Best 10% of spectral twinness,To 10-20%,To 20-50%,To Worst 50% of spectral twinness
From Best 10% of spectral twinness,0.75,0.224619,0.025381,0.0
From 10-20%,0.189841,0.455238,0.354921,0.0
From 20-50%,0.017989,0.102434,0.725503,0.154074
From Worst 50% of spectral twinness,0.00127,0.002539,0.088624,0.907567


FigureCanvasNbAgg()

Unnamed: 0,To Best 10% of spectral twinness,To 10-20%,To 20-50%,To Worst 50% of spectral twinness
From Best 10% of spectral twinness,0.759518,0.211294,0.029188,0.0
From 10-20%,0.187302,0.466667,0.344762,0.00127
From 20-50%,0.016296,0.103492,0.725291,0.154921
From Worst 50% of spectral twinness,0.000889,0.002285,0.09002,0.906679


FigureCanvasNbAgg()

## Plot slices through the manifold

In [18]:
a.do_embedding()

def plot_slice(scan_component, closest_count=10, max_dist=1., loc=np.zeros(a.trans.shape[1] - 1)):
    loc = np.asarray(loc)
    mask = a.interp_mask

    use_trans = a.trans[mask]
    use_flux = a.scale_flux[mask]

    other_trans = np.delete(use_trans, scan_component, axis=1)
    dists = np.sqrt(np.sum((other_trans - loc)**2, axis=1))

    dist_limit = np.min([np.sort(dists)[closest_count], max_dist])
    scan_cut = dists < dist_limit
    
    scan_trans = use_trans[scan_cut, scan_component]
    
    sort_trans = np.sort(scan_trans)
    min_comp = sort_trans[0]
    max_comp = sort_trans[-1]
    # min_comp = np.min(scan_trans)
    # max_comp = np.max(scan_trans)
    # cmap = plt.cm.viridis
    cmap = plt.cm.coolwarm

    plt.figure(figsize=(8, 5))
    for spec, val in zip(use_flux[scan_cut], scan_trans):
        plt.plot(a.wave, spec, c=cmap((val - min_comp) / (max_comp - min_comp)))

    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Normalized flux')

    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=min_comp, vmax=max_comp))
    sm._A = []
    plt.colorbar(sm, label='Value of Component %d' % (scan_component + 1))
    plt.title('Component %d' % (scan_component + 1))
    
    plt.savefig('./figures/component_%d_effect.pdf' % (scan_component + 1))

In [19]:
plot_slice(0, loc=[0.5, -0.5])
plot_slice(1)
plot_slice(2, loc=[0, 1])

FigureCanvasNbAgg()

FigureCanvasNbAgg()

FigureCanvasNbAgg()

## Plot steps through a components values

In [32]:
# a.do_embedding()

def plot_steps(component, num_steps=10, xlim=None):
    mask = a.interp_mask

    use_trans = a.trans[mask, component]
    use_flux = a.scale_flux[mask]
    
    min_trans = np.percentile(use_trans, 5)
    max_trans = np.percentile(use_trans, 95)
    
    bin_edges = np.linspace(min_trans, max_trans, num_steps+1)
    
    bin_edges[0] = -1e20
    bin_edges[-1] = 1e20
    
    plt.figure(figsize=(9, 4))
    
    cmap = plt.cm.coolwarm
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=min_trans, vmax=max_trans))
    sm._A = []

    if xlim is not None:
        wave_mask = (a.wave > xlim[0] - 50) & (a.wave < xlim[1] + 50)
    else:
        wave_mask = np.ones(len(a.wave), dtype=bool)
    
    for step in range(num_steps):
        step_mask = (use_trans >= bin_edges[step]) & (use_trans < bin_edges[step+1])
        step_trans = use_trans[step_mask]

        mean_val = np.mean(step_trans)
        step_flux = np.median(use_flux[step_mask], axis=0)
        
        if step == 0:
            label = 'Median spectra in each component bin'
        else:
            label = ''
        
        plt.plot(a.wave[wave_mask], step_flux[wave_mask], c=sm.to_rgba(mean_val), label=label)
        
    if xlim is not None:
        plt.xlim(*xlim)
        
    plt.colorbar(sm, label='Value of Component %d' % (component + 1))
    plt.title('Component %d' % (component + 1))
    
    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Normalized flux')
    plt.ylim(0, None)
    
    plt.legend()
    
    plt.tight_layout()
    
    if xlim is None:
        plt.savefig('./figures/component_%d_steps.pdf' % (component + 1))
    else:
        plt.savefig('./figures/component_%d_steps_zoom_%d_%d.pdf' % (component + 1, xlim[0], xlim[1]))

In [34]:
for component in range(3):
    plot_steps(component)
    plot_steps(component, xlim=(3300, 4500))
    plot_steps(component, xlim=(4900, 6700))
    plot_steps(component, xlim=(7200, 8600))



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()

## Comparison to original twins

In [15]:
hannah_list = np.genfromtxt('./data/fakhouri_atmax_list.txt', dtype='str')
twins_mask = np.array([i.name in hannah_list for i in a.targets])

In [16]:
a.plot_twin_pairings()
plt.xlim(0, 100)
plt.ylim(0, None)
plt.savefig('./figures/twin_dispersion.pdf')

RMS  20%: 0.1045176399407426
NMAD 20%: 0.09740618077278954


FigureCanvasNbAgg()

## Twins that are poor brightness matches

In [17]:
from scipy.spatial.distance import pdist, squareform

mask = a.good_mag_mask

raw_spec_dists = pdist(a.iso_diffs[mask])
spec_dists = squareform(raw_spec_dists)
mag_diffs = squareform(pdist(a.mags[mask][:, None]))

In [18]:
dist_mask = spec_dists < np.percentile(raw_spec_dists, 20)
plt.figure()
plt.hist(mag_diffs[dist_mask])

FigureCanvasNbAgg()

(array([380., 314., 210., 138.,  72.,  44.,  36.,  18.,   0.,   4.]),
 array([0.        , 0.05152395, 0.10304789, 0.15457184, 0.20609578,
        0.25761973, 0.30914367, 0.36066762, 0.41219156, 0.46371551,
        0.51523946]),
 <a list of 10 Patch objects>)

In [19]:
idx1, idx2 = np.where(dist_mask & (mag_diffs > 0.35))
idx_mask = idx1 < idx2
idx1 = idx1[idx_mask]
idx2 = idx2[idx_mask]

def print_pairs(vals):
    for val_name, val in vals.items():
        print("%13s_1" % val_name, "%13s_2" % val_name, end='')
    print('')
        
    for i, j in zip(idx1, idx2):
        for val_name, val in vals.items():
            try:
                print('%15.3f' % val[mask][i], '%15.3f' % val[mask][j], end='')
            except TypeError:
                print('%15s' % val[mask][i], '%15s' % val[mask][j], end='')
        print('')

print_pairs({
    'target': a.targets,
    'redshift': a.redshifts,
    'color': a.colors,
    'mag': a.mags,
    'salt_mag': a.salt_hr,
    'trans[0]': a.trans[:, 0],
})

       target_1        target_2     redshift_1      redshift_2        color_1         color_2          mag_1           mag_2     salt_mag_1      salt_mag_2     trans[0]_1      trans[0]_2
   CSS130502_01 SNF20080612-003          0.034           0.033         -0.321          -0.174          0.213          -0.286          0.182          -0.234          1.805          -0.294
       PTF10wnm SNF20080612-003          0.065           0.033         -0.246          -0.174          0.117          -0.286          0.079          -0.234          0.676          -0.294
       PTF11drz SNF20080612-003          0.057           0.033         -0.086          -0.174          0.093          -0.286          0.099          -0.234         -1.618          -0.294
       PTF11pdk SNF20080612-003          0.085           0.033         -0.267          -0.174          0.100          -0.286          0.064          -0.234          0.240          -0.294
       PTF12jqh SNF20080612-003          0.047           0.033   

## Comparision to previous spectral classifications

In [21]:
a.do_blondin_plot()
plt.savefig('./figures/branch_classification.pdf')

FigureCanvasNbAgg()

In [22]:
a.do_component_blondin_plot()
plt.savefig('./figures/branch_labels_isomap.pdf')

FigureCanvasNbAgg()

In [109]:
def plot_spec(frac, radius=1.):
    val_0 = -1.7 + (2.2 - -1.7) * frac
    val_1 = 2.2 + (-1.4 - 2.2) * frac
    
    dist = np.sqrt(
        (a.trans[:, 0] - val_0)**2
        + (a.trans[:, 1] - val_1)**2
    )
    
    t = a.trans
    cut = dist < radius
    
    print(np.sum(cut))
    
    f = np.median(a.scale_flux[cut], axis=0)
    
    # c = plt.cm.coolwarm((val_0 + 2) / 4)
    c = plt.cm.coolwarm(i)
    
    plt.plot(a.wave, f, label='Median of spectra near (%.1f, %.1f)' % (val_0, val_1), c=c)
    
plt.figure(figsize=(8, 5))
for i in np.linspace(0, 1, 4):
    plot_spec(i)
plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Flux (arbitrary units)')
plt.legend()
plt.savefig('./figures/core_normal_comparison.pdf')



FigureCanvasNbAgg()

16
27
12
16


  # This is added back by InteractiveShellApp.init_path()


## Color comparison

In [50]:
a.scatter(a.colors, vmin=-0.5, vmax=0.5)



FigureCanvasNbAgg()

# Standardization 

## Raw magnitudes

In [22]:
a.scatter(a.mags, a.good_mag_mask, vmin=-0.2, vmax=0.2, label='Residual magnitude', invert_colorbar=True)
plt.savefig('./figures/components_12_residual_magnitude.pdf')

FigureCanvasNbAgg()

In [23]:
a.scatter(a.mags, a.good_mag_mask, vmin=-0.2, vmax=0.2, axis_1=0, axis_2=2, label='Residual magnitude', invert_colorbar=True)
plt.savefig('./figures/components_13_residual_magnitude.pdf')

FigureCanvasNbAgg()

In [24]:
a.scatter(a.mags, a.good_mag_mask, vmin=-0.2, vmax=0.2, axis_1=1, axis_2=2, label='Residual magnitude', invert_colorbar=True)
plt.savefig('./figures/components_23_residual_magnitude.pdf')

FigureCanvasNbAgg()

## GP standardization

In [47]:
# Reset to defaults in case things got messed up
a.do_embedding()
a.fit_gp()

Fitting GP hyperparameters...
Fit result:
      fun: -59.26127018578216
 hess_inv: array([[ 4.98245421e-03,  3.23904835e-06, -6.30946395e-04,
        -3.05735506e-02],
       [ 3.23904835e-06,  1.61370479e-04,  2.64752292e-05,
         1.04339917e-02],
       [-6.30946395e-04,  2.64752292e-05,  1.17696783e-03,
         4.34280822e-02],
       [-3.05735506e-02,  1.04339917e-02,  4.34280822e-02,
         3.55195095e+00]])
      jac: array([-2.86102295e-06, -9.05990601e-06,  2.38418579e-06, -5.72204590e-06])
  message: 'Optimization terminated successfully.'
     nfev: 132
      nit: 15
     njev: 22
   status: 0
  success: True
        x: array([-0.046595  ,  0.07682295,  0.10195705,  2.69551498])
Finite difference covariance step sizes: [1.60e-04 2.00e-05 4.00e-05 2.56e-03]
Fit uncertainty: [0.07023724 0.01255326 0.03414024 1.8833214 ]
Fit NMAD:        0.08971590237637203
Fit std:         0.10109691316372353


In [41]:
a.fit_gp(kind='salt_raw')

Fitting GP hyperparameters...
Fit result:
      fun: -81.60244487218607
 hess_inv: array([[ 2.59577051e-02, -1.04250891e-04, -9.82967154e-04,
        -8.26180244e-03],
       [-1.04250891e-04,  7.86249046e-05,  1.25693295e-04,
         2.85355060e-04],
       [-9.82967154e-04,  1.25693295e-04,  3.46500706e-03,
         6.37116495e-03],
       [-8.26180244e-03,  2.85355060e-04,  6.37116495e-03,
         1.33836705e-02]])
      jac: array([ 0.00000000e+00,  3.81469727e-06, -1.90734863e-06, -6.67572021e-06])
  message: 'Optimization terminated successfully.'
     nfev: 450
      nit: 33
     njev: 75
   status: 0
  success: True
        x: array([ 3.10484818,  0.0884479 , -0.30768032,  6.25696017])
Fit NMAD:        0.14270963860704455
Fit std:         0.15181864902995143


In [57]:
a.apply_polynomial_standardization(kind='salt_raw')

Fitted coefficients:
[-0.10088315  2.84020783 -0.04884191 -0.07415973  0.02167978]
Fit NMAD:        0.1302191752040881
Fit std:         0.12547577023620432


In [40]:
plt.figure()
def f(x):
    return np.std(a.colors - a.salt_color * x[0])

res = minimize(f, [1])
print(res)

plt.scatter(a.salt_color, a.colors / a.salt_color)

FigureCanvasNbAgg()

      fun: 0.12155185608365733
 hess_inv: array([[4.83633085]])
      jac: array([-1.8319115e-06])
  message: 'Optimization terminated successfully.'
     nfev: 24
      nit: 6
     njev: 8
   status: 0
  success: True
        x: array([2.64308055])


<matplotlib.collections.PathCollection at 0x7f2f4ab80748>

In [50]:
a.plot_gp(axis_1=0, axis_2=1)
plt.savefig('./figures/gp_mag_components_12.pdf')

a.plot_gp(axis_1=0, axis_2=2)
plt.savefig('./figures/gp_mag_components_13.pdf')

a.plot_gp(axis_1=1, axis_2=2)
plt.savefig('./figures/gp_mag_components_23.pdf')

FigureCanvasNbAgg()

FigureCanvasNbAgg()



FigureCanvasNbAgg()

In [31]:
a.scatter(a.colors, vmin=-0.2, vmax=0.2)

FigureCanvasNbAgg()

## Check vs phases of original spectra

In [39]:
a.fit_gp()

Fitting GP hyperparameters...
Fit result:
      fun: -59.26127018578216
 hess_inv: array([[ 4.98245421e-03,  3.23904835e-06, -6.30946395e-04,
        -3.05735506e-02],
       [ 3.23904835e-06,  1.61370479e-04,  2.64752292e-05,
         1.04339917e-02],
       [-6.30946395e-04,  2.64752292e-05,  1.17696783e-03,
         4.34280822e-02],
       [-3.05735506e-02,  1.04339917e-02,  4.34280822e-02,
         3.55195095e+00]])
      jac: array([-2.86102295e-06, -9.05990601e-06,  2.38418579e-06, -5.72204590e-06])
  message: 'Optimization terminated successfully.'
     nfev: 132
      nit: 15
     njev: 22
   status: 0
  success: True
        x: array([-0.046595  ,  0.07682295,  0.10195705,  2.69551498])
Fit NMAD:        0.08882869659158774
Fit std:         0.10137719908554844


In [47]:
plt.figure()
plt.scatter(a.salt_phases[a.center_mask][a.good_mag_mask], a.mags[a.good_mag_mask], label='Individual observations')
math.plot_binned_mean(a.salt_phases[a.center_mask][a.good_mag_mask], a.mags[a.good_mag_mask], c='C2', label='Binned mean')
plt.xlabel('Phase of closest spectrum to maximum (days)')
plt.ylabel('Residual magnitude')
plt.gca().invert_yaxis()
plt.legend()



FigureCanvasNbAgg()

<matplotlib.legend.Legend at 0x7fe4f9c51f28>

# SALT2 comparison

In [287]:
# Load SALT2 Hubble residuals
a.calculate_salt_hubble_residuals()

Pass 0, MB=-19.149, alpha=0.129, beta=2.985
  -> new intrinsic_dispersion=0.119
Pass 1, MB=-19.149, alpha=0.129, beta=2.987
  -> new intrinsic_dispersion=0.119
Pass 2, MB=-19.149, alpha=0.129, beta=2.987
  -> new intrinsic_dispersion=0.119
Pass 3, MB=-19.149, alpha=0.129, beta=2.987
  -> new intrinsic_dispersion=0.119
Pass 4, MB=-19.149, alpha=0.129, beta=2.987
  -> new intrinsic_dispersion=0.119
SALT2 Hubble fit: 
    MB:    -19.14936035870967
    alpha: 0.12912169654736533
    beta:  2.986549046949107
    σ_int: 0.11893666801711794
    std:   0.1506544775204048
    NMAD:  0.10991458888441766


## SALT2 colors

In [285]:
plt.figure()
plt.scatter(a.salt_color, a.colors, s=5)
plt.xlabel('SALT2 Color ($c$)')
plt.ylabel('RBTL Color ($A_V$)')
plt.tight_layout()

plt.savefig('./figures/salt2_color_comparison.pdf')



FigureCanvasNbAgg()

## SALT2 X1

In [97]:
a.scatter(a.salt_x1, a.interp_mask, label='SALT $x_1$')
plt.savefig('./figures/salt2_x1_components.pdf')



FigureCanvasNbAgg()

In [26]:
# Find the best predictor of x1
def to_min(x):
    diff = a.salt_x1 - a.trans.dot(x)
    return np.nanstd(diff[a.salt_mask])

res = minimize(to_min, [0, 0, 0])

norm_x = res.x / np.sqrt(np.sum(res.x**2))
print(norm_x)

plt.figure()
# plt.scatter(a.trans.dot(res.x), a.salt_x1, c=a.salt_mask)
plt.scatter(a.trans.dot(res.x)[~a.salt_mask], a.salt_x1[~a.salt_mask], c='C3', s=30, label='"Bad" SALT2 fits', alpha=0.8)
plt.scatter(a.trans.dot(res.x)[a.salt_mask], a.salt_x1[a.salt_mask], c='C0', s=30, label='"Good" SALT2 fits', alpha=0.8)
plt.plot([-3, 3], [-3, 3], ls='--', c='k', label='One-to-one line')
plt.xlim(-3, 3)
plt.ylim(-3, 3)
plt.xlabel('Rotated Isomap components')
plt.ylabel('SALT2 $x_1$')
plt.legend()
plt.savefig('./figures/rotated_isomap_salt_x1.pdf')


print(np.corrcoef(a.trans.dot(res.x)[a.interp_mask & a.salt_mask], a.salt_x1[a.interp_mask & a.salt_mask]))

[ 0.35367054  0.91044673 -0.2144852 ]


FigureCanvasNbAgg()

[[1.         0.78539571]
 [0.78539571 1.        ]]


## SALT2 outliers (Type Iax)

In [28]:
# Outlier spectra
mask = (a.trans[:, 0] > 5) & (a.trans[:, 1] > 2)
print(a.targets[mask])
print(a.colors[mask])
print(a.redshifts[mask])
print(a.mags[mask])

idx2 = np.where((a.trans[:, 0] > 5) & (a.trans[:, 1] < 2))[0][0]

plt.figure()
for i in np.where(mask)[0]:
    plt.plot(a.wave, a.scale_flux[i], label=a.targets[i].name)
    
plt.plot(a.wave, a.scale_flux[idx2], c='k', ls='--', label=a.targets[idx2].name)
print(a.trans[idx2])

plt.legend()

plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Flux (arbitrary units)')

plt.savefig('./figures/type_iax_comparison.pdf')

[Target(name="LSQ12fhs") Target(name="SN2005cc") Target(name="SN2011ay")]
[1.00445401 1.14540576 1.0136129 ]
[0.03202726 0.00785178 0.02125125]
[ 0.23935881  1.43149462 -0.45383737]


FigureCanvasNbAgg()

[ 5.08567677  0.81677272 -0.92275992]


## SALT2 magnitudes vs components

In [121]:
a.scatter(a.salt_hr, mask=a.good_salt_mask & a.interp_mask, vmin=-0.3, vmax=0.3, label='SALT2-corrected residual magnitude', invert_colorbar=True)
# a.scatter(a.salt_hr, mask=a.good_salt_mask & a.interp_mask, vmin=-0.3, vmax=0.3, label='SALT2 Hubble residuals', invert_colorbar=True)
plt.savefig('./figures/salt2_hr_components.pdf')



FigureCanvasNbAgg()

In [122]:
plt.figure()

use_x = a.trans[:, 0]

mask = a.salt_mask & a.redshift_color_mask & a.interp_mask
plt.scatter(use_x[mask], a.salt_hr[mask], label='Individual supernovae')
math.plot_binned_mean(use_x[mask], a.salt_hr[mask], c='C2', lw=2, label='Binned mean')
plt.legend()

plt.gca().invert_yaxis()

plt.xlabel('Component 1')
plt.ylabel('SALT2-corrected residual magnitude')

plt.tight_layout()
plt.savefig('./figures/salt2_hr_component_1.pdf')

plt.figure()
plt.hist(a.salt_hr[(use_x < 2) & mask], 10, (-0.6, 0.4), alpha=0.3, color='C0', label='Component 1 < 2', density=True)
plt.hist(a.salt_hr[(use_x < 2) & mask], 10, (-0.6, 0.4), histtype='step', lw=2, color='C0', density=True)
plt.hist(a.salt_hr[(use_x > 2) & mask], 10, (-0.6, 0.4), alpha=0.3, color='C1', label='Component 1 > 2', density=True)
plt.hist(a.salt_hr[(use_x > 2) & mask], 10, (-0.6, 0.4), histtype='step', lw=2, color='C1', density=True)

plt.gca().invert_xaxis()

plt.xlabel('SALT2 residual magnitude')
plt.ylabel('Normalized counts')
plt.legend()

plt.tight_layout()
plt.savefig('./figures/salt2_hr_hist.pdf')



FigureCanvasNbAgg()



FigureCanvasNbAgg()

## SALT2 + Isomap standardization

In [45]:
a.fit_gp(kind='salt_raw')
a.plot_gp(kind='salt_raw', vmin=-0.5, vmax=0.5)

plt.savefig('./figures/salt_gp_component_12.pdf')

Fitting GP hyperparameters...
Fit result:
      fun: -81.60244487218607
 hess_inv: array([[ 2.59577051e-02, -1.04250891e-04, -9.82967154e-04,
        -8.26180244e-03],
       [-1.04250891e-04,  7.86249046e-05,  1.25693295e-04,
         2.85355060e-04],
       [-9.82967154e-04,  1.25693295e-04,  3.46500706e-03,
         6.37116495e-03],
       [-8.26180244e-03,  2.85355060e-04,  6.37116495e-03,
         1.33836705e-02]])
      jac: array([ 0.00000000e+00,  3.81469727e-06, -1.90734863e-06, -6.67572021e-06])
  message: 'Optimization terminated successfully.'
     nfev: 450
      nit: 33
     njev: 75
   status: 0
  success: True
        x: array([ 3.10484818,  0.0884479 , -0.30768032,  6.25696017])
Finite difference covariance step sizes: [3.20e-04 2.00e-05 1.60e-04 2.56e-03]
Fit uncertainty: [0.16192743 0.00931934 0.14512514 3.21787536]
Fit NMAD:        0.11275838379102883
Fit std:         0.11720230579451736




FigureCanvasNbAgg()

In [63]:
plt.figure()
m = a.good_salt_mask & a.interp_mask
plt.scatter(a.salt_x1[m], (a.corr_mags - a.salt_hr)[m], c=a.trans[m, 0], cmap=plt.cm.coolwarm, vmin=-3, vmax=3)
plt.gca().invert_yaxis()
plt.colorbar(label='Value of Isomap component 1')
plt.xlabel('SALT2 $x_1$')
plt.ylabel('Difference between SALT2 and\nSALT2 + Isomap standardization (mag)')
plt.tight_layout()
plt.savefig('./figures/salt_isomap_difference.pdf')



FigureCanvasNbAgg()

In [53]:
plt.figure()
plt.scatter(a.salt_x1[a.good_salt_mask], a.corr_mags[a.good_salt_mask])



FigureCanvasNbAgg()

<matplotlib.collections.PathCollection at 0x7fafdbdad518>

# Host galaxy correlations

In [527]:
def apply_bootstrap_mask(vals, mask):
    if len(np.shape(vals)) != 1 or len(vals) != mask.shape[0]:
        # Not something to apply the bootstrap to
        return vals
    
    else:
        return vals[mask]

def bootstrap_statistic(statistic, vals, *args, num_resamples=10000, **kwargs):
    stat = statistic(vals, *args, **kwargs)
    
    if len(np.shape(vals)) != 1:
        raise Exception("bootstrap_statistic only supported for 1-D inputs.")
        
    bootstrap_idx = np.random.choice(len(vals), (len(vals), num_resamples))

    bootstrap_vals = vals[bootstrap_idx]
    
    bootstrap_args = []
    for arg in args:
        bootstrap_args.append(apply_bootstrap_mask(arg, bootstrap_idx))
        
    bootstrap_kwargs = {}
    for arg_name, arg in kwargs.items():
        bootstrap_kwargs[arg_name] = apply_bootstrap_mask(arg, bootstrap_idx)
        
    bootstrap_stat = statistic(bootstrap_vals, *bootstrap_args, axis=0, **bootstrap_kwargs)
    stat_err = np.std(bootstrap_stat, axis=-1)
    
    if len(np.shape(stat_err)) > 0:
        stat = np.array(stat)
    
    return stat, stat_err

In [523]:
def calc_step(sample, weights, axis=None):
    vals_1 = np.average(sample, weights=weights, axis=axis)
    vals_2 = np.average(sample, weights=1-weights, axis=axis)
    
    return vals_1, vals_2, vals_2 - vals_1

In [524]:
m = a.good_salt_mask & a.host_mask & a.interp_mask & a.good_mag_mask
bootstrap_statistic(calc_step, salt_isomap_mags[m], a.host_data['p_young'][m] / 100)

(array([ 0.00866794, -0.0158116 , -0.02447954]),
 array([0.01758642, 0.02692721, 0.03154575]))

In [525]:
m = a.good_mag_mask & a.host_mask
calc_step(rbtl_isomap_mags[m], a.host_data['p_young'][m] / 100)
bootstrap_statistic(calc_step, rbtl_isomap_mags[m], a.host_data['p_young'][m] / 100)

(array([ 0.01484403, -0.02815389, -0.04299793]),
 array([0.01490948, 0.02197723, 0.02588642]))

In [526]:
weights = np.ones(len(a.mags))
weights[::2] = 0
print(bootstrap_statistic(np.average, a.mags[a.good_mag_mask]))
print(bootstrap_statistic(np.average, a.mags[a.good_mag_mask], weights=weights[a.good_mag_mask]))

(0.003795664125211102, 0.013938931998063548)
(-0.0033917121161430638, 0.021066102679448633)


In [513]:
def analyze_host_variable(variable, mags, mask, threshold=None, sign=+1, plot=True):
    # Default thresholds from Rigault et al. 2019
    if threshold is None:
        if variable == 'lssfr':
            threshold = -10.8
        elif variable == 'gmass':
            threshold = 10
        elif variable == 'p_young' or variable == 'p_highmass':
            threshold = None
        else:
            raise Exception("Must specify threshold!")

    use_mask = mask & a.host_mask
    use_var = a.host_data[variable][use_mask]
    use_mags = mags[use_mask]
    
    if threshold is None:
        # Do weighted probability
        val, val_err = 
        
    else:
        # Do a flat cut
        cut = use_var < threshold

        sample_1 = use_mags[use_var < threshold]
        sample_2 = use_mags[use_var > threshold]

        mean_1 = np.mean(sample_1)
        mean_2 = np.mean(sample_2)

        err_1 = np.std(sample_1) / np.sqrt(len(sample_1) - 1)
        err_2 = np.std(sample_2) / np.sqrt(len(sample_2) - 1)
    
    diff = sign * (mean_1 - mean_2)
    err = np.sqrt(err_1**2 + err_2**2)
    
    if plot:
        plt.figure()
        plt.scatter(use_var, use_mags)

    return diff, err

SyntaxError: invalid syntax (<ipython-input-513-1ecc715ad164>, line 19)

In [291]:
analyze_host_variable('lssfr', a.salt_hr, a.good_mag_mask)



FigureCanvasNbAgg()

(-0.08893440034210678, 0.03585147424242947)

In [288]:
a.fit_gp(kind='rbtl', verbose=False)
rbtl_isomap_mags = a.corr_mags
a.fit_gp(kind='salt_raw', verbose=False)
salt_isomap_mags = a.corr_mags

In [289]:
print(analyze_host_variable('gmass', rbtl_isomap_mags, a.good_mag_mask, plot=False))
print(analyze_host_variable('lssfr', rbtl_isomap_mags, a.good_mag_mask, plot=False))

(0.003409867206575488, 0.029479132230884747)
(-0.04669787576565154, 0.028422318831935123)


In [283]:
print(analyze_host_variable('gmass', a.salt_hr, a.good_salt_mask, 10., plot=False))
print(analyze_host_variable('lssfr', a.salt_hr, a.good_salt_mask, -10.8, plot=False))
a.fit_gp(kind='salt_raw', verbose=False)
print(analyze_host_variable('gmass', salt_isomap_mags, a.good_salt_mask & a.interp_mask, 10., plot=False))
print(analyze_host_variable('lssfr', salt_isomap_mags, a.good_salt_mask & a.interp_mask, -10.8, plot=False))

(0.08609509248116043, 0.026609260908795625)
(-0.1129230676134455, 0.02553876837067687)
(0.016822469717758807, 0.027493506148109824)
(-0.03812162386466404, 0.026798952470804296)


In [284]:
print(analyze_host_variable('gmass', a.salt_hr, a.good_mag_mask & a.good_salt_mask, 10., plot=False))
print(analyze_host_variable('lssfr', a.salt_hr, a.good_mag_mask & a.good_salt_mask, -10.8, plot=False))
a.fit_gp(kind='rbtl', verbose=False)
print(analyze_host_variable('gmass', rbtl_isomap_mags, a.good_mag_mask & a.good_salt_mask, 10., plot=False))
print(analyze_host_variable('lssfr', rbtl_isomap_mags, a.good_mag_mask & a.good_salt_mask, -10.8, plot=False))
a.fit_gp(kind='salt_raw', verbose=False)
print(analyze_host_variable('gmass', salt_isomap_mags, a.good_mag_mask & a.good_salt_mask, 10., plot=False))
print(analyze_host_variable('lssfr', salt_isomap_mags, a.good_mag_mask & a.good_salt_mask, -10.8, plot=False))

(0.04201185552896323, 0.03590401974521659)
(-0.08955416488636746, 0.034637835085661955)
(-0.008161766604542717, 0.030782657597718432)
(-0.04504729901030638, 0.02993385879369351)
(-0.028081582076588254, 0.034075502304940795)
(-0.02959087554328841, 0.033854281769135644)


In [None]:
host_results = {
    # 'Local Mass':  [0.008,  0.04,   -0.02,  0.028],
    # 'Local SFR':   [0.013,  -0.007, -0.018, 0.015],
    # 'Local SSFR':  [0.058,  0.0956, 0.029,  0.070],
    # 'Global Mass': [0.0258,  0.061, -0.0136,-0.008],
    'Local Mass':  [-0.0041,  0.0488],
    'Local SSFR':  [0.0164,  0.0744],
    'Global Mass': [-0.0121,  0.0517],
}

err = 0.025

plt.figure()
labels = []
for idx, (key, values) in enumerate(host_results.items()):
    for val_idx, value in enumerate(values):
        if val_idx >= 2:
            continue
        marker = 'oovv'[val_idx]
        # color = 'C%d' % (val_idx % 2)
        color = 'C%d' % (val_idx % 2)
        if idx == 0:
            label = ['Manifold twins', 'SALT2', 'Manifold twins color cut', 'SALT2 color cut'][val_idx]
        else:
            label = None
        gap = 0.1
        # xpos = idx - 3.*gap + 2*gap * val_idx + gap * (val_idx // 2)
        # xpos = idx - 1.5*gap + gap * val_idx
        xpos = idx - 0.5*gap + gap * val_idx
        plt.errorbar(xpos, value, err, c=color, alpha=1.)
        plt.plot(xpos, value, marker=marker, c=color, label=label)
        
    labels.append(key)

plt.xticks(np.arange(len(labels)), labels=labels)
plt.axhline(0., c='k')
plt.xlim(-0.5, 2.5)
# plt.ylim(-0.05, 0.13)

plt.ylabel('Step size (mag)')
plt.legend(loc=2)

In [None]:
plt.savefig('host_correlations.eps')

In [None]:
host_results = {
    # 'Local Mass':  [0.008,  0.04,   -0.02,  0.028],
    # 'Local SFR':   [0.013,  -0.007, -0.018, 0.015],
    # 'Local SSFR':  [0.058,  0.0956, 0.029,  0.070],
    # 'Global Mass': [0.0258,  0.061, -0.0136,-0.008],
    'Local Mass':  [0.008,  0.04,   -0.004, 0.051],
    'Local SFR':   [0.013,  -0.007, 0.0047, -0.031],
    'Local SSFR':  [0.058,  0.0956, 0.027, 0.065],
    'Global Mass': [0.0258,  0.061, -0.001, 0.040],
}

err = 0.025

plt.figure()
labels = []
for idx, (key, values) in enumerate(host_results.items()):
    for val_idx, value in enumerate(values):
        if val_idx >= 2:
            continue
        marker = 'oovv'[val_idx]
        # color = 'C%d' % (val_idx % 2)
        color = 'C%d' % (val_idx % 2)
        if idx == 0:
            label = ['Manifold twins', 'SALT2', 'Manifold twins color cut', 'SALT2 color cut'][val_idx]
        else:
            label = None
        gap = 0.1
        # xpos = idx - 3.*gap + 2*gap * val_idx + gap * (val_idx // 2)
        # xpos = idx - 1.5*gap + gap * val_idx
        xpos = idx - 0.5*gap + gap * val_idx
        plt.errorbar(xpos, value, err, c=color, alpha=0.3)
        plt.plot(xpos, value, marker=marker, c=color, label=label)
        
    labels.append(key)

plt.xticks(np.arange(len(labels)), labels=labels)
plt.axhline(0., c='k')
# plt.ylim(-0.05, 0.13)

plt.ylabel('Step size (mag)')
plt.legend(loc=2)

In [None]:
plt.figure()

c1 = (a.host_data['lssfr'][a.train_cut[a.host_mask]] > -11)[:, 0]
c2 = (a.corr_mags[a.host_mask[a.train_cut]] > 0.05)
cc = c1 + 2*c2

x = a.trans[:, 0][a.train_cut & a.host_mask]
y = a.trans[:, 1][a.train_cut & a.host_mask]
plt.scatter(x, y, c=cc, cmap=plt.cm.jet)
plt.colorbar()

In [None]:
a.host_mask & a.train_cut

In [None]:
plt.figure()

lssfr = a.host_data['lssfr'][a.train_cut[a.host_mask]]
mag1 = a.salt_hr[a.host_mask & a.train_cut]
mag2 = a.corr_mags[a.host_mask[a.train_cut]]

for i in range(len(lssfr)):
    bigger = mag1[i] > mag2[i]
    if bigger:
        c = 'C3'
    else:
        c = 'C0'
    
    plt.arrow(lssfr[i], mag1[i], 0, mag2[i] - mag1[i], width=0.01, head_width=0.1, head_length=0.03, edgecolor=c, facecolor=c)
    
plt.xlim(np.min(lssfr) - 0.5, np.max(lssfr) + 0.5)
plt.ylim(-0.4, 0.4)

plt.figure()
plt.scatter(lssfr, mag1)

plt.figure()
plt.scatter(lssfr, mag2)

# Modeling the full timeseries

In [None]:
all_phases = []
all_trans = []
all_fluxes = []
all_spectra = []

for idx in tqdm.tqdm(range(len(a.targets))):
    target = a.targets[idx]
    # scale = a.applied_scale[idx]
    # scale = a.scale_flux[idx] / a.interpolation_result['target_flux'][idx]
    scale = 1 / a.model_scales[idx]
    trans = a.trans[idx]

    for raw_spectrum in target.spectra:
        spectrum = raw_spectrum.bin_by_velocity(1000).apply_scale(scale)
        
        all_phases.append(spectrum.phase)
        all_fluxes.append(spectrum.flux)
        all_trans.append(trans)
        all_spectra.append(spectrum)
        
all_phases = np.array(all_phases)
all_fluxes = np.array(all_fluxes)
all_trans = np.array(all_trans)
all_spectra = np.array(all_spectra)

In [None]:
plt.figure()
plt.scatter(all_phases, all_fluxes[:, 20], c=all_trans[:, 0])

In [None]:
wave_idx = 50

a.scatter(a.scale_flux[:, wave_idx], label='Flux')
plt.title('%d $\AA$' % a.wave[wave_idx])

In [None]:
interp_x = np.hstack([all_phases[:, None], all_trans])

In [None]:
# Take ratios to the median spectrum and see how components affect things.
def do_plot(trans_idx, wave_idx):
    order = np.argsort(all_phases)
    x = all_trans[order, trans_idx]
    y = all_fluxes[order, wave_idx]

    mean_func = math.windowed_median(y)
    scale_y = y / mean_func

    plt.figure()
    plt.scatter(x, scale_y, s=10, alpha=0.2)
    math.plot_binned_median(x, scale_y, mode='error')
    plt.ylim(0, 2)
    # plt.ylim(-1, 2)
    # plt.xlim(-30, 50)
    
from ipywidgets import interact
interact(do_plot, trans_idx=(0, all_trans.shape[1]-1), wave_idx=(0, all_fluxes.shape[1]-1))

## Spline model

In [None]:
from scipy.interpolate import BSpline

min_phase = -15
max_phase = 60

t = np.arange(min_phase, max_phase, 5)
t = np.hstack([t[0], t[0], t, t[-1], t[-1]])
k = 3
len_c = len(t) - k - 1

def super_spline(all_c, phases, trans):
    use_c = all_c

    center_spl = BSpline(t, use_c[0], k)
    x0_spl = BSpline(t, use_c[1], k)
    x1_spl = BSpline(t, use_c[2], k)
    x2_spl = BSpline(t, use_c[3], k)
    x00_spl = BSpline(t, use_c[4], k)
    x01_spl = BSpline(t, use_c[5], k)
    x02_spl = BSpline(t, use_c[6], k)
    x11_spl = BSpline(t, use_c[7], k)
    x12_spl = BSpline(t, use_c[8], k)
    x22_spl = BSpline(t, use_c[9], k)
    
    c0 = trans[:, 0] / 10.
    c1 = trans[:, 1] / 10.
    c2 = trans[:, 2] / 10.
    
    model = (
        center_spl(phases) +
        c0 * x0_spl(phases) +
        c1 * x1_spl(phases) +
        c2 * x2_spl(phases) +
        c0 * c0 * x00_spl(phases) +
        c0 * c1 * x01_spl(phases) +
        c0 * c2 * x02_spl(phases) +
        c1 * c1 * x11_spl(phases) +
        c1 * c2 * x12_spl(phases) +
        c2 * c2 * x22_spl(phases)
    )
    
    # print(np.max(mag_model), np.min(mag_model))
    # model = 10**(-0.4*mag_model)
    
    return model

In [None]:
def build_spline_basis(phases, trans):
    basis = []
    
    for i in range(10):
        for j in range(len_c):
            impulse_coeffs = np.zeros((10, len_c))
            impulse_coeffs[i, j] = 1.
            spl = super_spline(impulse_coeffs, use_phases, use_trans)
            basis.append(spl)
            
    basis = np.array(basis)
    
    return basis

In [None]:
cut = (all_phases > min_phase) & (all_phases < max_phase)
use_phases = all_phases[cut]
use_trans = all_trans[cut]
basis = build_spline_basis(use_phases, use_trans)
print(np.max(basis), np.min(basis))

In [None]:
all_spl_c = []
for wave_idx in range(len(a.wave)):
    use_flux = all_fluxes[cut, wave_idx]
    coef, residuals, rank, s = np.linalg.lstsq(basis.T, use_flux, rcond=None)
    spl_c = coef.reshape((10, len_c))
    all_spl_c.append(spl_c)
all_spl_c = np.array(all_spl_c)

In [None]:
np.max(a.trans)

In [None]:
vary_idx = 1
center_loc = [0, 0, 0.]

center_loc = np.array(center_loc, dtype=float)
deltas = all_trans - center_loc
diff = np.delete(deltas, vary_idx, axis=1)
dist = np.sqrt(np.sum(diff**2, axis=1))
close_cut = dist < 1.

close_trans = all_trans[close_cut, vary_idx]

min_vary = np.min(close_trans)
max_vary = np.max(close_trans)
diff = max_vary - min_vary
min_vary -= 0.1*diff
max_vary += 0.1*diff


plt.figure()
for plot_loc in np.linspace(min_vary, max_vary, 10):
    plot_t = np.linspace(-15, 60, 1000)
    loc = center_loc.copy()
    loc[vary_idx] = plot_loc
    pred_trans = np.array([loc] * len(plot_t))
    
    model = super_spline(spl_c, plot_t, pred_trans)
    
    plt.plot(
        plot_t, model,
        c=plt.cm.coolwarm((plot_loc - min_vary) / (max_vary - min_vary)),
    )

plt.scatter(all_phases[close_cut], all_fluxes[close_cut, wave_idx], c=all_trans[close_cut, vary_idx],
            vmin=min_vary, vmax=max_vary, cmap=plt.cm.coolwarm)
plt.xlim(-20, 40)
# plt.ylim(-5, 25)

In [None]:
# Residuals
# phases = np.array([i.phase for i in a.spectra])
phases = np.zeros(len(a.targets))
a.scatter(a.scale_flux[:, wave_idx], cmap=plt.cm.coolwarm)
residuals = a.scale_flux[:, wave_idx] - super_spline(spl_c, phases, a.trans)
scale = np.max(np.abs(residuals))
a.scatter(residuals, cmap=plt.cm.coolwarm, vmin=-scale, vmax=scale)

plt.figure()
plt.scatter(a.trans[:, 2], residuals)

In [None]:
plt.figure()
plt.scatter(residuals[a.train_cut & a.mag_cut], a.corr_mags[a.mag_cut[a.train_cut]])

In [None]:
plt.figure()
plt.scatter(all_trans[:, 1], all_trans[:, 2])

In [None]:
# Reproduced spectra
for vary_idx in range(3):
    # center_loc = [-1.4, -0.7, 0.]
    # center_loc = [-0.5, 0.5, 0]
    center_loc = [0, 0.5, 0]
    phase = 0

    center_loc = np.array(center_loc, dtype=float)
    deltas = all_trans - center_loc
    diff = np.delete(deltas, vary_idx, axis=1)
    dist = np.sqrt(np.sum(diff**2, axis=1))
    close_cut = dist < 1.

    close_trans = all_trans[close_cut, vary_idx]

    # min_vary = np.clip(np.min(close_trans), -5, -2)
    # max_vary = np.clip(np.max(close_trans), 2, 5)
    min_vary = np.min(close_trans)
    max_vary = np.max(close_trans)
    diff = max_vary - min_vary
    min_vary += 0.1*diff
    max_vary -= 0.1*diff
    vary_loc = np.linspace(min_vary, max_vary, 10)

    print(min_vary, max_vary)

    plt.figure(figsize=(8, 6))
    models = []
    for wave_idx in range(len(a.wave)):
        plot_t = np.ones(len(vary_loc)) * phase
        plot_loc = np.tile(center_loc, (10, 1))
        plot_loc[:, vary_idx] = vary_loc

        model = super_spline(all_spl_c[wave_idx], plot_t, plot_loc)

        models.append(model)

    models = np.array(models)
    for model, model_loc in zip(models.T, vary_loc):
        plt.plot(a.wave, model, c=plt.cm.coolwarm((model_loc - min_vary) / (max_vary - min_vary)))

    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Flux')
    plt.title('Component %d' % vary_idx)


        # plt.plot(
            # plot_t, model,
            # c=plt.cm.coolwarm((plot_loc - min_vary) / (max_vary - min_vary)),
        # )

    # plt.scatter(all_phases[close_cut], all_fluxes[close_cut, wave_idx], c=all_trans[close_cut, vary_idx],
                # vmin=min_vary, vmax=max_vary, cmap=plt.cm.coolwarm)
    # plt.xlim(-20, 40)
    # plt.ylim(-5, 25)

In [None]:
from idrtools.tools import snf_filters
from idrtools.spectrum import _get_snf_magnitude

lc_phases = np.linspace(min_phase, max_phase, 100)

def calc_lc(band, loc):
    models = []
    min_wave, max_wave = snf_filters[band]

    min_wave_idx = np.min(np.where(a.wave > min_wave))
    max_wave_idx = np.max(np.where(a.wave < max_wave))
    for wave_idx in range(min_wave_idx, max_wave_idx+1):
        plot_loc = np.tile(loc, (len(lc_phases), 1))
        model = super_spline(all_spl_c[wave_idx], lc_phases, plot_loc)
        models.append(model)

    models = np.array(models)
    bandmag = []
    for model in models.T:
        bandmag.append(_get_snf_magnitude(a.wave[min_wave_idx:max_wave_idx+1], model, band))
    bandmag = np.array(bandmag)
    # bandflux = np.sum(models, axis=0) / (max_wave - min_wave)
    # bandmag = 2.5*np.log10(bandflux)
    
    return bandmag

In [None]:
colors = {'u': 'C4', 'b': 'C0', 'v': 'C2', 'r': 'C3'}

def plot_lc(idx):
    loc = a.trans[idx]

    target = a.targets[idx]
    scale = 1 / a.model_scales[idx]
    
    p = []
    m = []
    e = []
    c = []
    for raw_spectrum in target.spectra:
        spectrum = raw_spectrum.bin_by_velocity(1000).apply_scale(scale)
        for band in ['u', 'b', 'v', 'r']:
            p.append(spectrum.phase)
            mag, err = spectrum.get_snf_magnitude(band, calculate_error=True)
            m.append(mag)
            e.append(err)
            c.append(colors[band])
            
    print(target)
    print(target.subset)
    print(a.trans[idx])
            
    plt.figure()
    plt.scatter(p, m, c=c)
    plt.errorbar(p, m, e, c=c)
        
    for band in ['u', 'b', 'v', 'r']:
        plt.plot(lc_phases, calc_lc(band, loc), c=colors[band])

    plt.xlim(min_phase, max_phase)
    plt.ylim(-15, -20)
    plt.xlabel('Phase')
    plt.ylabel('Mag')
        
interact(plot_lc, idx=(0, len(a.targets)-1))
    
    
# loc = [0, 0, 0]

# u = calc_lc('u', loc)
# b = calc_lc('b', loc)
# v = calc_lc('v', loc)
# r = calc_lc('r', loc)

# plt.figure()
# plt.plot(lc_phases, u, c='C4', label='u')
# plt.plot(lc_phases, b, c='C0', label='b')
# plt.plot(lc_phases, v, c='C2', label='v')
# plt.plot(lc_phases, r, c='C3', label='r')

# plt.legend()


In [None]:
a.spectra[13]

In [None]:
dists = np.sum((a.trans - a.trans[13])**2, axis=1)

In [None]:
np.argsort(dists)

In [None]:
vary_idx = 0
band = 'v'

# center_loc = [-1.5, -2.1, 0.]
center_loc = [0, 1, -1.]

center_loc = np.array(center_loc, dtype=float)
deltas = all_trans - center_loc
diff = np.delete(deltas, vary_idx, axis=1)
dist = np.sqrt(np.sum(diff**2, axis=1))
close_cut = dist < 0.5

close_trans = all_trans[close_cut, vary_idx]

if len(close_trans) > 0:
    min_vary = np.clip(np.min(close_trans), -5, -2)
    max_vary = np.clip(np.max(close_trans), 2, 5)
else:
    print("WARNING: NO DATA IN RANGE")
    min_vary = -2
    max_vary = 2
    
diff = max_vary - min_vary
min_vary -= 0.1*diff
max_vary += 0.1*diff
vary_loc = np.linspace(min_vary, max_vary, 10)

print(min_vary, max_vary)

plt.figure()

for plot_loc in vary_loc:
    use_loc = center_loc.copy()
    use_loc[vary_idx] = plot_loc
    mag = calc_lc(band, use_loc)
    
    plt.plot(
        lc_phases, mag,
        c=plt.cm.coolwarm((plot_loc - min_vary) / (max_vary - min_vary)),
        zorder=-1,
    )
    
# Overplot real data
plot_phases = []
plot_mags = []
plot_colors = []
for idx in np.where(close_cut)[0]:
    phase = all_phases[idx]
    spectrum = all_spectra[idx]
    val = all_trans[idx, vary_idx]
    
    mag = spectrum.get_snf_magnitude(band)
    
    color = plt.cm.coolwarm((val - min_vary) / (max_vary - min_vary))
    
    plot_phases.append(phase)
    plot_mags.append(mag)
    plot_colors.append(color)
    
plt.scatter(plot_phases, plot_mags, c=plot_colors, s=30, edgecolors='k')

plt.ylim(np.nanpercentile(plot_mags, 90) + 0.5, np.nanpercentile(plot_mags, 10) - 0.5)
plt.xlim(min_phase, max_phase)
# plt.ylim(-4, 0)

# plt.legend()

In [None]:
# Reproduced spectra
vary_idx = 1
center_loc = [0., 0., 0.]
phase = 0

center_loc = np.array(center_loc, dtype=float)

all_model = []

for wave_idx in range(len(a.wave)):
    loc = center_loc.copy()
    loc[vary_idx] = plot_loc
    model = super_spline(all_spl_c[wave_idx], phase, loc)
    all_model.append(model)
    
plt.figure()
plt.plot(a.wave, all_model)

vary_idx = 1
center_loc = [0., 0., 0.]

center_loc = np.array(center_loc, dtype=float)
deltas = all_trans - center_loc
diff = np.delete(deltas, vary_idx, axis=1)
dist = np.sqrt(np.sum(diff**2, axis=1))
close_cut = dist < 1.0

close_trans = all_trans[close_cut, vary_idx]

min_vary = np.min(close_trans)
max_vary = np.max(close_trans)
diff = max_vary - min_vary
min_vary -= 0.1*diff
max_vary += 0.1*diff


plt.figure()
for plot_loc in np.linspace(min_vary, max_vary, 10):
    plot_t = np.linspace(-15, 60, 1000)
    loc = center_loc.copy()
    loc[vary_idx] = plot_loc
    pred_trans = np.array([loc] * len(plot_t))
    
    model = super_spline(spl_c, plot_t, pred_trans)
    
    plt.plot(
        plot_t, model,
        c=plt.cm.coolwarm((plot_loc - min_vary) / (max_vary - min_vary)),
    )

plt.scatter(all_phases[close_cut], all_fluxes[close_cut, wave_idx], c=all_trans[close_cut, vary_idx],
            vmin=min_vary, vmax=max_vary, cmap=plt.cm.coolwarm)
plt.xlim(-20, 40)
# plt.ylim(-5, 25)

In [None]:
a.spectra[(np.abs(a.trans[:, 0] - 0.73) < 0.1) & (np.abs(a.trans[:, 1] - 1.8) < 0.1)]

## GP model

In [None]:
import george
from george import kernels

def build_gp(x):
    kernel = x[0]**2 * kernels.Matern32Kernel([x[1]**2, x[2]**2, x[2]**2], ndim=3)
    gp = george.GP(kernel)
    yerr = x[3] * np.ones(all_fluxes.shape[0])
    gp.compute(interp_x, yerr)
    
    return gp

def to_min(x):
    gp = build_gp(x)
    
    nll = -gp.log_likelihood(all_fluxes[:, 20])
    # gnll = - gp.grad_log_likelihood(all_fluxes[:, 20])

    print(nll, x)
    return nll#, gnll

res = minimize(to_min, [10, 5, 5, 1])#, jac=True)

gp = build_gp([20, 5, 5, 1])

In [None]:
gp = build_gp([-7.3473681, 28.69758488,-2.5888035,  0.65813375])

In [None]:
res

In [None]:
pred_phase = 0
wave_idx = 110

min_x = np.min(a.trans[:, 0]) - 0.5
max_x = np.max(a.trans[:, 0]) + 0.5
min_y = np.min(a.trans[:, 1]) - 0.5
max_y = np.max(a.trans[:, 1]) + 0.5

num_points = 50

plot_x, plot_y = np.meshgrid(np.linspace(min_x, max_x, num_points),
                             np.linspace(min_y, max_y, num_points))

flat_plot_x = plot_x.flatten()
flat_plot_y = plot_y.flatten()
flat_phases = pred_phase * np.ones(len(flat_plot_x))

plot_coords = np.array([flat_phases, flat_plot_x, flat_plot_y]).T

pred = gp.predict(all_fluxes[:, wave_idx], plot_coords, return_cov=False)
pred = pred.reshape(plot_x.shape)

cut = (all_phases - pred_phase) < 2.5
scatter_fluxes = all_fluxes[cut, wave_idx]

vmin = np.percentile(scatter_fluxes, 5)
vmax = np.percentile(scatter_fluxes, 95)

plt.figure()
plt.imshow(pred[::-1], extent=(min_x, max_x, min_y, max_y), vmin=vmin, vmax=vmax)

plt.scatter(all_trans[cut, 0], all_trans[cut, 1], c=scatter_fluxes,
            edgecolors='k', vmin=vmin, vmax=vmax)


plt.colorbar()

In [None]:
interp_x.shape

In [None]:
all_fluxes.shape

In [None]:
pred_x.shape

In [None]:
plot_coords.shape

In [None]:
for transval in np.linspace(-3, 3, 10):
    pred_phases = np.arange(0, 1)
    loc = [0, transval]
    pred_trans = np.array([loc] * len(pred_phases))
    pred_x = np.hstack([pred_phases[:, None], pred_trans])

    all_vals = []
    for i in range(all_fluxes.shape[1]):
        vals, cov = gp.predict(all_fluxes[:, i], pred_x)
        all_vals.append(vals)

    all_vals = np.array(all_vals)
    plt.plot(all_vals, label=loc)
    plt.legend()

In [None]:
plt.figure()

In [None]:
plt.figure()
all_vals = np.array(all_vals)
plt.plot(all_vals)
# plt.plot(pred_phases, all_vals)

In [None]:
x = np.arange(100)
y1 = (x < 50).astype(float)
y2 = (x >= 50).astype(float)

in_coefs = np.random.normal(size=(1000, 2))
in_coefs[:, 1] *= 0.5
in_funcs = in_coefs.dot([y1, y2])

in_funcs += np.random.normal(0, 0.1, size=in_funcs.shape)

iso = Isomap(n_components=2, n_neighbors=10)
trans = iso.fit_transform(in_funcs)

In [None]:
plt.figure()
plt.scatter(in_coefs[:, 1], trans[:, 1])

In [None]:
iso.kernel_pca_.lambdas_

In [None]:
print(np.std(in_coefs, axis=0))
print(np.std(trans, axis=0))

# Twins bias

In [149]:
plt.figure()
np.random.seed(6)
x = np.random.normal(size=50)
y = np.random.normal(size=50)

plt.scatter(x, y, label='Reference sample')

for i in range(4):
    if i == 0:
        sel = np.argmin(x)
    elif i == 1:
        sel = np.argmax(x)
    elif i == 2:
        sel = np.argmin(y)
    elif i == 3:
        sel = np.argmax(y)

    if i == 0:
        label = 'New objects'
    else:
        label = ''

    plt.scatter(x[sel], y[sel], label=label, c='C1')

    twins = np.where((x - x[sel])**2 + (y - y[sel])**2 < 2)[0]

    sum_x = 0
    sum_y = 0

    for twin_idx in twins:
        if sel == twin_idx:
            continue

        if sum_x == 0 and i == 0:
            label = 'Twins'
        else:
            label = ''
        plt.plot([x[sel], x[twin_idx]], [y[sel], y[twin_idx]], label=label, c='C2', zorder=0)

        sum_x += x[twin_idx]
        sum_y += y[twin_idx]

    twin_x = sum_x / (len(twins) - 1)
    twin_y = sum_y / (len(twins) - 1)
    
    if i == 0:
        label = 'Twin-recovered locations'
    else:
        label = ''

    plt.scatter(twin_x, twin_y, c='C3', label=label, marker='*')

plt.gca().set_aspect(1)
plt.xlim(-3, 5)
plt.ylim(-3, 3)

plt.xlabel('Simulated component 1')
plt.ylabel('Simulated component 2')

plt.tight_layout()

plt.legend(loc=4)

plt.savefig('./figures/twins_simulation.pdf')



FigureCanvasNbAgg()

In [151]:
inds = a.get_indicators()

In [155]:
inds.keys()

['EWCaIIHK',
 'EWSiII4000',
 'EWSiII5972',
 'EWSiII6355',
 'lamCaIIHK',
 'lamSiII6355',
 'vCaIIHK',
 'vSiII6355']

In [162]:
plt.figure()
plt.hist(inds['EWSiII5972'])



FigureCanvasNbAgg()

(array([10., 50., 51., 48., 20.,  7.,  7.,  4.,  2.,  3.]),
 array([-3.02241679,  3.17477715,  9.37197109, 15.56916502, 21.76635896,
        27.9635529 , 34.16074684, 40.35794078, 46.55513472, 52.75232866,
        58.9495226 ]),
 <a list of 10 Patch objects>)

In [163]:
plt.figure()
plt.scatter(inds['EWSiII4000'], inds['EWSiII6355'], c=inds['EWSiII5972'], cmap=plt.cm.coolwarm, vmin=0, vmax=30)
plt.xlabel('SiII 4000 Equivalent Width')
plt.ylabel('SiII 6355 Equivalent Width')



FigureCanvasNbAgg()

Text(0, 0.5, 'SiII 6355 Equivalent Width')

# Comparison to linear models

In [63]:
from sklearn.decomposition import PCA

num_show = 5

pca = PCA()
pca_trans = fill_mask(pca.fit_transform(a.iso_diffs[a.interp_mask]), a.interp_mask)

ref_scale = pca.explained_variance_ratio_[0]

plt.figure()
# plt.scatter(np.arange(num_show), variances[:num_show] / ref_var, label='Explained variance of each component')
plt.scatter(np.arange(num_show), pca.explained_variance_ratio_[:num_show] / ref_scale, label='Explained variance of each component')
plt.axhline(0.1 / ref_scale, label='Approximate uncertainty cut', ls='--', c='C3')
plt.axhline(np.mean(a.interp_power_fraction[a.interp_mask]) / ref_scale, label='Approximate mean uncertainty', ls='--', c='C2')
plt.ylim(0, None)
plt.xlabel('Component number')
plt.ylabel('Fraction of total variance')
plt.xticks(np.arange(num_show), np.arange(num_show) + 1)
plt.legend()
plt.tight_layout()

plt.savefig('./figures/isomap_component_variance.pdf')



FigureCanvasNbAgg()

In [62]:
a.do_embedding()
# a.do_component_blondin_plot()
a.scatter(a.salt_x1)



FigureCanvasNbAgg()

In [64]:
trans_bak = a.trans
a.trans = pca_trans.copy()
a.trans[:, 1] = - a.trans[:, 1]
a.scatter(a.salt_x1)
# a.scatter(a.trans[:, 2])
# a.do_component_blondin_plot()

a.trans = trans_bak



FigureCanvasNbAgg()

In [65]:
plt.figure()
plt.scatter(pca_trans[:, 1], pca_trans[:, 2], s=20)
plt.xlabel('PCA Component 2')
plt.ylabel('PCA Component 3')
plt.tight_layout()
plt.savefig('./figures/pca_component_2_3_bad.pdf')



FigureCanvasNbAgg()

In [66]:
plt.figure()
plt.scatter(a.trans[:, 1], a.trans[:, 2], s=20)
plt.xlabel('Isomap Component 2')
plt.ylabel('Isomap Component 3')
plt.tight_layout()
plt.savefig('./figures/isomap_component_2_3.pdf')



FigureCanvasNbAgg()

# Attrition and LaTeX variables for things

In [268]:
math.bootstrap_statistic(np.std, a.corr_mags[a.good_mag_mask], ddof=1)

(0.10176866084040756, 0.010124521582186663)

In [271]:
print("\\textbf{General selection requirements} & \\\\")
print("Initial sample                                        & %d \\\\" % len(a.dataset.targets))
print("More than 5 spectra                                   & %d \\\\" % a.attrition_enough_spectra)
print("SALT2 date of maximum light uncertainty < 1 day       & %d \\\\" % a.attrition_salt_daymax)
print("At least one spectrum within 5 days of maximum light  & %d \\\\" % a.attrition_range)
print("At least one spectrum with S/N 3300-3800~\AA\ > 100   & %d \\\\" % a.attrition_usable)
print("Host galaxy model converged                           & %d \\\\" % a.attrition_explicit)
print("\hline")
print("\\textbf{Estimation of the spectra at maximum light} & \\\\")
print("Spectrum uncertainty < 10\\%% of intrinsic power        & %d \\\\" % np.sum(a.interp_mask))
print("\hline")
print("\\textbf{Valid supernova brightness requirements} & \\\\")
print("Host galaxy redshift available                        & %d \\\\" % np.sum(a.interp_mask & (a.redshift_errs < 0.004)))
print("Host galaxy redshift above 0.02                       & %d \\\\" % np.sum(a.interp_mask & (a.redshift_errs < 0.004) & (a.redshifts > 0.02)))
print("Measured $A_V$ < 0.5 mag                              & %d \\\\" % np.sum(a.interp_mask & a.redshift_color_mask))
print("Blinded training subsample                            & %d \\\\" % np.sum(a.good_mag_mask))
print("\hline")
print("\\textbf{Comparisons to SALT2 fits} & \\\\")
print("SNfactory SALT2 selection requirements                & %d \\\\"% np.sum(a.salt_mask))
print("Passes host galaxy redshift and color requirements    & %d \\\\"% np.sum(a.good_salt_mask))
print("Has a valid interpolation to maximum light            & %d \\\\"% np.sum(a.good_salt_mask & a.interp_mask))
print("Blinded training subsample                            & %d \\\\"% np.sum(a.good_salt_mask & a.good_mag_mask))

def latex_command(name, formatstr, val):
    print("\\newcommand{\\%s}{%s}" % (name, formatstr % val))
    
def latex_std(name, val):
    std, std_err = math.bootstrap_statistic(np.std, val, ddof=1)
    latex_command(name, '%.3f $\\pm$ %.3f', (std, std_err))
    
def latex_nmad(name, val):
    nmad, nmad_err = math.bootstrap_statistic(math.nmad, val)
    latex_command(name, '%.3f $\\pm$ %.3f', (nmad, nmad_err))

print("")

latex_command('nummanifoldsne', '%d', len(a.targets))
latex_command('nummanifoldspectra', '%d', len(a.spectra))
latex_command('numinterpsne', '%d', np.sum(a.interp_mask))
print("")
latex_command('numsnftrain', '%d', np.sum([i.subset == 'training' for i in a.targets[a.interp_mask]]))
latex_command('numsnfvalid', '%d', np.sum([i.subset == 'validation' for i in a.targets[a.interp_mask]]))
latex_command('numsnfother', '%d', np.sum([i.subset not in ['training', 'validation'] for i in a.targets[a.interp_mask]]))
print("")
latex_command('numsnredshift', '%d', np.sum(a.interp_mask & (a.redshift_errs >= 0.004)))
latex_command('numlowredshift', '%d', np.sum(a.interp_mask & (a.redshifts <= 0.02)))
latex_command('numhighav', '%d', np.sum(a.interp_mask & (a.colors - np.nanmedian(a.colors) >= 0.5)))
print("")
latex_command('nummagsne', '%d', np.sum(a.interp_mask & a.redshift_color_mask))
latex_command('nummagsnetrain', '%d', np.sum(a.good_mag_mask))
latex_command('nummagsnevalidation', '%d', np.sum(a.interp_mask & a.redshift_color_mask & ~a.good_mag_mask))
print("")
latex_command('saltparammb', '%.2f', a.salt_MB)
latex_command('saltparamalpha', '%.3f', a.salt_alpha)
latex_command('saltparambeta', '%.2f', a.salt_beta)
latex_command('saltparamsigmaint', '%.3f', a.salt_intrinsic_dispersion)
# latex_command('saltparamrms', '%.3f', np.std(a.salt_hr[a.good_salt_mask]))
latex_std('saltparamrms', a.salt_hr[a.good_salt_mask])
latex_nmad('saltparamnmad', a.salt_hr[a.good_salt_mask])
latex_command('saltparamwrms', '%.3f', a.salt_wrms)
latex_command('saltparammindisp', '%.2f', np.min(a.salt_hr_uncertainties[a.good_salt_mask]))
latex_command('saltparammaxdisp', '%.2f', np.max(a.salt_hr_uncertainties[a.good_salt_mask]))
print("")
latex_std('rawrbtlmagstd', a.mags[a.good_mag_mask])
latex_nmad('rawrbtlmagnmad', a.mags[a.good_mag_mask])
print("")
latex_command('twinrbtlmagstd', '%.3f', a.twins_rms)
latex_command('twinrbtlmagnmad', '%.3f', a.twins_nmad)
print("")
latex_std('saltcomprawrbtlmagstd', a.mags[a.good_mag_mask & a.good_salt_mask])
latex_std('saltcompsaltmagstd', a.salt_hr[a.good_mag_mask & a.good_salt_mask])

a.fit_gp(verbose=False, kind='salt_raw')
gp_uncertainties = np.sqrt(np.diag(a.gp_hyperparameter_covariance))
print("")
latex_command('saltgpcolor', '%.2f $\\pm$ %.2f', (a.gp_hyperparameters[0], gp_uncertainties[0]))
latex_command('saltgpintdisp', '%.3f $\\pm$ %.3f', (a.gp_hyperparameters[1], gp_uncertainties[1]))
latex_command('saltgpkernelamp', '%.3f $\\pm$ %.3f', (np.abs(a.gp_hyperparameters[2]), gp_uncertainties[2]))
latex_command('saltgpkernellengthscale', '%.2f $\\pm$ %.2f', (a.gp_hyperparameters[3], gp_uncertainties[3]))
latex_std('saltgprms', a.corr_mags[a.good_salt_mask & a.interp_mask])
latex_std('saltgpcompsaltrms', a.salt_hr[a.good_salt_mask & a.interp_mask])

a.fit_gp(verbose=False)
gp_uncertainties = np.sqrt(np.diag(a.gp_hyperparameter_covariance))
print("")
latex_command('rbtlgpcolor', '%.2f $\\pm$ %.2f', (a.fiducial_rv * (1 + a.gp_hyperparameters[0]), a.fiducial_rv * gp_uncertainties[0]))
latex_command('rbtlgpintdisp', '%.3f $\\pm$ %.3f', (a.gp_hyperparameters[1], gp_uncertainties[1]))
latex_command('rbtlgpkernelamp', '%.3f $\\pm$ %.3f', (np.abs(a.gp_hyperparameters[2]), gp_uncertainties[2]))
latex_command('rbtlgpkernellengthscale', '%.2f $\\pm$ %.2f', (a.gp_hyperparameters[3], gp_uncertainties[3]))

print("")
latex_std('rbtlgprms', a.corr_mags[a.good_mag_mask])
latex_command('rbtlgpnmad', '%.3f', math.nmad(a.corr_mags[a.good_mag_mask]))

print("")
x1 = a.salt_hr[(a.trans[:, 0] < 2) & a.good_salt_mask & a.interp_mask]
x2 = a.salt_hr[(a.trans[:, 0] > 2) & a.good_salt_mask & a.interp_mask]
m1 = np.mean(x1)
m2 = np.mean(x2)
err1 = np.std(x1) / np.sqrt(len(x1))
err2 = np.std(x2) / np.sqrt(len(x2))
latex_command('saltisomapdiff', '%.3f $\\pm$ %.3f', (np.abs(m1-m2), np.sqrt(err1**2 + err2**2)))

print("")
latex_command('pecvelcontribution', '%.3f', np.sqrt(np.mean(a.get_peculiar_velocity_uncertainty()[a.good_mag_mask & a.good_salt_mask]**2)))

\textbf{General selection requirements} & \\
Initial sample                                        & 438 \\
More than 5 spectra                                   & 280 \\
SALT2 date of maximum light uncertainty < 1 day       & 277 \\
At least one spectrum within 5 days of maximum light  & 252 \\
At least one spectrum with S/N 3300-3800~\AA\ > 100   & 204 \\
Host galaxy model converged                           & 202 \\
\hline
\textbf{Estimation of the spectra at maximum light} & \\
Spectrum uncertainty < 10\% of intrinsic power        & 178 \\
\hline
\textbf{Valid supernova brightness requirements} & \\
Host galaxy redshift available                        & 173 \\
Host galaxy redshift above 0.02                       & 149 \\
Measured $A_V$ < 0.5 mag                              & 138 \\
Blinded training subsample                            & 76 \\
\hline
\textbf{Comparisons to SALT2 fits} & \\
SNfactory SALT2 selection requirements                & 175 \\
Passes host galaxy redshift 

## Standardization comparison

In [272]:
a.fit_gp()

Fitting GP hyperparameters...
Fit result:
      fun: -59.26127018578216
 hess_inv: array([[ 4.98245421e-03,  3.23904835e-06, -6.30946395e-04,
        -3.05735506e-02],
       [ 3.23904835e-06,  1.61370479e-04,  2.64752292e-05,
         1.04339917e-02],
       [-6.30946395e-04,  2.64752292e-05,  1.17696783e-03,
         4.34280822e-02],
       [-3.05735506e-02,  1.04339917e-02,  4.34280822e-02,
         3.55195095e+00]])
      jac: array([-2.86102295e-06, -9.05990601e-06,  2.38418579e-06, -5.72204590e-06])
  message: 'Optimization terminated successfully.'
     nfev: 132
      nit: 15
     njev: 22
   status: 0
  success: True
        x: array([-0.046595  ,  0.07682295,  0.10195705,  2.69551498])
Finite difference covariance step sizes: [1.60e-04 2.00e-05 4.00e-05 2.56e-03]
Fit uncertainty: [0.07023724 0.01255326 0.03414024 1.8833214 ]
Fit NMAD:        0.08971590237637203
Fit std:         0.10109691316372353


In [286]:
pec_disp = a.get_peculiar_velocity_uncertainty()[a.good_mag_mask]
# int_disp()

In [285]:
a.get_peculiar_velocity_uncertainty?

[0;31mSignature:[0m [0ma[0m[0;34m.[0m[0mget_peculiar_velocity_uncertainty[0m[0;34m([0m[0mpeculiar_velocity[0m[0;34m=[0m[0;36m300[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Calculate dispersion added to the magnitude due to host galaxy
peculiar velocity
[0;31mFile:[0m      /home/scpdata06/kboone/snfactory/manifold_twins/analysis/manifold_twins.py
[0;31mType:[0m      method


In [287]:
pec_disp.shape

(76,)

In [290]:
def int_disp(vals, pec_vel_disps, axis=None):
    std = np.std(vals, ddof=1, axis=axis)
    corr = np.mean(pec_vel_disps**2, axis=axis)
    corr_std = np.sqrt(np.clip(std**2 - corr, 0, None))

    return corr_std

def get_stat_str(all_mags, cut, function, *args):
    line_str = ""
    for mags in all_mags:
        if line_str:
            line_str += " &"
            
        use_mags = mags[cut]
        if np.any(np.isnan(use_mags)):
            line_str += "%20s" % "--"
        else:
            res, res_err = math.bootstrap_statistic(function, use_mags, *args)
            line_str += "%7.3f $\pm$ %6.3f" % (res, res_err)
    
    line_str += " \\\\"
    
    return line_str

def make_table(all_mags, cut, label):
    stats = {
        'NMAD': (math.nmad,),
        'Standard deviation': (unbiased_std,),
        'Pec. vel. removed': (int_disp, a.get_peculiar_velocity_uncertainty()[cut]),
    }
    for i, (stat_name, stat_args) in enumerate(stats.items()):
        if len(label) > i:
            prefix = label[i]
        else:
            prefix = ""
            
        if i == 0:
            num_sne = "%d" % np.sum(cut)
        else:
            num_sne = ""
            
        stat_str = get_stat_str(all_mags, cut, *stat_args)
        print("%20s & %5s & %20s & %s" % (prefix, num_sne, stat_name, stat_str))
        
    print("\hline")
    
# RBTL only
rbtl_mags = a.mags.copy()
rbtl_mags[~a.good_mag_mask] = np.nan
        
# RBTL + Isomap
a.fit_gp(verbose=False)
rbtl_isomap_mags = a.corr_mags.copy()
rbtl_isomap_mags[~a.good_mag_mask] = np.nan

# SALT2
salt_mags = a.salt_hr.copy()
salt_mags[~a.good_salt_mask] = np.nan

# SALT2 + Isomap
a.fit_gp(kind='salt_raw', verbose=False)
salt_isomap_mags = a.corr_mags.copy()
salt_isomap_mags[~a.good_salt_mask] = np.nan
salt_isomap_mags[~a.interp_mask] = np.nan

all_mags = [rbtl_mags, rbtl_isomap_mags, salt_mags, salt_isomap_mags]

print("\hline")

# RBTL only
make_table(all_mags, a.interp_mask & a.redshift_color_mask & a.train_mask, ['Maximum spectrum', '+ training cuts'])

# SALT2 only
make_table(all_mags, a.good_salt_mask, ['SALT2 fit cuts'])

# SALT2 + Isomap
make_table(all_mags, a.good_salt_mask & a.interp_mask, ['Maximum spectrum', '+ SALT2 fit cuts'])

# All
make_table(all_mags, a.interp_mask & a.redshift_color_mask & a.salt_mask & a.train_mask, ['All cuts'])

\hline
    Maximum spectrum &    76 &                 NMAD &   0.112 $\pm$  0.014 &  0.090 $\pm$  0.013 &                  -- &                  -- \\
     + training cuts &       &   Standard deviation &   0.124 $\pm$  0.013 &  0.102 $\pm$  0.010 &                  -- &                  -- \\
                     &       &    Pec. vel. removed &   0.110 $\pm$  0.015 &  0.084 $\pm$  0.013 &                  -- &                  -- \\
\hline
      SALT2 fit cuts &   144 &                 NMAD &                   -- &                  -- &  0.110 $\pm$  0.013 &                  -- \\
                     &       &   Standard deviation &                   -- &                  -- &  0.152 $\pm$  0.011 &                  -- \\
                     &       &    Pec. vel. removed &                   -- &                  -- &  0.142 $\pm$  0.012 &                  -- \\
\hline
    Maximum spectrum &   126 &                 NMAD &                   -- &                  -- &  0.101 $\pm$  0.