# Setup

In [1]:
from manifold_twins import ManifoldTwinsAnalysis
from matplotlib import pyplot as plt
import numpy as np
from idrtools import math
from utils import frac_to_mag

In [2]:
%matplotlib ipympl

In [3]:
a = ManifoldTwinsAnalysis()
a.run_analysis()

Loading dataset...
    IDR:          BLACKSTON
    Phase range: [-5.0, 5.0] days
    Bin velocity: 1000.0


100%|██████████| 415/415 [00:17<00:00, 23.55it/s]


Estimating the spectra at maximum light...
    Loaded cached stan model
    Using saved stan result
Reading between the lines...
    Loaded cached stan model
    Using saved stan result
Building masks...
    Masking 30/203 targets whose uncertainty power is 
    more than 0.100 of the intrinsic power.
Generating the manifold learning embedding...
Calculating spectral indicators...
Fitting GP hyperparameters...
    Fit result:           Optimization terminated successfully.
    Color scale:          -0.007 ± 0.070
    Intrinsic dispersion: 0.065 ± 0.013 mag
    GP kernel amplitude:  0.111 ± 0.042 mag
    GP length scale:      3.348 ± 2.272
    Fit NMAD:             0.072 mag
    Fit std:              0.098 mag
Calculating SALT2 Hubble residuals...
SALT2 Hubble fit: 
    ref_mag: -10.449
    alpha:   0.142
    beta:    2.665
    σ_int:   0.134
    RMS:     0.156
    NMAD:    0.110
    WRMS:    0.156
Loading host galaxy data...
Done!


# Cuts

In [4]:
plt.figure()

plt.scatter(a.redshifts[a.uncertainty_mask], a.rbtl_mags[a.uncertainty_mask], s=15, c='C3', label='Supernovae rejected by cuts')
plt.scatter(a.redshifts[a.uncertainty_mask & a.redshift_color_mask], a.rbtl_mags[a.uncertainty_mask & a.redshift_color_mask], s=15, c='C0', label='Supernovae passing cuts')

z_range = np.linspace(0.001, 0.09, 100)
pec_vel_disp = a.calculate_peculiar_velocity_uncertainties(z_range)
plt.fill_between(z_range, -pec_vel_disp, pec_vel_disp, alpha=0.2, label='Peculiar velocity dispersion')
plt.axvline(0.02, lw=1, ls='--', c='k', label='Redshift cutoff')

plt.xlim(0.001, 0.09)
plt.ylim(-1, 1.5)
plt.xlabel('Redshift')
plt.ylabel('RBTL measured magnitude')
plt.legend()
plt.savefig('./figures/rbtl_magnitude.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [12]:
plt.figure()

base_mask = a.uncertainty_mask & a.salt_mask
salt_hr = a.salt_residuals['corr_mags']

plt.scatter(a.redshifts[base_mask], salt_hr[base_mask], s=15, c='C3', label='Supernovae rejected by cuts')
plt.scatter(a.redshifts[base_mask & a.redshift_color_mask], salt_hr[base_mask & a.redshift_color_mask], s=15, c='C0', label='Supernovae passing cuts')

z_range = np.linspace(0.001, 0.09, 100)
pec_vel_disp = 0.00217 / z_range
plt.fill_between(z_range, -pec_vel_disp, pec_vel_disp, alpha=0.2, label='Peculiar velocity dispersion')
plt.axvline(0.02, lw=1, ls='--', c='k', label='Redshift cutoff')

plt.xlim(0.001, 0.09)
# plt.ylim(-1, 1.5)
plt.ylim(-1, 2.)
plt.xlabel('Redshift')
plt.ylabel('SALT2 measured magnitude')
plt.legend()
plt.tight_layout()
plt.savefig('./figures/salt_magnitude_redshift.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [11]:
plt.figure()

plt.scatter(a.redshifts[a.uncertainty_mask & a.redshift_color_mask], a.rbtl_mags[a.uncertainty_mask & a.redshift_color_mask], s=15, c='C0', label='Supernovae passing cuts')
plt.xlabel('Redshift')
plt.ylabel('RBTL measured magnitude')

z_range = np.linspace(0.001, 0.09, 100)
pec_vel_disp = a.calculate_peculiar_velocity_uncertainties(z_range)
plt.fill_between(z_range, -pec_vel_disp, pec_vel_disp, alpha=0.2, label='Peculiar velocity dispersion')
plt.fill_between(z_range, -2*pec_vel_disp, 2*pec_vel_disp, alpha=0.1, facecolor='C0')
plt.fill_between(z_range, -3*pec_vel_disp, 3*pec_vel_disp, alpha=0.1, facecolor='C0')

plt.legend()
plt.tight_layout()
plt.xlim(0.01, 0.09)
plt.ylim(-0.5, 0.5)
plt.savefig('./figures/rbtl_magnitude_cut.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [14]:
print("Raw RBTL mag std:  %.3f mag" % np.std(a.rbtl_mags[a.uncertainty_mask & a.redshift_color_mask & a.train_mask]))
print("Raw RBTL mag NMAD: %.3f mag" % math.nmad(a.rbtl_mags[a.uncertainty_mask & a.redshift_color_mask & a.train_mask]))

Raw RBTL mag std:  0.125 mag
Raw RBTL mag NMAD: 0.100 mag


In [21]:
plt.figure()

plt.scatter(a.redshifts[a.uncertainty_mask & a.redshift_color_mask], a.corr_mags[a.uncertainty_mask & a.redshift_color_mask], s=15, c='C0', label='Supernovae passing cuts')
plt.xlabel('Redshift')
plt.ylabel('RBTL corrected magnitude')

z_range = np.linspace(0.001, 0.09, 100)
pec_vel_disp = a.calculate_peculiar_velocity_uncertainties(z_range)
plt.fill_between(z_range, -pec_vel_disp, pec_vel_disp, alpha=0.2, label='Peculiar velocity dispersion')
plt.fill_between(z_range, -2*pec_vel_disp, 2*pec_vel_disp, alpha=0.1, facecolor='C0')
plt.fill_between(z_range, -3*pec_vel_disp, 3*pec_vel_disp, alpha=0.1, facecolor='C0')

plt.legend()
plt.tight_layout()
plt.xlim(0.01, 0.09)
plt.ylim(-0.5, 0.5)
plt.savefig('./figures/rbtl_corr_magnitude_cut.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Standardization 

## Raw magnitudes

In [22]:
a.scatter(a.mags, a.good_mag_mask, vmin=-0.2, vmax=0.2, label='Residual magnitude', invert_colorbar=True)
plt.savefig('./figures/components_12_residual_magnitude.pdf')

AttributeError: 'ManifoldTwinsAnalysis' object has no attribute 'mags'

In [None]:
a.scatter(a.mags, a.good_mag_mask, vmin=-0.2, vmax=0.2, axis_1=0, axis_2=2, label='Residual magnitude', invert_colorbar=True)
plt.savefig('./figures/components_13_residual_magnitude.pdf')

In [None]:
a.scatter(a.mags, a.good_mag_mask, vmin=-0.2, vmax=0.2, axis_1=1, axis_2=2, label='Residual magnitude', invert_colorbar=True)
plt.savefig('./figures/components_23_residual_magnitude.pdf')

## GP standardization

In [None]:
# Reset to defaults in case things got messed up
a.do_embedding()
a.fit_gp()

In [None]:
a.plot_gp(axis_1=0, axis_2=1)
plt.savefig('./figures/gp_mag_components_12.pdf')

a.plot_gp(axis_1=0, axis_2=2)
plt.savefig('./figures/gp_mag_components_13.pdf')

a.plot_gp(axis_1=1, axis_2=2)
plt.savefig('./figures/gp_mag_components_23.pdf')

## Check vs phases of original spectra

In [None]:
plt.figure()

# Get the mean phase for every target.
mean_phases = []
for i in range(len(a.targets)):
    mean_phases.append(np.mean(a.salt_phases[a.target_map == i]))
mean_phases = np.array(mean_phases)

# plt.scatter(a.salt_phases[a.center_mask][a.good_mag_mask], a.corr_mags[a.good_mag_mask], label='Individual observations')
# math.plot_binned_mean(a.salt_phases[a.center_mask][a.good_mag_mask], a.corr_mags[a.good_mag_mask], c='C2', label='Binned mean')
plt.scatter(mean_phases[a.good_mag_mask], a.corr_mags[a.good_mag_mask], label='Individual observations')
math.plot_binned_mean(mean_phases[a.good_mag_mask], a.corr_mags[a.good_mag_mask], c='C2', label='Binned mean')
# plt.xlabel('Phase of closest spectrum to maximum (days)')
plt.xlabel('Mean phase of observed spectra (days)')
plt.ylabel('Residual magnitude')
plt.gca().invert_yaxis()
plt.legend()

# SALT2 comparison

In [None]:
# Load SALT2 Hubble residuals
a.calculate_salt_hubble_residuals()

## SALT2 colors

In [None]:
plt.figure()
plt.scatter(a.salt_color, a.colors, s=5)
plt.xlabel('SALT2 Color ($c$)')
plt.ylabel('RBTL Color ($A_V$)')
plt.tight_layout()

plt.savefig('./figures/salt2_color_comparison.pdf')

## SALT2 X1

In [None]:
a.scatter(a.salt_x1, a.interp_mask, label='SALT $x_1$')
plt.savefig('./figures/salt2_x1_components_full.pdf')

In [None]:
a.scatter(a.salt_x1, a.interp_mask & a.good_salt_mask, axis_1=0, axis_2=1, label='SALT $x_1$', vmin=-2., vmax=1.5)
plt.savefig('./figures/salt2_x1_components.pdf')

In [None]:
a.plot_gp(kind='salt_raw', vmin=-0.5, vmax=0.5)

In [None]:
# Find the best predictor of x1
def to_min(x):
    diff = a.salt_x1 - a.embedding.dot(x)
    return np.nanstd(diff[a.salt_mask])

res = minimize(to_min, [0, 0, 0])

norm_x = res.x / np.sqrt(np.sum(res.x**2))
print(norm_x)

plt.figure()
# plt.scatter(a.embedding.dot(res.x), a.salt_x1, c=a.salt_mask)
plt.scatter(a.embedding.dot(res.x)[~a.salt_mask], a.salt_x1[~a.salt_mask], c='C3', s=30, label='"Bad" SALT2 fits', alpha=0.8)
plt.scatter(a.embedding.dot(res.x)[a.salt_mask], a.salt_x1[a.salt_mask], c='C0', s=30, label='"Good" SALT2 fits', alpha=0.8)
plt.plot([-3, 3], [-3, 3], ls='--', c='k', label='One-to-one line')
plt.xlim(-3, 3)
plt.ylim(-3, 3)
plt.xlabel('Rotated Isomap components')
plt.ylabel('SALT2 $x_1$')
plt.legend()
plt.savefig('./figures/rotated_isomap_salt_x1.pdf')


print(np.corrcoef(a.embedding.dot(res.x)[a.interp_mask & a.salt_mask], a.salt_x1[a.interp_mask & a.salt_mask]))

## SALT2 outliers (Type Iax)

In [None]:
a.scatter(a.salt_x1)

In [None]:
iax_mask = (a.embedding[:, 0] > 4.) & (a.embedding[:, 1] < -3)
a.targets[iax_mask]

In [None]:
# Outlier spectra
mask = (a.embedding[:, 0] > 4) & (a.embedding[:, 1] < -2)
print(a.targets[mask])
print(a.colors[mask])
print(a.redshifts[mask])
print(a.mags[mask])

# Ref spectrum
ref_target = 'SNF20070803-005'
for idx2, target in enumerate(a.targets):
    if target.name == ref_target:
        break
        
# idx2 = np.where((a.embedding[:, 0] > 4.8) & (a.embedding[:, 1] < 2))[0][0]

plt.figure()
for i in np.where(mask)[0]:
    plt.plot(a.wave, a.scale_flux[i] * spectrum_plot_scale, label=a.targets[i].name)
    
plt.plot(a.wave, a.scale_flux[idx2] * spectrum_plot_scale, c='k', ls='--', label=a.targets[idx2].name)
print(a.embedding[idx2])

plt.legend()

plt.xlabel('Wavelength ($\AA$)')
plt.ylabel(spectrum_plot_ylabel)
plt.tight_layout()

plt.savefig('./figures/type_iax_comparison.pdf')

## Other outliers

In [None]:
a.targets[(a.embedding[:, 1] > 4.)]

In [None]:
# Outlier spectra
mask = (a.embedding[:, 0] < -4)
print(a.targets[mask])
print(a.colors[mask])
print(a.redshifts[mask])
print(a.mags[mask])

# Ref spectrum
# ref_target = 'SNF20070803-005'
# for idx2, target in enumerate(a.targets):
    # if target.name == ref_target:
        # break
        
# idx2 = np.where((a.embedding[:, 0] > 4.8) & (a.embedding[:, 1] < 2))[0][0]

plt.figure()
for i in np.where(mask)[0]:
    plt.plot(a.wave, a.scale_flux[i] * spectrum_plot_scale, label=a.targets[i].name)
    
# plt.plot(a.wave, a.scale_flux[idx2] * spectrum_plot_scale, c='k', ls='--', label=a.targets[idx2].name)
print(a.embedding[idx2])

plt.legend()

plt.xlabel('Wavelength ($\AA$)')
plt.ylabel(spectrum_plot_ylabel)
plt.tight_layout()

# plt.savefig('./figures/type_iax_comparison.pdf')

## SALT2 magnitudes vs components

In [None]:
a.scatter(a.salt_hr, mask=a.salt_mask & a.redshift_color_mask & a.uncertainty_mask, vmin=-0.3, vmax=0.3, label='SALT2-corrected residual magnitude', invert_colorbar=True)
# a.scatter(a.salt_hr, mask=a.good_salt_mask & a.interp_mask, vmin=-0.3, vmax=0.3, label='SALT2 Hubble residuals', invert_colorbar=True)
plt.savefig('./figures/salt2_hr_components.pdf')

In [None]:
plt.figure()

use_x = a.embedding[:, 0]

mask = a.salt_mask & a.redshift_color_mask & a.uncertainty_mask
plt.errorbar(use_x[mask], a.salt_hr[mask], a.salt_hr_uncertainties[mask], label='Individual supernovae', fmt='.', alpha=0.2, c='k')
math.plot_binned_mean(use_x[mask], a.salt_hr[mask], c='C3', lw=2, label='Binned mean')
plt.legend()

plt.gca().invert_yaxis()

plt.xlabel('Component 1')
plt.ylabel('SALT2 Hubble residuals')

plt.tight_layout()
plt.savefig('./figures/salt2_hr_component_1.pdf')

plt.figure()
plt.hist(a.salt_hr[(use_x < 2) & mask], 10, (-0.6, 0.4), alpha=0.3, color='C0', label='Component 1 < 2', density=True)
plt.hist(a.salt_hr[(use_x < 2) & mask], 10, (-0.6, 0.4), histtype='step', lw=2, color='C0', density=True)
plt.hist(a.salt_hr[(use_x > 2) & mask], 10, (-0.6, 0.4), alpha=0.3, color='C1', label='Component 1 > 2', density=True)
plt.hist(a.salt_hr[(use_x > 2) & mask], 10, (-0.6, 0.4), histtype='step', lw=2, color='C1', density=True)

plt.gca().invert_xaxis()

plt.xlabel('SALT2 residual magnitude')
plt.ylabel('Normalized counts')
plt.legend()

plt.tight_layout()
plt.savefig('./figures/salt2_hr_hist.pdf')

## SALT2 + Isomap standardization

In [155]:
a.fit_gp(kind='salt_raw')
a.plot_gp(kind='salt_raw', vmin=-0.5, vmax=0.5)

plt.savefig('./figures/salt_gp_component_12.pdf')

Fitting GP hyperparameters...
    Fit result:           Desired error not necessarily achieved due to precision loss.
    Color scale:          3.108 ± 0.211
    Intrinsic dispersion: 0.090 ± 0.014 mag
    GP kernel amplitude:  0.358 ± 0.203 mag
    GP length scale:      7.316 ± 4.765
    Fit NMAD:             0.095 mag
    Fit std:              0.119 mag


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [157]:
plt.figure()
m = a.good_salt_mask & a.interp_mask
plt.scatter(a.salt_x1[m], (a.corr_mags - a.salt_hr)[m], c=a.embedding[m, 0], cmap=plt.cm.coolwarm, vmin=-3, vmax=3)
plt.gca().invert_yaxis()
plt.colorbar(label='Value of Isomap component 1')
plt.xlabel('SALT2 $x_1$')
plt.ylabel('Difference between SALT2 and\nSALT2 + Isomap standardization (mag)')
plt.tight_layout()
plt.savefig('./figures/salt_isomap_difference.pdf')

  """Entry point for launching an IPython kernel.


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

AttributeError: 'ManifoldTwinsAnalysis' object has no attribute 'good_salt_mask'

In [None]:
# Redo the original GP fit to put the analysis back in the original state
a.fit_gp(verbose=True)

In [None]:
target = 'BOSS38'

for j, i in enumerate(a.targets):
    if target in i.name:
        idx = j
        break
else:
    raise Exception(f"No target {target} found!")
    
print(f"target:        {a.targets[idx].name}")
print(f"isomap coord:  {a.embedding[idx]}")
print(f"SALT x1:       {a.salt_x1[idx]}")

gp_pred = a.predict_gp(a.embedding[idx], a.salt_color[idx], kind='salt_raw')[0]
# salt_pred = a.salt_alpha * a.salt_x1[idx] - a.salt_beta * a.salt_color[idx]
salt_pred = a.salt_hr_raw[idx] - a.salt_hr[idx]

print(f"GP prediction: {gp_pred}")
print(f"SALT2 pred:    {salt_pred}")
print(f"pred diff:     {salt_pred - gp_pred}")

# print(f"HR difference: {(a.corr_mags - a.salt_hr)[idx]}")

## Same SALT2 parameters but not at the same location in the manifold

In [None]:
def outer_diff(x):
    return (x - x[:, None])

count = np.array([len(i.spectra) for i in a.targets])
first_phase = np.array([i.spectra[0].phase for i in a.targets])
mask = (count > 8) & (a.redshifts > 0.03) & (first_phase < -2)

salt_x1_diff = outer_diff(a.salt_x1)
salt_c_diff = outer_diff(a.salt_color)
salt_diff = outer_diff(a.salt_x1)**2 + 100 * outer_diff(a.salt_color)**2
manif_diff = outer_diff(a.embedding[:, 0])**2 + outer_diff(a.embedding[:, 1])**2 + outer_diff(a.embedding[:, 2])**2

phase_diff = outer_diff(a.salt_phases[a.center_mask])

np.fill_diagonal(salt_diff, np.nan)
np.fill_diagonal(manif_diff, np.nan)

salt_diff[~mask] = np.nan
salt_diff[:, ~mask] = np.nan
manif_diff[~mask] = np.nan
manif_diff[:, ~mask] = np.nan

In [None]:
cut = (salt_x1_diff > 0) & (salt_x1_diff < 0.2) & (np.abs(salt_c_diff) < 0.02) & (manif_diff > 30) & (np.abs(phase_diff) < 1.0)
print("Num objects:  %s" % np.sum(cut))
salt_diff_idx = [i[2] for i in np.where(cut)]

t1 = a.targets[salt_diff_idx[0]]
t2 = a.targets[salt_diff_idx[1]]

print("target 1:     %s" % t1)
print("target 2:     %s" % t2)
print("redshifts:    %.4f, %.4f" % (a.redshifts[salt_diff_idx[0]], a.redshifts[salt_diff_idx[1]]))
print("x_1 values:   %+.3f, %+.3f" % (a.salt_x1[salt_diff_idx[0]], a.salt_x1[salt_diff_idx[1]]))
print("c values:     %+.3f, %+.3f" % (a.salt_color[salt_diff_idx[0]], a.salt_color[salt_diff_idx[1]]))
print("mags:         %.3f, %.3f" % (a.mags[salt_diff_idx[0]], a.mags[salt_diff_idx[1]]))
print("salt HRs:     %.3f, %.3f" % (a.salt_hr[salt_diff_idx[0]], a.salt_hr[salt_diff_idx[1]]))
print("corr mags:    %.3f, %.3f" % (a.corr_mags[salt_diff_idx[0]], a.corr_mags[salt_diff_idx[1]]))
print("embedding 1:  %s" % a.embedding[salt_diff_idx[0]])
print("embedding 2:  %s" % a.embedding[salt_diff_idx[1]])

salt_comparision_mb_diff = np.abs(
    a.salt_hr[salt_diff_idx[0]]
    - a.salt_hr[salt_diff_idx[1]]
)
dm_1 = frac_to_mag(t1.salt_fit['x0_err'] / t1.salt_fit['x0'])
dm_2 = frac_to_mag(t2.salt_fit['x0_err'] / t2.salt_fit['x0'])

salt_comparison_mb_diff_err = np.sqrt(dm_1**2 + dm_2**2)
print("mag_diff:     %.3f +/- %.3f" % (salt_comparision_mb_diff, salt_comparison_mb_diff_err))

In [None]:
bands = ['U', 'B', 'V', 'R', 'I']

band_colors = {
    'U': 'C4',
    'B': 'C0',
    'V': 'C2',
    'R': 'C3',
    'I': 'C1',
}

gap = 1.

def plot_lightcurve(target, marker='o', ls='--', mfc='none', label_bands=True):
    photometry = target.get_photometry(bands, clip_filter=True)
    
    # Cut late phases
    photometry = photometry[photometry['time'] < 30]
        
    ref_mag = target.salt_fit['fitted_model'].source_peakmag('snfb', 'ab')
    
    for offset, band in enumerate(bands):
        band_photometry = photometry[photometry['band'] == 'snf%s' % band.lower()]
        
        if band == 'B':
            label = target.name
        else:
            label = ''
            
        color = band_colors[band]
        
        phases = band_photometry['time']
        mags = -2.5*np.log10(band_photometry['flux']) - ref_mag - (offset - 1) * gap
        magerrs = band_photometry['magerr']
    
        plt.errorbar(phases, mags, magerrs, fmt='none', c=color)
        plt.plot(phases, mags, label=label, marker=marker, c=color, mfc=mfc, ls=ls)
        
        if label_bands:
            plt.text(phases[-1] + 2, mags[-1] + 0.2, '%s%+d' % (band, offset - 1), c=color)
            plt.xlim(None, phases[-1] + 9)
            
def plot_salt_lightcurve(target, phase_min=-6, phase_max=50):
    model = target.salt_fit['fitted_model']
    phases = np.linspace(phase_min, phase_max, 200)
    
    # Compute the reference magnitude for B-band at max.
    ref_mag = model.source_peakmag('snfb', 'ab')

    # Plot each light curve
    for offset, band in enumerate(bands):
        mags = model.bandmag('snf%s' % band.lower(), 'ab', phases) - ref_mag - (offset - 1) * gap
        plt.plot(phases, mags, c='k')


plt.figure(figsize=(5, 5))
plot_lightcurve(t1, marker='^', ls='', mfc=None, label_bands=False)
plot_lightcurve(t2, marker='o', ls='', mfc='none', label_bands=True)
# plot_salt_lightcurve(t1)
plt.legend()
plt.xlabel('Phase (days)')
plt.ylabel('Normalized magnitude + offset')
plt.ylim(4.8, -2.5)
plt.tight_layout()

plt.savefig('./figures/same_salt_comparison.pdf')

# plt.gca().invert_yaxis()

In [None]:
plt.figure()

def plot_spec(idx):
    spec = a.spectra[a.center_mask][idx]

    ref_mag = spec.target.salt_fit['fitted_model'].source_peakmag('snfb', 'ab')
    
    label = '%s, %.2f days' % (spec.target.name, spec.phase)
    
    scale = 10**(+0.4*ref_mag)
    
    plt.plot(a.wave, spec.flux * scale * spectrum_plot_scale, label=label)

plot_spec(salt_diff_idx[0])
plot_spec(salt_diff_idx[1])

plt.legend()

plt.xlabel('Restframe wavelength ($\AA$)')
plt.ylabel(spectrum_plot_ylabel)
plt.tight_layout()

plt.savefig('./figures/same_salt_spectra.pdf')

In [None]:
# Dump details to latex
with open('latex/salt_comparison.tex', 'w') as f:
    latex_print(f, "")
    t1 = a.targets[salt_diff_idx[0]]
    t2 = a.targets[salt_diff_idx[1]]
    latex_command(f, 'saltcompnamea', '%s', t1.name)
    latex_command(f, 'saltcompnameb', '%s', t2.name)
    latex_command(f, 'saltcompxonea', '%.3f $\\pm$ %.3f', (t1['salt2.X1'], t1['salt2.X1.err']))
    latex_command(f, 'saltcompxoneb', '%.3f $\\pm$ %.3f', (t2['salt2.X1'], t2['salt2.X1.err']))
    latex_command(f, 'saltcompca', '%.3f $\\pm$ %.3f', (t1['salt2.Color'], t1['salt2.Color.err']))
    latex_command(f, 'saltcompcb', '%.3f $\\pm$ %.3f', (t2['salt2.Color'], t2['salt2.Color.err']))
    latex_command(f, 'saltcompcoorda', '%.2f', a.embedding[salt_diff_idx[0], 0])
    latex_command(f, 'saltcompcoordb', '%.2f', a.embedding[salt_diff_idx[1], 0])
    latex_command(f, 'saltcompmagbdiff', '%.3f $\\pm$ %.3f', (salt_comparision_mb_diff, salt_comparison_mb_diff_err))

# Host galaxy correlations

In [165]:
a.fit_gp(kind='rbtl')
rbtl_isomap_mags = a.corr_mags
a.fit_gp(kind='salt_raw')
salt_isomap_mags = a.corr_mags

Fitting GP hyperparameters...
    Fit result:           Optimization terminated successfully.
    Color scale:          -0.007 ± 0.070
    Intrinsic dispersion: 0.065 ± 0.013 mag
    GP kernel amplitude:  0.111 ± 0.042 mag
    GP length scale:      3.348 ± 2.272
    Fit NMAD:             0.072 mag
    Fit std:              0.098 mag
Fitting GP hyperparameters...
    Fit result:           Desired error not necessarily achieved due to precision loss.
    Color scale:          3.108 ± 0.211
    Intrinsic dispersion: 0.090 ± 0.014 mag
    GP kernel amplitude:  0.358 ± 0.203 mag
    GP length scale:      7.316 ± 4.765
    Fit NMAD:             0.095 mag
    Fit std:              0.119 mag


In [166]:
def calc_step(sample, sides, errs, axis=None):
    vals_1 = np.average(sample, weights=sides / errs**2, axis=axis)
    vals_2 = np.average(sample, weights=(1-sides) / errs**2, axis=axis)
    
    return vals_1, vals_2, vals_2 - vals_1

def bootstrap_step(sample, sides, errs=1):
    return math.bootstrap_statistic(calc_step, sample, sides, errs)

def likelihood_step(residuals, prob, errs=0.):
    def calc_likelihood(x):
        s1, s2, err1, err2 = x

        var1 = errs**2 + err1**2
        var2 = errs**2 + err2**2

        likelihood = np.sum(-np.log(
            prob * 1 / np.sqrt(2 * np.pi * var1) * np.exp(-(residuals - s1)**2 / var1)
            + (1 - prob) * 1 / np.sqrt(2 * np.pi * var2) * np.exp(-(residuals - s2)**2 / var2)
        ))

        return likelihood

    res = minimize(calc_likelihood, [0., 0., 0.1, 0.1], method='BFGS')
    param_errs = np.sqrt(np.diag(res.hess_inv))

    # Estimate the variances with the intrinsic components.
    total_vars = errs**2 + prob * res.x[2]**2 + (1 - prob) * res.x[3]**2

    step_means = (res.x[0], res.x[1], res.x[1] - res.x[0])
    step_errs = (param_errs[0], param_errs[1], np.sqrt(param_errs[0]**2 + param_errs[1]**2))

    return step_means, step_errs, total_vars

def int_disp(vals, pec_vel_disps, axis=None):
    std = np.std(vals, ddof=1, axis=axis)
    corr = np.mean(pec_vel_disps**2, axis=axis)
    corr_std = np.sqrt(np.clip(std**2 - corr, 0, None))

    return corr_std

def analyze_host_variable(variable, mags, mask, uncertainties=None, threshold=None,
                          use_probability=True, plot=True, bootstrap=None,
                          y_label='Residual magnitudes'):
    use_mask = np.where(mask & a.host_mask)[0]

    if isinstance(bootstrap, int):
        np.random.seed(bootstrap)
        bootstrap_idx = np.random.choice(len(use_mask), len(use_mask))
        use_mask = use_mask[bootstrap_idx]
        
    host_data = a.host_data[use_mask]
    use_mags = mags[use_mask]
    
    use_var = host_data[variable]
    # use_var_low = -host_data[variable + '_low']
    # use_var_high = host_data[variable + '_up']
    use_var_low = host_data[variable + '.err_down']
    use_var_high = host_data[variable + '.err_up']
    
    # Default thresholds from Rigault et al. 2019
    if threshold is None:
        if variable == 'lssfr':
            threshold = -10.8
        elif variable == 'gmass':
            threshold = 10
        else:
            # Default, use the median of the variable
            threshold = np.median(use_var)

    # Figure out labels.
    if variable == 'lssfr':
        x_label = 'log(lsSFR)'
    elif variable == 'gmass':
        x_label = 'log($M_* / M_\odot$) (global)'

    # Figure out which weights to use for the step. We want to actually use
    # the probabilities if they are available rather than hard cuts as is done
    # in Rigault et al. 2018.
    label = variable
    use_weights = None

    if use_probability:
        if variable == 'lssfr':
            # plot_color = host_data['p_young']
            # use_weights = 1 - host_data['p_young'] / 100
            plot_color = host_data['p(prompt)']
            use_weights = 1 - host_data['p(prompt)']
            label = '$P_{Young}$'
        elif variable == 'gmass':
            # plot_color = host_data['p_highmass']
            # use_weights = 1 - host_data['p_highmass'] / 100
            plot_color = host_data['p(highgmass)']
            use_weights = 1 - host_data['p(highgmass)']
            label = '$P_{high\ mass}$'

    if use_weights is None:
        # Backup: do hard cuts.
        use_weights = use_var < threshold
        plot_color = use_weights

    if uncertainties is None:
        # If we don't have explicit uncertainties, just use the peculiar velocity contributions.
        uncertainties = a.get_peculiar_velocity_uncertainty()

    use_uncertainties = uncertainties[use_mask]

    step_means, step_errs, total_var = likelihood_step(use_mags, use_weights, use_uncertainties)

    if plot:
        print("Step size: %.3f ± %.3f mag" % (step_means[2], step_errs[2]))
        print("Median step: %.3f" % (np.median(use_mags[use_weights < 0.5]) - np.median(use_mags[use_weights > 0.5])))
        plt.figure()
        plt.errorbar(use_var, use_mags, xerr=(use_var_low, use_var_high), yerr=np.sqrt(total_var), fmt='.', c='gray', alpha=0.5, zorder=-2)
        plt.scatter(use_var, use_mags, s=100, c=plot_color, edgecolors='gray', cmap=plt.cm.viridis_r)

        # Threshold
        plt.axvline(threshold, c='k', lw=2, ls='--')

        # Show means of each side
        plot_min, plot_max = plt.xlim()
        mean_low, mean_high, mean_diff = step_means
        mean_low_err, mean_high_err, mean_diff_err = step_errs
        plt.plot([plot_min, threshold], [mean_low, mean_low], c='k', zorder=-1)
        plt.fill_between([plot_min, threshold], [mean_low - mean_low_err, mean_low - mean_low_err], [mean_low + mean_low_err, mean_low + mean_low_err], color=plt.cm.viridis(1000), alpha=0.5, zorder=-3)
        plt.plot([threshold, plot_max], [mean_high, mean_high], c='k', zorder=-1)
        plt.fill_between([threshold, plot_max], [mean_high - mean_high_err, mean_high - mean_high_err], [mean_high + mean_high_err, mean_high + mean_high_err], color=plt.cm.viridis(0), alpha=0.5, zorder=-3)
        
        # plt.axhline(np.median(use_mags[use_weights > 0.5]))
        # plt.axhline(np.median(use_mags[use_weights < 0.5]))

        plt.xlabel(x_label)
        plt.ylabel(y_label)

        plt.xlim(plot_min, plot_max)
        plt.ylim(-0.6, 0.6)
        
        print(step_means, step_errs)


        plt.colorbar(label=label)
        plt.tight_layout()

    return step_means[-1], step_errs[-1]

def bootstrap_step_difference(variable, mags_1, uncertainties_1, mags_2, uncertainties_2, mask,
                              num_resamples=100, **kwargs):
    step_diffs = []
    for bootstrap_iter in range(num_resamples):
        step_size_1, step_err_1 = analyze_host_variable(variable, mags_1, mask, uncertainties_1, bootstrap=bootstrap_iter, plot=False, **kwargs)
        step_size_2, step_err_2 = analyze_host_variable(variable, mags_2, mask, uncertainties_2, bootstrap=bootstrap_iter, plot=False, **kwargs)
        
        step_diffs.append(step_size_2 - step_size_1)
    
    return np.mean(step_diffs), np.std(step_diffs)

In [167]:
print(bootstrap_step_difference('lssfr', a.salt_hr, a.salt_hr_raw_uncertainties, rbtl_isomap_mags, None,
                          a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.train_mask))
print(bootstrap_step_difference('gmass', a.salt_hr, a.salt_hr_raw_uncertainties, rbtl_isomap_mags, None,
                          a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.train_mask))
print(bootstrap_step_difference('lssfr', a.salt_hr, a.salt_hr_raw_uncertainties, salt_isomap_mags, a.salt_hr_raw_uncertainties,
                          a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.train_mask))
print(bootstrap_step_difference('gmass', a.salt_hr, a.salt_hr_raw_uncertainties, salt_isomap_mags, a.salt_hr_raw_uncertainties,
                          a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.train_mask))
print(bootstrap_step_difference('lssfr', salt_isomap_mags, a.salt_hr_raw_uncertainties, rbtl_isomap_mags, None,
                          a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.train_mask))
print(bootstrap_step_difference('gmass', salt_isomap_mags, a.salt_hr_raw_uncertainties, rbtl_isomap_mags, None,
                          a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.train_mask))

(-0.021615132882580122, 0.02875388947906787)
(0.013833119908810608, 0.03035360492703955)
(-0.026182550161478635, 0.031862275023164625)
(0.03366179290776711, 0.03522787030751771)
(0.0045674172788985066, 0.020624949152699183)
(-0.019828672998956506, 0.025629890358623418)


## lsSFR plots

In [168]:
a.settings['blinded'] = True
a.read_between_the_lines()
a.fit_gp()

Reading between the lines...
Loaded cached stan model
Using saved stan result
Fitting GP hyperparameters...
    Fit result:           Optimization terminated successfully.
    Color scale:          -0.007 ± 0.070
    Intrinsic dispersion: 0.065 ± 0.013 mag
    GP kernel amplitude:  0.111 ± 0.042 mag
    GP length scale:      3.348 ± 2.272
    Fit NMAD:             0.072 mag
    Fit std:              0.098 mag


In [169]:
analyze_host_variable('lssfr', a.salt_hr, a.salt_mask & a.uncertainty_mask & a.redshift_color_mask,# & ~np.array([i.name == 'PTF11mkx' for i in a.targets]),
                      a.salt_hr_raw_uncertainties, y_label='SALT2 + $x_1$ residual magnitudes')
plt.savefig('./figures/lssfr_salt_x1.pdf')

Step size: 0.063 ± 0.025 mag
Median step: 0.047




Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(-0.02839692635279487, 0.034701827754462705, 0.06309875410725757) (0.01719251437000813, 0.01842279386612423, 0.025198846882280957)


In [170]:
analyze_host_variable('lssfr', a.salt_hr, a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.train_mask, a.salt_hr_raw_uncertainties, y_label='SALT2 + $x_1$ residual magnitudes')
plt.savefig('./figures/lssfr_salt_x1_cuts.pdf')
analyze_host_variable('lssfr', rbtl_isomap_mags, a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.train_mask, y_label='RBTL + Isomap residual magnitudes')
plt.savefig('./figures/lssfr_rbtl_isomap_cuts.pdf')
analyze_host_variable('lssfr', salt_isomap_mags, a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.train_mask, a.salt_hr_raw_uncertainties, y_label='SALT2 + Isomap residual magnitudes')
plt.savefig('./figures/lssfr_salt_isomap_cuts.pdf')

Step size: 0.060 ± 0.030 mag
Median step: 0.019




Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(-0.012016793525258923, 0.04795269471070949, 0.05996948823596841) (0.022060544362963404, 0.02057447394983037, 0.030165818336364173)
Step size: 0.041 ± 0.023 mag
Median step: 0.030




Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(-0.017535596937315604, 0.023856737963441014, 0.04139233490075662) (0.01919004920439598, 0.012484562825303228, 0.022893717422168076)
Step size: 0.037 ± 0.029 mag
Median step: 0.016




Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(-0.007739005480027818, 0.029434319670545855, 0.03717332515057367) (0.022261155371062765, 0.019308111272019295, 0.029467986007653716)


## Global mass plots

In [43]:
analyze_host_variable('gmass', a.salt_hr, a.salt_mask & a.uncertainty_mask & a.redshift_color_mask, a.salt_hr_raw_uncertainties, y_label='SALT2 + $x_1$ residual magnitudes')
plt.savefig('./figures/gmass_salt_x1.pdf')

Step size: -0.040 ± 0.026 mag
Median step: -0.045




Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(0.025187826614678905, -0.015221528209334442, -0.040409354824013345) (0.02073988398205367, 0.015164576050075866, 0.025692550561740284)


In [64]:
m = a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.host_mask & a.train_mask
zz = a.host_data[m]

In [76]:
np.mean(zz['HR'][zz['p(prompt)'] > 0.5])

0.009203790391174835

In [74]:
plt.figure()
plt.scatter(a.salt_colors[m], zz['HR_o'])

  """Entry point for launching an IPython kernel.


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.collections.PathCollection at 0x7f84abdf57d0>

In [47]:
a.targets[(a.salt_hr < -0.4) & (a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.host_mask)]

array([Target(name="PTF11mkx")], dtype=object)

In [39]:
analyze_host_variable('gmass', a.salt_hr, a.train_mask & a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.train_mask, y_label='SALT2 + $x_1$ residual magnitudes')
plt.savefig('./figures/gmass_salt_x1_cuts.pdf')
analyze_host_variable('gmass', rbtl_isomap_mags, a.train_mask & a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.train_mask, y_label='RBTL + Isomap residual magnitudes')
plt.savefig('./figures/gmass_rbtl_isomap_cuts.pdf')
analyze_host_variable('gmass', salt_isomap_mags, a.train_mask & a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.train_mask, y_label='SALT2 + Isomap residual magnitudes')
plt.savefig('./figures/gmass_salt_isomap_cuts.pdf')

Step size: -0.037 ± 0.032 mag
Median step: 0.000


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(0.03632344398195818, -0.00021574776434418226, -0.03653919174630236) (0.022685671770290454, 0.02221739851361818, 0.03175299199102782)
Step size: -0.031 ± 0.022 mag
Median step: -0.016


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(0.01920695685338997, -0.011999867262688303, -0.031206824116078273) (0.011602038954744636, 0.01831127382123058, 0.021677408905667123)
Step size: -0.009 ± 0.030 mag
Median step: 0.034


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(0.014017260024520525, 0.005269200270353779, -0.008748059754166745) (0.0199407252103083, 0.022625971803562277, 0.030159030521033334)


## Summary plot

In [40]:
host_results = {}

step_vars = [
    ('gmass', 'Global Mass', -1),
    ('lmass', 'Local Mass', -1),
    ('lssfr', 'Local SSFR', +1),
]

mag_vars = [
    ('SALT2 + $x_1$', a.salt_hr),
    ('SALT2 + Manifold', salt_isomap_mags),
    ('Spectrum + Manifold', rbtl_isomap_mags),
]

host_results = {}

for var_name, var_label, var_sign in step_vars:
    steps = {}
    for mag_label, mags in mag_vars:
        var, var_err = analyze_host_variable(var_name, mags, a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.train_mask, plot=False)
        steps[mag_label] = (var * var_sign, var_err)
        
    host_results[var_label] = steps

KeyError: 'lmass'

In [28]:
plt.figure()
labels = []
for prop_idx, (prop_label, prop_values) in enumerate(host_results.items()):
    for mag_idx, (mag_label, mag_values) in enumerate(prop_values.items()):
        step_value, step_err = mag_values
        
        marker = 'oooo'[mag_idx]
        color = 'C%d' % (mag_idx)
        if prop_idx == 0:
            label = mag_label
        else:
            label = None
            
            
        gap = 0.1
        xpos = prop_idx - 0.5*gap + gap * mag_idx
        plt.errorbar(xpos, step_value, step_err, c=color, alpha=1.)
        plt.plot(xpos, step_value, marker=marker, c=color, label=label)
        
    labels.append(prop_label)

plt.xticks(np.arange(len(labels)), labels=labels)
plt.axhline(0., c='k')
plt.xlim(-0.5, len(labels) - 0.5)
plt.ylim(-0.045, 0.155)

plt.ylabel('Step size (mag)')
plt.legend(loc=2)
plt.tight_layout()

plt.savefig('./figures/host_correlations_summary.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Summary table

In [None]:
# All of these functions are defined in the Host galaxy correlations section. Make
# sure to run that first.
def print_row(f, mags, cut, label, cut_label):
    step_vars = [
        ('lssfr', '$\\Delta_{lsSFR}$', +1),
        ('gmass', '$\\Delta_{mass}$', -1),
    ]
    stat_str = "%20s & %20s" % (label, cut_label)
    for i, (var_name, var_label, var_sign) in enumerate(step_vars):
        var, var_err = analyze_host_variable(var_name, mags, cut, plot=False)
        stat_str += " & %.3f $\pm$ %.3f" % (var_sign * var, var_err)
        
    stat_str += " \\\\"
    latex_print(f, stat_str)
    
with open('./latex/host_steps.tex', 'w') as f:
    print_row(f, a.salt_hr, a.good_salt_mask, 'SALT2 + $x_1$', 'SALT2')
    print_row(f, a.salt_hr, a.good_salt_mask & a.good_mag_mask, 'SALT2 + $x_1$', 'SALT2 + max + train')
    print_row(f, rbtl_isomap_mags, a.good_salt_mask & a.good_mag_mask, 'RBTL + Isomap', 'SALT2 + max + train')
    print_row(f, salt_isomap_mags, a.good_salt_mask & a.good_mag_mask, 'SALT2 + Isomap', 'SALT2 + max + train')

# Attrition and LaTeX variables for things

## Attrition

In [None]:
with open('./latex/attrition_parametrization.tex', 'w') as f:
    latex_print(f, "\\textbf{General selection requirements} & \\\\")
    latex_print(f, "Initial sample                                        & %d \\\\" % len(a.dataset.targets))
    latex_print(f, "More than 5 spectra                                   & %d \\\\" % a.attrition_enough_spectra)
    latex_print(f, "SALT2 date of maximum light uncertainty < 1 day       & %d \\\\" % a.attrition_salt_daymax)
    latex_print(f, "At least one spectrum within 5 days of maximum light  & %d \\\\" % a.attrition_range)
    latex_print(f, "At least one spectrum with S/N 3300-3800~\AA\ > 100   & %d \\\\" % a.attrition_usable)
    latex_print(f, "\hline")
    latex_print(f, "\\textbf{Manifold learning selection requirements} & \\\\")
    latex_print(f, r"\textbf{(Section~\ref{sec:isomap_sample})} & \\")
    latex_print(f, "Spectrum at max. uncertainty < 10\\%% of intrinsic power        & %d \\\\" % np.sum(a.interp_mask))

In [None]:
with open('./latex/attrition_standardization.tex', 'w') as f:
    latex_print(f, "\\textbf{General selection requirements} & \\\\")
    latex_print(f, "Full SNfactory dataset                                & %d \\\\" % len(a.dataset.targets))
    latex_print(f, "Included in Isomap latent space                       & %d \\\\" % np.sum(a.interp_mask))
    latex_print(f, "\hline")
    
    latex_print(f, "\\textbf{Standardization of near-maximum spectra} & \\\\")
    latex_print(f, "Host galaxy redshift available                        & %d \\\\" % np.sum(a.interp_mask & (a.redshift_errs < 0.004)))
    latex_print(f, "Host galaxy redshift above 0.02                       & %d \\\\" % np.sum(a.interp_mask & (a.redshift_errs < 0.004) & (a.redshifts > 0.02)))
    latex_print(f, "Measured $A_V$ < 0.5 mag                              & %d \\\\" % np.sum(a.interp_mask & a.redshift_color_mask))
    latex_print(f, "Blinded training subsample                            & %d \\\\" % np.sum(a.good_mag_mask))
    # latex_print(f, "Validation subsample                                  & %d \\\\" % np.sum(a.good_mag_mask))
    latex_print(f, "Validation subsample                                  & TODO \\\\")
    latex_print(f, "\hline")
    
    latex_print(f, "\\textbf{Comparisons to SALT2 standardization} & \\\\")
    latex_print(f, r"\textbf{(Section~\ref{sec:salt2_standardization})} & \\")
    latex_print(f, "SNfactory SALT2 selection requirements                & %d \\\\"% np.sum(a.salt_mask & a.interp_mask))
    latex_print(f, "Passes host galaxy redshift and color requirements    & %d \\\\"% np.sum(a.good_salt_mask & a.interp_mask))
    latex_print(f, "Has a valid interpolation to maximum light            & %d \\\\"% np.sum(a.good_salt_mask & a.interp_mask))
    latex_print(f, "Blinded training subsample                            & %d \\\\"% np.sum(a.good_salt_mask & a.good_mag_mask))
    # latex_print(f, "Validation subsample                                  & %d \\\\"% np.sum(a.good_salt_mask & a.good_mag_mask))
    latex_print(f, "Validation subsample                                  & TODO \\\\")

In [None]:
with open('./latex/attrition.tex', 'w') as f:
    latex_print(f, "\\textbf{General selection requirements} & \\\\")
    latex_print(f, "Initial sample                                        & %d \\\\" % len(a.dataset.targets))
    latex_print(f, "More than 5 spectra                                   & %d \\\\" % a.attrition_enough_spectra)
    latex_print(f, "SALT2 date of maximum light uncertainty < 1 day       & %d \\\\" % a.attrition_salt_daymax)
    latex_print(f, "At least one spectrum within 5 days of maximum light  & %d \\\\" % a.attrition_range)
    latex_print(f, "At least one spectrum with S/N 3300-3800~\AA\ > 100   & %d \\\\" % a.attrition_usable)
    latex_print(f, "\hline")
    latex_print(f, "\\textbf{Estimation of the spectra at maximum light} & \\\\")
    latex_print(f, r"\textbf{(Section~\ref{sec:int_disp_uncertainty})} & \\")
    latex_print(f, "Spectrum uncertainty < 10\\%% of intrinsic power        & %d \\\\" % np.sum(a.interp_mask))
    latex_print(f, "\hline")
    latex_print(f, "\\textbf{Valid supernova brightness requirements} & \\\\")
    latex_print(f, r"\textbf{(Section~\ref{sec:magnitude_requirements})} & \\")
    latex_print(f, "Host galaxy redshift available                        & %d \\\\" % np.sum(a.interp_mask & (a.redshift_errs < 0.004)))
    latex_print(f, "Host galaxy redshift above 0.02                       & %d \\\\" % np.sum(a.interp_mask & (a.redshift_errs < 0.004) & (a.redshifts > 0.02)))
    latex_print(f, "Measured $A_V$ < 0.5 mag                              & %d \\\\" % np.sum(a.interp_mask & a.redshift_color_mask))
    latex_print(f, "Blinded training subsample                            & %d \\\\" % np.sum(a.good_mag_mask))
    latex_print(f, "\hline")
    latex_print(f, "\\textbf{Comparisons to SALT2 fits} & \\\\")
    latex_print(f, r"\textbf{(Section~\ref{sec:salt2_standardization})} & \\")
    latex_print(f, "SNfactory SALT2 selection requirements                & %d \\\\"% np.sum(a.salt_mask))
    latex_print(f, "Passes host galaxy redshift and color requirements    & %d \\\\"% np.sum(a.good_salt_mask))
    latex_print(f, "Has a valid interpolation to maximum light            & %d \\\\"% np.sum(a.good_salt_mask & a.interp_mask))
    latex_print(f, "Blinded training subsample                            & %d \\\\"% np.sum(a.good_salt_mask & a.good_mag_mask))

## General variables

In [None]:
# Define a bunch of functions to make things easier.
def latex_host_step(file, name, var, mags, mask):
    step, step_err = analyze_host_variable(var, mags, mask, plot=False)
    latex_command(file, name, '%.3f $\\pm$ %.3f', (np.abs(step), step_err))

a.fit_gp(kind='salt_raw', verbose=False)
salt_isomap_mags = a.corr_mags.copy()

a.fit_gp(verbose=False)
rbtl_isomap_mags = a.corr_mags.copy()

with open('latex/commands.tex', 'w') as f:
    latex_print(f, "")
    latex_command(f, 'numdatasetsne', '%d', len(a.dataset.targets))
    latex_command(f, 'numdatasetspectra', '%d', np.sum([len(i.spectra) for i in a.dataset.targets]))
    latex_print(f, "")
    latex_command(f, 'nummanifoldsne', '%d', len(a.targets))
    latex_command(f, 'nummanifoldspectra', '%d', len(a.spectra))
    latex_command(f, 'numinterpsne', '%d', np.sum(a.interp_mask))
    latex_print(f, "")
    latex_command(f, 'numsnftrain', '%d', np.sum([i.subset == 'training' for i in a.targets[a.interp_mask]]))
    latex_command(f, 'numsnfvalid', '%d', np.sum([i.subset == 'validation' for i in a.targets[a.interp_mask]]))
    latex_command(f, 'numsnfother', '%d', np.sum([i.subset not in ['training', 'validation'] for i in a.targets[a.interp_mask]]))
    latex_print(f, "")
    latex_command(f, 'numsnredshift', '%d', np.sum(a.interp_mask & (a.redshift_errs >= 0.004)))
    latex_command(f, 'numlowredshift', '%d', np.sum(a.interp_mask & (a.redshifts <= 0.02)))
    latex_command(f, 'numhighav', '%d', np.sum(a.interp_mask & (a.colors - np.nanmedian(a.colors) >= 0.5)))
    latex_print(f, "")
    latex_command(f, 'nummagsne', '%d', np.sum(a.interp_mask & a.redshift_color_mask))
    latex_command(f, 'nummagsnetrain', '%d', np.sum(a.good_mag_mask))
    latex_command(f, 'nummagsnevalidation', '%d', np.sum(a.interp_mask & a.redshift_color_mask & ~a.good_mag_mask))
    latex_print(f, "")
    latex_command(f, 'saltparammb', '%.2f', a.salt_MB)
    latex_command(f, 'saltparamalpha', '%.3f', a.salt_alpha)
    latex_command(f, 'saltparambeta', '%.2f', a.salt_beta)
    latex_command(f, 'saltparamsigmaint', '%.3f', a.salt_intrinsic_dispersion)
    # latex_command(f, 'saltparamrms', '%.3f', np.std(a.salt_hr[a.good_salt_mask]))
    latex_std(f, 'saltparamrms', a.salt_hr[a.good_salt_mask])
    latex_nmad(f, 'saltparamnmad', a.salt_hr[a.good_salt_mask])
    latex_command(f, 'saltparamwrms', '%.3f', a.salt_wrms)
    latex_command(f, 'saltparammindisp', '%.2f', np.min(a.salt_hr_uncertainties[a.good_salt_mask]))
    latex_command(f, 'saltparammaxdisp', '%.2f', np.max(a.salt_hr_uncertainties[a.good_salt_mask]))
    latex_print(f, "")
    latex_std(f, 'rawrbtlmagstd', a.mags[a.good_mag_mask])
    latex_nmad(f, 'rawrbtlmagnmad', a.mags[a.good_mag_mask])
    # latex_print(f, "")
    # latex_command(f, 'twinrbtlmagstd', '%.3f', a.twins_rms)
    # latex_command(f, 'twinrbtlmagnmad', '%.3f', a.twins_nmad)
    latex_print(f, "")
    latex_std(f, 'saltcomprawrbtlmagstd', a.mags[a.good_mag_mask & a.good_salt_mask])
    latex_std(f, 'saltcompsaltmagstd', a.salt_hr[a.good_mag_mask & a.good_salt_mask])

    a.fit_gp(verbose=False, kind='salt_raw')
    gp_uncertainties = np.sqrt(np.diag(a.gp_hyperparameter_covariance))
    latex_print(f, "")
    latex_command(f, 'saltgpcolor', '%.2f $\\pm$ %.2f', (a.gp_hyperparameters[0], gp_uncertainties[0]))
    latex_command(f, 'saltgpintdisp', '%.3f $\\pm$ %.3f', (a.gp_hyperparameters[1], gp_uncertainties[1]))
    latex_command(f, 'saltgpkernelamp', '%.3f $\\pm$ %.3f', (np.abs(a.gp_hyperparameters[2]), gp_uncertainties[2]))
    latex_command(f, 'saltgpkernellengthscale', '%.2f $\\pm$ %.2f', (a.gp_hyperparameters[3], gp_uncertainties[3]))
    latex_std(f, 'saltgprms', a.corr_mags[a.good_salt_mask & a.interp_mask])
    latex_std(f, 'saltgpcompsaltrms', a.salt_hr[a.good_salt_mask & a.interp_mask])

    a.fit_gp(verbose=False)
    gp_uncertainties = np.sqrt(np.diag(a.gp_hyperparameter_covariance))
    latex_print(f, "")
    latex_command(f, 'rbtlgpcolor', '%.2f $\\pm$ %.2f', (a.fiducial_rv * (1 + a.gp_hyperparameters[0]), a.fiducial_rv * gp_uncertainties[0]))
    latex_command(f, 'rbtlgpintdisp', '%.3f $\\pm$ %.3f', (a.gp_hyperparameters[1], gp_uncertainties[1]))
    latex_command(f, 'rbtlgpkernelamp', '%.3f $\\pm$ %.3f', (np.abs(a.gp_hyperparameters[2]), gp_uncertainties[2]))
    latex_command(f, 'rbtlgpkernellengthscale', '%.2f $\\pm$ %.2f', (a.gp_hyperparameters[3], gp_uncertainties[3]))

    latex_print(f, "")
    latex_std(f, 'rbtlgprms', a.corr_mags[a.good_mag_mask])
    latex_command(f, 'rbtlgpnmad', '%.3f', math.nmad(a.corr_mags[a.good_mag_mask]))

    latex_print(f, "")
    x1 = a.salt_hr[(a.embedding[:, 0] < 3) & a.good_salt_mask & a.interp_mask]
    x2 = a.salt_hr[(a.embedding[:, 0] > 3) & a.good_salt_mask & a.interp_mask]
    m1 = np.mean(x1)
    m2 = np.mean(x2)
    err1 = np.std(x1) / np.sqrt(len(x1))
    err2 = np.std(x2) / np.sqrt(len(x2))
    latex_command(f, 'saltisomapdiff', '%.3f $\\pm$ %.3f', (np.abs(m1-m2), np.sqrt(err1**2 + err2**2)))

    latex_print(f, "")
    latex_command(f, 'pecvelcontribution', '%.3f', np.sqrt(np.mean(a.get_peculiar_velocity_uncertainty()[a.good_mag_mask & a.good_salt_mask]**2)))

    latex_print(f, "")
    latex_host_step(f, 'lssfrsaltxifull', 'lssfr', a.salt_hr, a.good_salt_mask)
    latex_host_step(f, 'gmasssaltxifull', 'gmass', a.salt_hr, a.good_salt_mask)
    latex_host_step(f, 'lssfrsaltxicut', 'lssfr', a.salt_hr, a.good_salt_mask & a.good_mag_mask)
    latex_host_step(f, 'gmasssaltxicut', 'gmass', a.salt_hr, a.good_salt_mask & a.good_mag_mask)
    latex_host_step(f, 'lssfrsaltisomapcut', 'lssfr', salt_isomap_mags, a.good_salt_mask & a.good_mag_mask)
    latex_host_step(f, 'gmasssaltisomapcut', 'gmass', salt_isomap_mags, a.good_salt_mask & a.good_mag_mask)
    latex_host_step(f, 'lssfrrbtlisomapcut', 'lssfr', a.corr_mags, a.good_salt_mask & a.good_mag_mask)
    latex_host_step(f, 'gmassrbtlisomapcut', 'gmass', a.corr_mags, a.good_salt_mask & a.good_mag_mask)
    latex_command(f, 'hostcutsnsnetrain', '%d', np.sum(a.good_mag_mask & a.good_salt_mask & a.host_mask))
    latex_command(f, 'hostcutsnsnefull', '%d', np.sum(a.redshift_color_mask & a.interp_mask & a.good_salt_mask & a.host_mask))

## Standardization comparison

In [20]:
def int_disp(vals, pec_vel_disps, axis=None):
    std = np.std(vals, ddof=1, axis=axis)
    corr = np.mean(pec_vel_disps**2, axis=axis)
    corr_std = np.sqrt(np.clip(std**2 - corr, 0, None))

    return corr_std

def get_stat_str(all_mags, cut, function, *args):
    line_str = ""
    for mags in all_mags:
        if line_str:
            line_str += " &"
            
        use_mags = mags[cut]
        if np.any(np.isnan(use_mags)):
            line_str += "%20s" % "--"
        else:
            res, res_err = math.bootstrap_statistic(function, use_mags, *args)
            line_str += "%7.3f $\pm$ %6.3f" % (res, res_err)
    
    line_str += " \\\\"
    
    return line_str

def make_table(f, all_mags, cut, label):
    stats = {
        'NMAD': (math.nmad,),
        'Standard deviation': (np.std,),
        'Pec. vel. removed': (int_disp, a.get_peculiar_velocity_uncertainty()[cut]),
    }
    for i, (stat_name, stat_args) in enumerate(stats.items()):
        if len(label) > i:
            prefix = label[i]
        else:
            prefix = ""
            
        if i == 0:
            num_sne = "%d" % np.sum(cut)
        else:
            num_sne = ""
            
        stat_str = get_stat_str(all_mags, cut, *stat_args)
        utils.latex_print(f, "%20s & %5s & %20s & %s" % (prefix, num_sne, stat_name, stat_str))
        
good_mag_mask = a.uncertainty_mask & a.redshift_color_mask
good_salt_mask = a.redshift_color_mask & a.salt_mask
good_salt_isomap_mask = a.uncertainty_mask & a.redshift_color_mask & a.salt_mask

# RBTL only
rbtl_mags = a.rbtl_mags.copy()
rbtl_mags[~good_mag_mask] = np.nan
        
# RBTL + Isomap
a.fit_gp()
rbtl_isomap_mags = a.corr_mags.copy()
rbtl_isomap_mags[~good_mag_mask] = np.nan

# SALT2
salt_mags = a.salt_hr.copy()
salt_mags[~good_salt_mask] = np.nan

# SALT2 + Isomap
a.fit_gp(kind='salt_raw')
salt_isomap_mags = a.corr_mags.copy()
salt_isomap_mags[~good_salt_isomap_mask] = np.nan

all_mags = [rbtl_mags, rbtl_isomap_mags, salt_mags, salt_isomap_mags]

with open('./latex/dispersions.tex', 'w') as f:
    # RBTL only
    make_table(f, all_mags, good_mag_mask, ['Maximum spectrum', '+ training cuts'])
    utils.latex_print(f, "\hline")

    # SALT2 only
    make_table(f, all_mags, good_salt_mask, ['SALT2 fit cuts'])
    utils.latex_print(f, "\hline")

    # SALT2 + Isomap
    make_table(f, all_mags, good_salt_isomap_mask, ['Maximum spectrum', '+ SALT2 fit cuts'])
    utils.latex_print(f, "\hline")

    # All
    make_table(f, all_mags, good_salt_isomap_mask, ['All cuts'])

Fitting GP hyperparameters...
    Fit result:           Optimization terminated successfully.
    Color scale:          -0.127 ± 0.051
    Intrinsic dispersion: 0.064 ± 0.009 mag
    GP kernel amplitude:  0.177 ± 0.089 mag
    GP length scale:      5.553 ± 3.271
    Fit NMAD:             0.081 mag
    Fit std:              0.100 mag
Fitting GP hyperparameters...
    Fit result:           Desired error not necessarily achieved due to precision loss.
    Color scale:          2.799 ± 0.151
    Intrinsic dispersion: -0.087 ± 0.009 mag
    GP kernel amplitude:  0.424 ± 0.250 mag
    GP length scale:      9.365 ± 5.524
    Fit NMAD:             0.103 mag
    Fit std:              0.117 mag
    Maximum spectrum &   134 &                 NMAD &   0.108 $\pm$  0.013 &  0.081 $\pm$  0.011 &                  -- &                  -- \\
     + training cuts &       &   Standard deviation &   0.130 $\pm$  0.010 &  0.100 $\pm$  0.008 &                  -- &                  -- \\
                  

In [22]:
a.fit_gp()

Fitting GP hyperparameters...
    Fit result:           Optimization terminated successfully.
    Color scale:          -0.127 ± 0.051
    Intrinsic dispersion: 0.064 ± 0.009 mag
    GP kernel amplitude:  0.177 ± 0.089 mag
    GP length scale:      5.553 ± 3.271
    Fit NMAD:             0.081 mag
    Fit std:              0.100 mag


In [26]:
np.std(a.corr_mags[a.uncertainty_mask & a.redshift_color_mask & a.salt_mask])

0.09975133114394495

# Dump parameters

In [None]:
import pandas as pd

In [None]:
pd.DataFrame({
    'name': [i.name for i in a.targets],
    't1': a.embedding[:, 0],
    't2': a.embedding[:, 1],
    't3': a.embedding[:, 2],
    # 'salt_x1': a.salt_x1,
    # 'salt_mag': a.salt_hr,
}).to_csv('./manifold_parameters.csv')

# Plots for people after Dec. 12 VC

## SN2006X comparison

In [None]:
# Note: SN2006X fails the interpolation cut, so first have to add it back in.
mask = np.array([i.name == 'SN2006X' for i in a.targets])
a.interp_mask[mask] = True
a.do_embedding()

In [None]:
plt.figure()
plt.scatter(-a.embedding[:, 0], a.embedding[:, 1])

plt.scatter(-a.embedding[mask, 0], a.embedding[mask, 1], label='SN2006X')
plt.legend()
plt.xlabel('Isomap Component 1')
plt.ylabel('Isomap Component 2')

In [None]:
plt.figure()
m2 = a.embedding[:, 0] > 4
plt.plot(a.wave, a.scale_flux[mask].T * spectrum_plot_scale[:, None], c='C1', label='SN2006X')
plt.plot(a.wave, a.scale_flux[m2].T * spectrum_plot_scale[:, None], c='C0', label='Nearby SNe Ia')
plt.plot(a.wave, a.scale_flux[mask].T * spectrum_plot_scale[:, None], c='C1', label='SN2006X')
plt.xlabel('Wavelength ($\AA$)')
plt.ylabel(spectrum_plot_ylabel)
plt.legend(['SN2006X', 'Nearby SNe Ia'])
plt.tight_layout()

In [None]:
a.interp_mask[mask] = False
a.do_embedding()

In [None]:
plt.figure()
plt.scatter(a.embedding[:, 0], a.embedding[:, 1])

## Interpolations for unusual SNe

In [None]:
spec_counts = np.array([np.sum(a.target_map == i) for i in range(len(a.targets))])

found = np.where((a.embedding[:, 1] > 4) & (spec_counts >= 3))[0]
print(found)
a.targets[found]

In [None]:
import matplotlib.backends.backend_pdf

pdf = matplotlib.backends.backend_pdf.PdfPages("unusual_sne_interpolations.pdf")

plot_targets = ['PTF11mkx', 'SNF20071021-000', 'PTF11kjn']

for plot_target in plot_targets:
    target_names = np.array([i.name for i in a.targets])
    plot_idx = np.where(target_names == plot_target)[0][0]

    fig1, fig2, fig3 = plot_same_night(plot_idx, figsize=(8, 5))
    
    # pdf.savefig(fig1)
    fig2.gca().set_title(f'{plot_target} - Model')
    pdf.savefig(fig2)
    fig3.gca().set_title(f'{plot_target} - Residuals')
    pdf.savefig(fig3)
    
pdf.close()

## Intrinsic dispersion

In [None]:
# Ran with only wavelengths bluer than 6000 AA and saved the results to this file.
# np.savetxt('./test_blue_6000_intrinsic_dispersion.txt',
           #np.vstack([a.wave, frac_to_mag(a.rbtl_result['fractional_dispersion'])]))

In [None]:
wave, int_disp = np.genfromtxt('./test_blue_6000_intrinsic_dispersion.txt')

In [None]:
plt.figure(figsize=(8, 5))

intrinsic_dispersion = frac_to_mag(a.rbtl_result['fractional_dispersion'])

plt.plot(a.wave, intrinsic_dispersion, label='Full fit')
plt.plot(wave, int_disp, label='Fit to $\lambda$<6000$\AA$ only')

plt.legend()
plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Intrinsic dispersion (mag)')
plt.ylim(0, None)
plt.tight_layout()
plt.savefig('./figures/rbtl_intrinsic_dispersion.pdf')

## K-folding the embedding

In [None]:
orig_interp_mask = a.interp_mask.copy()

In [None]:
import matplotlib.backends.backend_pdf

mpl.rcParams['figure.figsize'] = (6., 5.)

pdf = matplotlib.backends.backend_pdf.PdfPages("k_fold_manifold.pdf")

folds = np.random.randint(0, 5, size=len(a.interp_mask))

for i in range(5):
    a.interp_mask = orig_interp_mask & (folds != i)
    a.do_embedding()
    a.do_component_blondin_plot()
    pdf.savefig(plt.gcf())
pdf.close()

a.interp_mask = orig_interp_mask
a.do_embedding()

## Test of the size of the phase differences vs the components

In [None]:
phase_diff = evaluate_phase_difference(2.5) - evaluate_phase_difference(-2.5)

In [None]:
t1_max = np.median(a.scale_flux[a.embedding[:, 0] > np.nanpercentile(a.embedding[:, 0], 90)], axis=0)
t1_min = np.median(a.scale_flux[a.embedding[:, 0] > np.nanpercentile(a.embedding[:, 0], 10)], axis=0)
t2_max = np.median(a.scale_flux[a.embedding[:, 1] > np.nanpercentile(a.embedding[:, 1], 90)], axis=0)
t2_min = np.median(a.scale_flux[a.embedding[:, 1] > np.nanpercentile(a.embedding[:, 1], 10)], axis=0)
t3_max = np.median(a.scale_flux[a.embedding[:, 2] > np.nanpercentile(a.embedding[:, 2], 90)], axis=0)
t3_min = np.median(a.scale_flux[a.embedding[:, 2] > np.nanpercentile(a.embedding[:, 2], 10)], axis=0)

t1_diff = -2.5*np.log10(t1_max / t1_min)
t2_diff = -2.5*np.log10(t2_max / t2_min)
t3_diff = -2.5*np.log10(t3_max / t3_min)

In [None]:
print(np.sum(phase_diff**2))
print(np.sum(t1_diff**2))
print(np.sum(t2_diff**2))
print(np.sum(t3_diff**2))

In [None]:
plt.figure()
plt.plot(a.wave, t1_diff)
plt.plot(a.wave, t2_diff)
plt.plot(a.wave, t3_diff)
plt.plot(a.wave, phase_diff)

In [None]:
a.interpolation_result['phase_slope']

In [None]:
med_phase = a.interpolation_result

## SNe Ia missing host galaxy properties

In [None]:
[i for i in a.targets[0].meta.keys() if 'salt2' not in i]

In [None]:
a.host_data['name'][-30:]

In [None]:
np.sum(a.host_mask)

In [None]:
len(a.host_mask)

In [None]:
mm = a.interp_mask & a.redshift_color_mask

In [None]:
a.redshifts[np.array([i.name == 'SN2005ki' for i in a.targets])]

In [None]:
for i in a.targets[mm & ~a.good_mag_mask & ~a.host_mask]:
    print(i.name)

In [None]:
plt.figure()
plt.scatter(a.redshifts, a.mags, c=a.good_mag_mask & ~a.host_mask)

## SNfactory timeseries example

In [None]:
plt.figure(figsize=(6, 7))
a.targets[101].plot(f_nu=True, offset_factor=0.4)

## Stretch example

In [None]:
salt_model = sncosmo.Model(source='salt2')

salt_model.set(z=0.001)
salt_model.set(t0=0)
salt_model.set(x0=1.7e4)
salt_model.set_source_peakmag(0, 'snfb', 'ab')

times = np.linspace(-15, 30, 100)

plt.figure(figsize=(4, 3))

for x1 in np.arange(-2, 2, 0.5):
    salt_model.set(x1=x1)
    mag = salt_model.bandmag('snfb', 'ab', times) - 0.05 * x1
    plt.plot(times, mag, c=plt.cm.coolwarm(x1 / 4 + 0.5))
    
plt.ylim(3, -0.5)
plt.ylabel('Relative brightness (mag)')
plt.xlabel('Phase (days)')
plt.tight_layout()

In [None]:
salt_model = sncosmo.Model(source='salt2')

salt_model.set(z=0.001)
salt_model.set(t0=0)
salt_model.set(x0=1.7e4)
salt_model.set_source_peakmag(0, 'snfb', 'ab')

times = np.linspace(-15, 30, 100)

plt.figure(figsize=(4, 3))

for c in np.arange(0, 0.3, 0.04):
    salt_model.set(c=c)
    flux = salt_model.flux(0, a.wave)
    # mag = salt_model.bandmag('snfb', 'ab', times) - 0.05 * x1
    # plt.plot(times, mag, c=plt.cm.coolwarm(x1 / 4 + 0.5))
    plt.plot(a.wave, flux*1e9 * 10**(0.4 * 3.1 * c), c=plt.cm.coolwarm_r(c / 0.3))
    
# plt.ylim(3, -0.5)
plt.ylabel('Flux')
plt.xlabel('Wavelength ($\AA$)')
plt.tight_layout()

In [None]:
salt_model = sncosmo.Model(source='salt2')

salt_model.set(z=0.001)
salt_model.set(t0=0)
salt_model.set(x0=1e14)
# salt_model.set_source_peakmag(0, 'snfb', 'ab')

times = np.linspace(-15, 30, 100)

wave = a.wave

ref_spec = salt_model.flux(0, wave)
salt_model.set(x1=1)
x1_spec = salt_model.flux(0, wave) - ref_spec

plt.figure(figsize=(4, 3))
plt.plot(a.wave, ref_spec * spectrum_plot_scale)
plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Flux')
plt.tight_layout()

plt.figure(figsize=(4, 3))
plt.plot(a.wave, x1_spec * spectrum_plot_scale)
plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Flux')
plt.tight_layout()

In [None]:
plt.figure(figsize=(1, 2), dpi=300)
plt.title("$+ x_2 \cdot$ ...")
plt.tight_layout()

In [None]:
def plot_steps_values(values, num_steps=10, xlim=None, figsize=spectrum_plot_figsize, colorbar=True, label=''):
    mask = a.uncertainty_mask

    use_embedding = values[mask]
    use_flux = a.scale_flux[mask]

    # min_embedding = np.percentile(use_embedding, 5)
    # max_embedding = np.percentile(use_embedding, 95)
    min_embedding = np.min(use_embedding)
    max_embedding = np.max(use_embedding)
    
    bin_edges = np.linspace(min_embedding, max_embedding, num_steps+1)
    
    bin_edges[0] = -1e20
    bin_edges[-1] = 1e20
    
    plt.figure(figsize=figsize, dpi=250)
    
    cmap = plot_cmap
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=min_embedding, vmax=max_embedding))
    sm._A = []

    if xlim is not None:
        wave_mask = (a.wave > xlim[0] - 50) & (a.wave < xlim[1] + 50)
    else:
        wave_mask = np.ones(len(a.wave), dtype=bool)
        
    all_step_flux = []
    all_mean_vals = []
    
    for step in range(num_steps):
        step_mask = (use_embedding >= bin_edges[step]) & (use_embedding < bin_edges[step+1])
        step_embedding = use_embedding[step_mask]

        mean_val = np.mean(step_embedding)
        step_flux = np.median(use_flux[step_mask], axis=0)
        
        # if step == 0:
            # label = 'Median spectra in each component bin'
        # else:
            # label = ''
            
        # Make the extreme values of components get plotted on top if everything overlaps.
        zorder = np.abs(mean_val - np.mean(values))
        
        # plt.plot(a.wave[wave_mask], step_flux[wave_mask], c=sm.to_rgba(mean_val), label=label)
        plt.plot(a.wave[wave_mask], step_flux[wave_mask] * spectrum_plot_scale[wave_mask], c=sm.to_rgba(mean_val), zorder=zorder)
        
        all_step_flux.append(step_flux)
        all_mean_vals.append(mean_val)
        
    if xlim is not None:
        plt.xlim(*xlim)
        
    if colorbar:
        plt.colorbar(sm, label=label)
        # plt.title('Component %d' % (component + 1))
    
    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Flux')
    plt.ylim(0, None)
    
    # plt.legend()
    
    plt.tight_layout()
    
    # if xlim is None:
        # plt.savefig('./figures/component_%d_steps.pdf' % (component + 1))
    # else:
        # plt.savefig('./figures/component_%d_steps_zoom_%d_%d.pdf' % (component + 1, xlim[0], xlim[1]))
        
    return np.array(all_mean_vals), np.array(all_step_flux)

In [None]:
# step_vel, step_flux = plot_steps_values(-a.spectral_indicators['vSiII6355'] - 11000, 15, figsize=(4, 3), colorbar=False)
# step_vel, step_flux = plot_steps_values(-a.spectral_indicators['vSiII6355'] / 1000, 20, xlim=(5800, 6400), figsize=(5, 3), label='Si II line velocity ($10^3$ km/s)')
step_vel, step_flux = plot_steps_values(a.spectral_indicators['EWSiII6355'], 30, xlim=(5800, 6400), figsize=(4, 3), label='Si II line velocity ($10^3$ km/s)', colorbar=False)
plt.ylim(0.1, 0.5)
# plt.tight_layout()
# plt.ylabel('Flux')

In [None]:
idx1 = 182
idx2 = 188
plt.axvline(a.wave[idx1], ls='--', c='k', lw=2, zorder=1000)
plt.axvline(a.wave[idx2], ls='--', c='k', lw=2, zorder=1000)

plt.figure(figsize=(4, 3), dpi=250)
plt.scatter(step_flux[:, 182], step_flux[:, 188], c=step_vel, cmap=plt.cm.coolwarm, s=50)
plt.xlabel('Flux at %d $\AA$' % a.wave[idx1])
plt.ylabel('Flux at %d $\AA$' % a.wave[idx2])
plt.colorbar(label='Si II line velocity ($10^3$ km/s)')
plt.tight_layout()

In [None]:
plt.figure()
idx = 180
plt.scatter(step_vel, step_flux[:, idx], label='%d $\AA$' % a.wave[idx])
idx = 183
plt.scatter(step_vel, step_flux[:, idx], label='%d $\AA$' % a.wave[idx])
idx = 190
plt.scatter(step_vel, step_flux[:, idx], label='%d $\AA$' % a.wave[idx])
plt.legend()

In [None]:
plt.figure()

wave = np.linspace(0, 100, 100)

def sim_line(vel, depth):
    val = 1 / np.sqrt(np.pi * )
    


plt.plot(a.wave, )

## Outliers

In [38]:
outliers_91t = [ 
    'SNF20070528-003', # Scalzo++ 2014
    'SNF20070803-005', # Scalzo++ 2014
    'SNF20070825-001', # Scalzo++ 2010
    'SNF20070912-000', # Scalzo++ 2014
    'SNF20080522-000', # Scalzo++ 2014
    'SNF20080723-012', # Scalzo++ 2014
    'SNF20080805-007', # Lin++ 2020
    'LSQ12cyz', # Lin++ 2020
    'LSQ12fhe', # Lin++ 2020
    'PTF11bju', # Lin++ 2020
    'PTF11mkx', # Lin++ 2020
]

outliers_91bg = [
    'LSQ12cfx', # Lin++ 2020
    'PTF10ops', # Maguire++ 2011
    'PTF11bkf', # Lin++ 2020
    'PTF11kjn', # Lin++ 2020
    'PTF11okh', # Lin++ 2020
    'PTF11pra', # Lin++ 2020
    'PTF12dwm', # Lin++ 2020
    'SN2005bl', # Lin++ 2020
    'SN2005dh', # Lin++ 2020 
    'SN2005dm', # Lin++ 2020
    'SN2007ba', # Lin++ 2020
    'SN2009hs', # Lin++ 2020
    'SNNGC6430', # Lin++ 2020
]

outliers_02cx = [
    'SN2005cc', # Lin++ 2020
]

mask_91t = np.array([i.name in outliers_91t for i in a.targets])
mask_91bg = np.array([i.name in outliers_91bg for i in a.targets])
mask_02cx = np.array([i.name in outliers_02cx for i in a.targets])

In [39]:
plt.figure()

use_x = a.embedding[:, 0]

mask = a.salt_mask & a.redshift_color_mask & a.uncertainty_mask
plt.errorbar(use_x[mask], a.salt_hr[mask], a.salt_hr_uncertainties[mask], label='Individual supernovae', fmt='.', alpha=0.2, c='k')
math.plot_binned_mean(use_x[mask], a.salt_hr[mask], c='C3', lw=2, label='Binned mean')

# mask = mask & mask_91t
# plt.scatter(use_x[mask], a.salt_hr[mask], label='91T-like SNe IA', c='C2', marker='s', zorder=10)

plt.legend()

plt.gca().invert_yaxis()

plt.xlabel('Component 1')
plt.ylabel('SALT2 Hubble residuals')

plt.tight_layout()

plt.savefig('/home/kyle/supernova/meetings/2020_01_04_aas_hawaii/talk/salt_bias.png')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [45]:
def do_plot(label, mask=None, **kwargs):
    if mask is None:
        mask = a.uncertainty_mask
    else:
        mask = mask & a.uncertainty_mask

    plt.scatter(a.embedding[mask, 0], a.embedding[mask, 1], label=label, s=50, **kwargs)
    
plt.figure()
do_plot('All SNe Ia', c='k', alpha=0.1)
do_plot('91T-like', mask_91t)
do_plot('91bg-like', mask_91bg)
do_plot('02cx-like', mask_02cx)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Manifold learning example

In [None]:
N = 150
loc = -0.5 + np.random.uniform(0, 2 * np.pi - 2, N)
x = np.cos(loc) + np.random.normal(0, 0.1, N)
y = np.sin(2 * loc) + np.random.normal(0, 0.1, N)

plt.figure()
plt.scatter(x, y, c='k', s=20)

plt.xticks([])
plt.yticks([])
plt.tight_layout()

plt.savefig('/home/kyle/supernova/meetings/2020_01_04_aas_hawaii/talk/manifold_sim.png')

for iter_x, iter_y in zip(x, y):
    closest = np.argsort((x - iter_x)**2 + (y - iter_y)**2)
    for i in closest[:8]:
        plt.plot([iter_x, x[i]], [iter_y, y[i]], c='C3', alpha=0.3)
        
plt.savefig('/home/kyle/supernova/meetings/2020_01_04_aas_hawaii/talk/manifold_sim_neighbors.png')

In [None]:
a.dataset.targets[100].spectra[0].wave

## Redshift comparison

In [30]:
a.dataset.get_target('SNF20080812-003').spectra

array([IdrSpectrum(target="SNF20080812-003", name="SNF20080812-003_08229081003", phase=-5.611)],
      dtype=object)

In [28]:
a.redshift_color_mask[np.array([i.name == 'SNF20080812-003' for i in a.targets])]

array([], dtype=bool)

In [None]:
from astropy.table import 

In [32]:
upd_redshifts = Table.read('./data/greg_updated_redshifts.txt', format='ascii')

In [35]:
t = upd_redshifts
t

col1,col2,col3,col4,col5,col6
str15,float64,float64,float64,float64,float64
PTF11bnb,0.03,0.005,0.0416,0.0005,-0.0116
SNF20080812-003,0.052,0.006,0.0617,0.0005,-0.0097
SNF20080918-004,0.051,0.005,0.0574,0.0025,-0.0064
PTF10qyz,0.065,0.005,0.0683,0.0025,-0.0033
PTF13asv,0.034,0.005,0.0366,0.0025,-0.0026
PTF11mkx,0.05482,0.001,0.0566,0.0005,-0.00178
LSQ13bbz,0.06,0.02,0.0612,0.0005,-0.0012
PTF10ops,0.06,0.01,0.0612,0.0025,-0.0012
SNF20080905-005,0.0579,0.005,0.0588,0.0025,-0.0009
SN2005ak,0.02671,0.00181,0.02744,6e-05,-0.00073


In [44]:
target_names = [i.name for i in a.targets]

use_mask = np.zeros(len(t), dtype=bool)

for i, name in enumerate(t['col1']):
    use_mask[i] = name in target_names

In [63]:
m = (t['col3'] < 0.004)
diffs = (t['col4'] - t['col2'])[m & use_mask]
from astropy.cosmology import Planck15 as cosmo

for row in t[m & use_mask]:
    cosmo_diff = cosmo.distmod(row['col4']) - cosmo.distmod(row['col2'])
    z = np.array([i.name == row['col1'] for i in a.targets])
    if (np.abs(cosmo_diff.value) > 0.01) & a.redshift_color_mask[z]:
        print(row['col1'], row['col4'], cosmo_diff)

PTF11mkx 0.0566 0.07212223217248948 mag
SN2007bd 0.03102 0.04191017490165194 mag
SN2005cg 0.03176 0.02532730616020018 mag
SNF20060521-001 0.06698 0.010542177126986019 mag
SNF20080514-002 0.02206 0.018081582166928456 mag
SN2004ef 0.03098 0.010778463260528781 mag
SN2007qe 0.02371 -0.024101040511794736 mag


In [65]:
z = np.array([i.name == 'SN2004ef' for i in a.targets])
a.corr_mags[z]

array([-0.069283])

In [58]:
a.train_mask[z]

array([False])

In [62]:
plt.figure()
plt.scatter([i['host.zhelio.err'] for i in a.targets[a.redshift_color_mask]], a.corr_mags[a.redshift_color_mask])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.collections.PathCollection at 0x7f0f39c3f710>

In [25]:
plt.figure()
plt.scatter(np.sqrt(a.embedding[:, 0]**2 + a.embedding[:, 2]**2), a.embedding[:, 1], s=20)

plt.xlabel('R = sqrt(C1^2 + C3^2)')
plt.ylabel('Component 2')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'Component 2')

## What is going on with Mickael's sample?

In [42]:
rigault_data = Table.read('./data/host_properties_rigault_2019.txt', format='ascii')
rigault_mask = np.array([a.iau_name_map.get(i.name, i.name) in rigault_data['name'] for i in a.targets])

remove_list = [
    # SNREDSHIFT - most likely wrong host
    # "CSS110918_01",
    # "PTF10qyz",
    # "PTF10ufj",
    # "SNF20060908-004",
    # "SNF20080918-004",
    # "SNF20070817-003",
    
    
    # Photo Optical data are unusable and no SNfactory optical data.
    "PTF09fox",
    "SN2004gs",
    "SNF20080919-000",
    "SNF20080626-002",
    
    # Spectral Data are unuable
    "PTF13asv",
    
    
    # HUBBLIZER - Bad in the zoo, Nico removed it for any reason.
    # "LSQ12gxj",
    
    # PECULIARS - JAKOB / GREG
    # "PTF10ygu",
    # "PTF10ops",
    
    # SUPERC_91T
    # "SNF20070528-003","SNF20070803-005","SN2007if",
    # "SNF20070912-000","SNF20080522-000","SNF20080723-012", # SCALZO et al 2012
    # "SN2012dn", # Childress 2016 Super-C
    # "LSQ12cyz", "PTF11bju","SNNGC2691", "LSQ12gdj","LSQ12fhe", "PTF11mkx",
    
    
    # PTF collaboration
    # "PTF13anh","PTF13ayw","PTF13azs","PTF13asv",
    # "PTF12jqh","PTF11bgv","PTF11drz","PTF12eer",
    # "PTF12evo", "PTF12fuu","PTF12ghy","PTF12grk",
    # "PTF12ikt",
]

extra_mask = np.array([i.name not in remove_list for i in a.targets])

analyze_host_variable(
    'gmass',
    
    # rbtl_isomap_mags,
    # salt_isomap_mags,
    a.salt_hr,
    # rigault_data['HR'],

    # extra_mask & a.salt_mask & a.redshift_color_mask & a.uncertainty_mask & a.train_mask,
    # extra_mask & a.salt_mask & a.redshift_color_mask & a.uncertainty_mask & a.train_mask,
    # extra_mask & a.salt_mask & a.redshift_color_mask & a.uncertainty_mask,
    # extra_mask & a.salt_mask & a.redshift_color_mask & a.uncertainty_mask,
    # extra_mask & a.salt_mask & a.redshift_color_mask & a.uncertainty_mask,# & a.salt_mask & a.uncertainty_mask & a.redshift_color_mask,
    # rigault_mask & a.salt_mask & a.redshift_color_mask & a.uncertainty_mask, #extra_mask & a.salt_mask,# & a.salt_mask & a.uncertainty_mask & a.redshift_color_mask,
    # extra_mask & a.salt_mask & a.redshift_color_mask & (a.maximum_uncertainty_fraction > 0.2),
    # rigault_mask & a.train_mask,
    # extra_mask & a.salt_mask & a.redshift_color_mask & a.uncertainty_mask,#& a.redshift_color_mask & a.uncertainty_mask,
    # rigault_mask & a.salt_mask & a.redshift_color_mask & a.uncertainty_mask,
    extra_mask & a.salt_mask & a.redshift_color_mask & a.uncertainty_mask,
    # extra_mask & a.salt_mask & a.redshift_color_mask & a.uncertainty_mask,
    # extra_mask & a.salt_mask & a.redshift_color_mask & a.uncertainty_mask,# & a.salt_mask & a.uncertainty_mask & a.redshift_color_mask,
    # extra_mask & a.salt_mask & a.redshift_color_mask & a.uncertainty_mask,# & a.salt_mask & a.uncertainty_mask & a.redshift_color_mask,

    a.salt_hr_raw_uncertainties,
    # a.host_data['HR.err'],
    y_label='SALT2 + $x_1$ residual magnitudes'
)

[ 0.03227949 -0.02095304  0.19496385  0.13502417 -0.01226244 -0.08559092]
Step size: -0.053 ± 0.028 mag
Median step: -0.052


  if sys.path[0] == '':
  grad[k] = (f(*((xk + d,) + args)) - f0) / d[k]
  if sys.path[0] == '':


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(0.03227949291230736, -0.02095303978530645, -0.05323253269761381) (0.02287181031221938, 0.015494234564298468, 0.027625911961266476)


(-0.05323253269761381, 0.027625911961266476)

In [43]:
[i.redshift_cmb for i in a.targets[
    rigault_mask & a.salt_mask & a.redshift_color_mask & a.uncertainty_mask
    !=
    extra_mask & a.salt_mask & a.redshift_color_mask & a.uncertainty_mask,
]]

[0.02660507560282821,
 0.026354678600011283,
 0.03136053025249885,
 0.07040706468939884,
 0.03293767593120367,
 0.08108187220578844,
 0.07214453551620825,
 0.033426754770932154,
 0.03331427608026649,
 0.0573387253533606,
 0.05357493286085502,
 0.0564835940826256,
 0.06567250373061562,
 0.04863297118741028,
 0.06936814625875565,
 0.04408730097152369,
 0.047075296617337115,
 0.06349718249109948,
 0.05418686093515612,
 0.02583569279038378,
 0.0303070618213106,
 0.09327102520605712,
 0.08149881202575204,
 0.030443252892658368,
 0.06917777445666973,
 0.08342003469170711,
 0.08834381831953686,
 0.04627214726196316,
 0.07484683494534972,
 0.0851775007503206,
 0.026714525990187532]

In [44]:
rigault_data_full = Table.read('./data/host_properties_rigault_full.csv', format='csv')
# rigault_mask = np.array([a.iau_name_map.get(i.name, i.name) in rigault_data['name'] for i in a.targets])

remove_list = [
    # HUBBLIZER - Bad in the zoo, Nico removed it for any reason.
    "LSQ12gxj",
    
    # SNREDSHIFT - most likely wrong host
    "CSS110918_01",
    "PTF10qyz",
    "PTF10ufj",
    "SNF20060908-004",
    "SNF20080918-004",
    "SNF20070817-003",
    
    # PECULIARS - JAKOB / GREG
    "PTF10ygu",
    "PTF10ops",
    
    # Photo Optical data are unusable and no SNfactory optical data.
    "PTF09fox",
    "SN2004gs",
    "SNF20080919-000",
    "SNF20080626-002",
    
    # Spectral Data are unuable
    "PTF13asv",
    
    # SUPERC_91T
    "SNF20070528-003","SNF20070803-005","SN2007if",
    "SNF20070912-000","SNF20080522-000","SNF20080723-012", # SCALZO et al 2012
    "SN2012dn", # Childress 2016 Super-C
    "LSQ12cyz", "PTF11bju","SNNGC2691", "LSQ12gdj","LSQ12fhe", "PTF11mkx",
    
    # PTF collaboration
    "PTF13anh","PTF13ayw","PTF13azs","PTF13asv",
    "PTF12jqh","PTF11bgv","PTF11drz","PTF12eer",
    "PTF12evo", "PTF12fuu","PTF12ghy","PTF12grk",
    "PTF12ikt",
]

extra_mask = np.array([i not in remove_list for i in rigault_data_full['name']])

rigault_data_cut = rigault_data_full[extra_mask]

res = likelihood_step(rigault_data_cut['HR'], rigault_data_cut['p(highgmass)'], rigault_data_cut['HR.err'])
print("Step size: %.3f $\pm$ %.3f" % (res[0][2], res[1][2]))

TypeError: likelihood_step() missing 1 required positional argument: 'prob'

In [45]:
plt.figure()
plt.scatter(rigault_data_cut['salt2.X1'], rigault_data_cut['HR'])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.collections.PathCollection at 0x7fcc4c630990>

## Simultaneous host properties fit

In [46]:
def fit_gp_host(self, step_probabilities_1, step_probabilities_2, kind="rbtl", start_hyperparameters=[0.0, 0.05, 0.2, 5, 0., 0.]):
    """Fit a Gaussian Process to predict the residual magnitudes."""
    self.print_verbose("Fitting GP hyperparameters...")

    # Fit the hyperparameters on the full conditioning sample.
    coordinates, mags, mag_errs, colors, condition_mask = self.get_gp_data(kind)
    
    condition_mask &= self.host_mask

    condition_coordinates = coordinates[condition_mask]
    condition_mags = mags[condition_mask]
    condition_mag_errs = mag_errs[condition_mask]
    condition_colors = colors[condition_mask]
    condition_step_probabilities_1 = step_probabilities_1[condition_mask]
    condition_step_probabilities_2 = step_probabilities_2[condition_mask]

    def negative_log_likelihood(hyperparameters):
        gp, color_slope = self._build_gp(
            condition_coordinates, condition_mag_errs, hyperparameters[:-2]
        )
        host_step_1 = hyperparameters[-2]
        host_step_2 = hyperparameters[-1]
        residuals = (
            condition_mags
            - condition_colors * color_slope
            + host_step_1 * condition_step_probabilities_1
            + host_step_2 * condition_step_probabilities_2
        )
        result = -gp.log_likelihood(residuals)

        return result

    res = minimize(negative_log_likelihood, start_hyperparameters)

    self.gp_hyperparameters = res.x
    self.gp_negative_log_likelihood = negative_log_likelihood

    pred_mags, pred_vars = self._predict_gp_oos(
        coordinates,
        mags,
        mag_errs,
        colors,
        condition_mask = condition_mask,
        return_var=True,
    )

    self.corr_mags = mags - pred_mags
    self.corr_vars = pred_vars + mag_errs**2
    good_corr_mags = self.corr_mags[condition_mask]

    # Calculate the parameter covariance using a custom code that numerically
    # estimates the Hessian using a finite difference method with adaptive step
    # sizes.
    param_names = ["param_%d" for i in range(len(self.gp_hyperparameters))]
    
    print(self.gp_hyperparameters)

    cov = math.calculate_covariance_finite_difference(
        negative_log_likelihood,
        param_names,
        self.gp_hyperparameters,
        [(None, None)] * len(param_names),
        verbose=self.settings['verbosity'] > 2,
    )
    self.gp_hyperparameter_covariance = cov

    uncertainties = np.sqrt(np.diag(cov))

    self.print_verbose("    Fit result:           %s" % res['message'])
    self.print_verbose("    Color scale:          %.3f ± %.3f"
                       % (res.x[0], uncertainties[0]))
    self.print_verbose("    Intrinsic dispersion: %.3f ± %.3f mag"
                       % (res.x[1], uncertainties[1]))
    self.print_verbose("    GP kernel amplitude:  %.3f ± %.3f mag"
                       % (res.x[2], uncertainties[2]))
    self.print_verbose("    GP length scale:      %.3f ± %.3f"
                       % (res.x[3], uncertainties[3]))
    self.print_verbose("    lSSFR step size:      %.3f ± %.3f"
                       % (res.x[4], uncertainties[4]))
    self.print_verbose("    gmass step size:      %.3f ± %.3f"
                       % (res.x[5], uncertainties[5]))

    self.print_verbose("    Fit NMAD:             %.3f mag"
                       % math.nmad(good_corr_mags))
    self.print_verbose("    Fit std:              %.3f mag"
                       % np.std(good_corr_mags))

In [47]:
a.fit_gp(kind='salt_raw')

Fitting GP hyperparameters...
    Fit result:           Desired error not necessarily achieved due to precision loss.
    Color scale:          3.108 ± 0.211
    Intrinsic dispersion: 0.090 ± 0.014 mag
    GP kernel amplitude:  0.358 ± 0.203 mag
    GP length scale:      7.316 ± 4.765
    Fit NMAD:             0.095 mag
    Fit std:              0.119 mag


In [48]:
a.read_between_the_lines()

Reading between the lines...
Loaded cached stan model
Using saved stan result


In [49]:
fit_gp_host(a, a.host_data['p(prompt)'], a.host_data['p(highgmass)'], kind='rbtl')

Fitting GP hyperparameters...
[ 0.00284523  0.04228175  0.08850327  2.09917564 -0.03908652 -0.00441083]
    Fit result:           Optimization terminated successfully.
    Color scale:          0.003 ± 0.073
    Intrinsic dispersion: 0.042 ± 0.043 mag
    GP kernel amplitude:  0.089 ± 0.026 mag
    GP length scale:      2.099 ± 3.159
    lSSFR step size:      -0.039 ± 0.034
    gmass step size:      -0.004 ± 0.026
    Fit NMAD:             0.065 mag
    Fit std:              0.095 mag


## SALT2 simultaneous host properties fit

In [62]:
def likelihood_step(residuals, x1, c, prob, errs=0.):
    def calc_likelihood(x):
        s1, s2, err1, err2, alpha, beta = x

        var1 = errs**2 + err1**2
        var2 = errs**2 + err2**2
        
        model_residuals = residuals - alpha * x1 + beta * c

        # likelihood = np.sum(-np.log(
            # prob * 1 / np.sqrt(2 * np.pi * var1) * np.exp(-(model_residuals - s1)**2 / var1)
            # + (1 - prob) * 1 / np.sqrt(2 * np.pi * var2) * np.exp(-(model_residuals - s2)**2 / var2)
        # ))
        
        likelihood = np.sum(-np.log(
            1 / np.sqrt(2 * np.pi * var1) * np.exp(-(model_residuals - (prob * s1 + (1-prob) * s2))**2 / var1)
            # prob * 1 / np.sqrt(2 * np.pi * var1) * np.exp(-(model_residuals - s1)**2 / var1)
            # + (1 - prob) * 1 / np.sqrt(2 * np.pi * var2) * np.exp(-(model_residuals - s2)**2 / var2)
        ))

        return likelihood

    res = minimize(calc_likelihood, [0., 0., 0.1, 0.1, 0.1, 3], method='BFGS')
    param_errs = np.sqrt(np.diag(res.hess_inv))

    # Estimate the variances with the intrinsic components.
    total_vars = errs**2 + prob * res.x[2]**2 + (1 - prob) * res.x[3]**2

    step_means = (res.x[0], res.x[1], res.x[1] - res.x[0])
    step_errs = (param_errs[0], param_errs[1], np.sqrt(param_errs[0]**2 + param_errs[1]**2))
    
    corr_mags = residuals - res.x[4] * x1 + res.x[5] * c
    
    print(res.x)

    return step_means, step_errs, total_vars, corr_mags

def int_disp(vals, pec_vel_disps, axis=None):
    std = np.std(vals, ddof=1, axis=axis)
    corr = np.mean(pec_vel_disps**2, axis=axis)
    corr_std = np.sqrt(np.clip(std**2 - corr, 0, None))

    return corr_std

def analyze_host_variable(variable, mags, mask, uncertainties=None, threshold=None,
                          use_probability=True, plot=True, bootstrap=None,
                          y_label='Residual magnitudes'):
    use_mask = np.where(mask & a.host_mask)[0]

    if isinstance(bootstrap, int):
        np.random.seed(bootstrap)
        bootstrap_idx = np.random.choice(len(use_mask), len(use_mask))
        use_mask = use_mask[bootstrap_idx]
        
    host_data = a.host_data[use_mask]
    use_mags = mags[use_mask]
    
    use_var = host_data[variable]
    # use_var_low = -host_data[variable + '_low']
    # use_var_high = host_data[variable + '_up']
    use_var_low = host_data[variable + '.err_down']
    use_var_high = host_data[variable + '.err_up']
    
    # Default thresholds from Rigault et al. 2019
    if threshold is None:
        if variable == 'lssfr':
            threshold = -10.8
        elif variable == 'gmass':
            threshold = 10
        else:
            # Default, use the median of the variable
            threshold = np.median(use_var)

    # Figure out labels.
    if variable == 'lssfr':
        x_label = 'log(lsSFR)'
    elif variable == 'gmass':
        x_label = 'log($M_* / M_\odot$) (global)'

    # Figure out which weights to use for the step. We want to actually use
    # the probabilities if they are available rather than hard cuts as is done
    # in Rigault et al. 2018.
    label = variable
    use_weights = None

    if use_probability:
        if variable == 'lssfr':
            # plot_color = host_data['p_young']
            # use_weights = 1 - host_data['p_young'] / 100
            plot_color = host_data['p(prompt)']
            use_weights = 1 - host_data['p(prompt)']
            label = '$P_{Young}$'
        elif variable == 'gmass':
            # plot_color = host_data['p_highmass']
            # use_weights = 1 - host_data['p_highmass'] / 100
            plot_color = host_data['p(highgmass)']
            use_weights = 1 - host_data['p(highgmass)']
            label = '$P_{high\ mass}$'

    if use_weights is None:
        # Backup: do hard cuts.
        use_weights = use_var < threshold
        plot_color = use_weights

    if uncertainties is None:
        # If we don't have explicit uncertainties, just use the peculiar velocity contributions.
        uncertainties = a.get_peculiar_velocity_uncertainty()

    use_uncertainties = uncertainties[use_mask]

    step_means, step_errs, total_var, corr_mags = likelihood_step(use_mags, a.salt_x1[use_mask],
                                                       a.salt_colors[use_mask], use_weights,
                                                       use_uncertainties)

    if plot:
        print("Step size: %.3f ± %.3f mag" % (step_means[2], step_errs[2]))
        print("Median step: %.3f" % (np.median(corr_mags[use_weights < 0.5]) - np.median(corr_mags[use_weights > 0.5])))
        plt.figure()
        plt.errorbar(use_var, corr_mags, xerr=(use_var_low, use_var_high), yerr=np.sqrt(total_var), fmt='.', c='gray', alpha=0.5, zorder=-2)
        plt.scatter(use_var, corr_mags, s=100, c=plot_color, edgecolors='gray', cmap=plt.cm.viridis_r)

        # Threshold
        plt.axvline(threshold, c='k', lw=2, ls='--')

        # Show means of each side
        plot_min, plot_max = plt.xlim()
        mean_low, mean_high, mean_diff = step_means
        mean_low_err, mean_high_err, mean_diff_err = step_errs
        plt.plot([plot_min, threshold], [mean_low, mean_low], c='k', zorder=-1)
        plt.fill_between([plot_min, threshold], [mean_low - mean_low_err, mean_low - mean_low_err], [mean_low + mean_low_err, mean_low + mean_low_err], color=plt.cm.viridis(1000), alpha=0.5, zorder=-3)
        plt.plot([threshold, plot_max], [mean_high, mean_high], c='k', zorder=-1)
        plt.fill_between([threshold, plot_max], [mean_high - mean_high_err, mean_high - mean_high_err], [mean_high + mean_high_err, mean_high + mean_high_err], color=plt.cm.viridis(0), alpha=0.5, zorder=-3)

        plt.xlabel(x_label)
        plt.ylabel(y_label)

        plt.xlim(plot_min, plot_max)
        plt.ylim(-0.6, 0.6)
        
        print(step_means, step_errs)


        plt.colorbar(label=label)
        plt.tight_layout()

    return step_means[-1], step_errs[-1]

def bootstrap_step_difference(variable, mags_1, uncertainties_1, mags_2, uncertainties_2, mask,
                              num_resamples=100, **kwargs):
    step_diffs = []
    for bootstrap_iter in range(num_resamples):
        step_size_1, step_err_1 = analyze_host_variable(variable, mags_1, mask, uncertainties_1, bootstrap=bootstrap_iter, plot=False, **kwargs)
        step_size_2, step_err_2 = analyze_host_variable(variable, mags_2, mask, uncertainties_2, bootstrap=bootstrap_iter, plot=False, **kwargs)
        
        step_diffs.append(step_size_2 - step_size_1)
    
    return np.mean(step_diffs), np.std(step_diffs)

In [63]:
analyze_host_variable('lssfr', a.salt_hr_raw, rigault_mask & a.salt_mask & a.uncertainty_mask & a.redshift_color_mask & a.train_mask,
                      a.salt_hr_raw_uncertainties, y_label='SALT2 + $x_1$ residual magnitudes')
plt.savefig('./figures/lssfr_salt_x1.pdf')

[-0.14880984 -0.01468983  0.11297145  0.1        -0.18819478 -2.9538647 ]
Step size: 0.134 ± 0.032 mag
Median step: 0.092


  app.launch_new_instance()
  grad[k] = (f(*((xk + d,) + args)) - f0) / d[k]
  app.launch_new_instance()


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(-0.1488098444463924, -0.014689834606313972, 0.1341200098400784) (0.019264453047408373, 0.025495588768845175, 0.03195534693734158)


## Output for Stella Yu's group

In [89]:
import h5py

m = a.uncertainty_mask

hf = h5py.File('stella_yu_data.h5', 'w')
hf.create_dataset('wavelength', data=a.wave)
hf.create_dataset('flux', data=a.maximum_flux[m])
hf.create_dataset('fluxerr', data=a.maximum_fluxerr[m])
hf.create_dataset('color_law', data=a.rbtl_color_law)
hf.create_dataset('magnitudes', data=a.rbtl_mags[m])
hf.create_dataset('colors', data=a.rbtl_colors[m])
hf.create_dataset('magnitude_measurement_errors', data=a.get_peculiar_velocity_uncertainty()[m])
hf.create_dataset('mag_mask', data=a.redshift_color_mask[m])
hf.close()

In [75]:
a.maximum_flux[m].shape

(173, 288)

In [57]:
a.rbtl_mags[a.uncertainty_mask & a.mag_mask]

AttributeError: 'ManifoldTwinsAnalysis' object has no attribute 'mag_mask'

In [52]:
a.wave

array([3305.49675906, 3316.50858892, 3327.55710325, 3338.64242427,
       3349.76467457, 3360.9239772 , 3372.12045558, 3383.35423357,
       3394.62543542, 3405.9341858 , 3417.2806098 , 3428.66483293,
       3440.08698111, 3451.54718068, 3463.04555841, 3474.58224148,
       3486.15735749, 3497.7710345 , 3509.42340094, 3521.11458572,
       3532.84471815, 3544.61392799, 3556.4223454 , 3568.27010101,
       3580.15732587, 3592.08415147, 3604.05070972, 3616.057133  ,
       3628.1035541 , 3640.19010628, 3652.31692322, 3664.48413907,
       3676.69188841, 3688.94030626, 3701.22952812, 3713.55968991,
       3725.93092802, 3738.3433793 , 3750.79718102, 3763.29247096,
       3775.82938733, 3788.40806879, 3801.02865448, 3813.691284  ,
       3826.39609741, 3839.14323524, 3851.9328385 , 3864.76504865,
       3877.64000762, 3890.55785784, 3903.51874218, 3916.52280402,
       3929.57018718, 3942.661036  , 3955.79549526, 3968.97371025,
       3982.19582675, 3995.46199099, 4008.77234973, 4022.12705

In [78]:
import pandas as pd

In [81]:
f = h5py.File('/home/kyle/Downloads/stella_yu_data.h5', 'r')

In [83]:
np.array(f['mag_mask'])

array([ True,  True,  True,  True, False,  True,  True, False,  True,
       False,  True,  True,  True,  True,  True,  True,  True, False,
        True,  True,  True,  True, False,  True,  True,  True,  True,
       False,  True,  True,  True,  True,  True,  True,  True, False,
       False,  True,  True,  True,  True, False,  True,  True, False,
        True,  True,  True,  True,  True, False,  True,  True,  True,
       False, False,  True, False, False,  True,  True,  True, False,
        True,  True,  True,  True,  True,  True,  True,  True, False,
        True,  True, False, False, False,  True,  True, False, False,
       False, False, False,  True,  True,  True,  True,  True,  True,
        True, False,  True,  True, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False,  True,  True,  True,  True,  True,
        True,  True,

In [80]:
pd.read_hdf('/home/kyle/Downloads/stella_yu_data.h5', 'mask')

KeyError: 'No object named mask in the file'

In [None]:
a = pd.fr

In [87]:
plt.figure()
m = (a.rbtl_colors < 0.5) & (a.redshift_errs < 0.004) & a.uncertainty_mask
plt.scatter(a.redshifts[m], a.rbtl_mags[m], c='k', s=20)

  """Entry point for launching an IPython kernel.


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.collections.PathCollection at 0x7fb797cddd10>

## Kara exposure list dump

In [91]:
all_obs = [i['obs.exp'] for j in a.targets for i in j.spectra]

In [94]:
len(all_obs)

2969

In [92]:
with open('./spectra_list.txt', 'w') as outfile:
    for line in all_obs:
        print(line, file=outfile)