# Load the dataset

In [1]:
run manifold_twins

In [2]:
%matplotlib ipympl

In [3]:
# Default settings for matplotlib figures
#import matplotlib as mpl

# Choose how big to make figures. This will scale the text size.
# mpl.rcParams['figure.figsize'] = (5, 4)

# Set the DPI. This will change how big things appear in Jupyter lab
# mpl.rcParams['figure.dpi'] = 120

In [4]:
# a = ManifoldTwinsAnalysis(idr='HICKORY', verbosity=1)
a = ManifoldTwinsAnalysis(idr='CASCAD', verbosity=1)
# a = ManifoldTwinsAnalysis(idr='CASCAD', verbosity=1, bin_velocity=2000, max_count=100)

Loading dataset...
IDR:          CASCAD
Phase range: [-5.0, 5.0]
Center phase: 0.0
Bin velocity: 1000.0
Cutting SNe:  ['PTF12ecm', 'PTF11mty']


  return getattr(self.data, op)(other)
100%|██████████| 438/438 [00:19<00:00, 22.28it/s]


# Run the analysis

## Interpolate the spectra

In [5]:
# Choose how much of the model to refit. For a full analysis, use 'refit' mode.
# If you already ran everything with the exact same configuration, 'cached' will
# load the previous result. If anything changed, 'cached_uncertainty' can be used
# to keep the uncertainty model from the last fit (which takes a long time to fit,
# and is very stable) but refit everything else. Don't use cached_uncertainty for
# the final analysis, it is only for debugging!

# fit_type = 'refit'              # Refit everything
fit_type = 'cached'             # Use a full cached model that was previously fit.
# fit_type = 'cached_uncertainty' # Use only the uncertainty from a cached model.

In [6]:
print("Modeling maximum spectra with fit type '%s'" % fit_type)

if fit_type == 'refit':
    a.model_maximum_spectra(use_cache=False)
elif fit_type == 'cached':
    a.model_maximum_spectra(use_cache=True)
elif fit_type == 'cached_uncertainty':
    a.model_maximum_spectra(use_cached_model_uncertainty=True)
else:
    print("Unknown fit type! Can't handle")
    
print("Done!")

Modeling maximum spectra with fit type 'cached'
Using saved interpolation result
Done!


## Read between the lines

In [7]:
a.read_between_the_lines()

print("Done!")

Loaded cached stan model
Masking 24/202 targets whose interpolation uncertainty power is more than 0.100 of the intrinsic power.
Done!


## Do embedding

In [8]:
a.do_embedding()

## Apply standardization

In [9]:
a.fit_gp()
# a.apply_polynomial_standardization(1)

a.plot_gp(show_mask=False)

Fitting GP hyperparameters...
Fit result:
      fun: -60.0238198348283
 hess_inv: array([[ 4.95660432e-03, -1.57739048e-05, -6.42191574e-04,
        -3.09649052e-02],
       [-1.57739048e-05,  1.66184963e-04,  4.29648600e-05,
         1.06513050e-02],
       [-6.42191574e-04,  4.29648600e-05,  1.17599305e-03,
         4.21992810e-02],
       [-3.09649052e-02,  1.06513050e-02,  4.21992810e-02,
         3.25004757e+00]])
      jac: array([-8.58306885e-06,  4.29153442e-06,  8.10623169e-06,  0.00000000e+00])
  message: 'Optimization terminated successfully.'
     nfev: 192
      nit: 20
     njev: 32
   status: 0
  success: True
        x: array([-0.0493858 ,  0.07427882,  0.10368073,  2.59055417])
Fit NMAD:        0.08315816172162796
Fit std:         0.09993352315667943


FigureCanvasNbAgg()

# Interpolation plots

## Examples of interpolations

In [11]:
def plot_same_night(idx, save=False):
    night_flux = a.flux[a.target_map == idx]
    phases = a.salt_phases[a.target_map == idx]
    model = a.interpolation_result['maximum_flux'][idx]
    model_err = a.interpolation_result['maximum_fluxerr'][idx]
    plt.figure()
    for flux, phase in zip(night_flux, phases):
        plt.plot(a.wave, flux, label='%.2f days' % phase)
    plt.plot(a.wave, model, c='k', ls='--', label='Model')
    plt.fill_between(a.wave, model - model_err, model + model_err, facecolor='k', alpha=0.3)
    plt.legend()
    plt.title(a.targets[idx])
    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Flux')
    
    if save:
        plt.savefig('./figures/interpolation_model_%s.pdf' % a.targets[idx])
    
    # plt.figure()
    # shift_frac = (a.interpolation_result['shift_fluxerr'] / a.interpolation_result['shift_flux'])[a.target_map == idx]
    # plt.plot(a.wave, shift_frac.T)
    # orig_frac = (a.fluxerr / a.flux)[a.target_map == idx]
    # plt.plot(a.wave, orig_frac.T, ls='--')
    
    
    phase_slope = a.interpolation_result['phase_slope']
    phase_quadratic = a.interpolation_result['phase_quadratic']
    gray_offsets = a.interpolation_result['gray_offsets'][a.target_map == idx]
    model_diffs = a.interpolation_result['model_diffs'][a.target_map == idx]
    
    plt.figure()
    for i, (flux, phase, gray_offset, model_diff) in enumerate(zip(night_flux, phases, gray_offsets, model_diffs)):
        plt.plot(a.wave, -2.5*np.log10(flux / model), label='Data %.2f days' % phase, c='C%d' % i)
    for i, (flux, phase, gray_offset, model_diff) in enumerate(zip(night_flux, phases, gray_offsets, model_diffs)):
        plt.plot(a.wave, model_diff, label='Model %.2f days' % phase, c='C%d' % i, ls='--')
    plt.legend(ncol=2, loc=1)
    plt.title(a.targets[idx])
    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Difference from maximum light (mag)')
    
    if save:
        plt.savefig('./figures/interpolation_difference_%s.pdf' % a.targets[idx])
    
    plt.figure()
    for i, (flux, phase, gray_offset, model_diff) in enumerate(zip(night_flux, phases, gray_offsets, model_diffs)):
        plt.plot(a.wave, -2.5*np.log10(flux / model) - model_diff, label='Residuals %.2f days' % phase, c='C%d' % i)
    plt.legend()
    plt.title(a.targets[idx])
    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Interpolation residuals (mag)')
    
    if save:
        plt.savefig('./figures/interpolation_residuals_%s.pdf' % a.targets[idx])
    
from ipywidgets import interact
interact(plot_same_night, idx=(0, len(a.targets)-1))

interactive(children=(IntSlider(value=100, description='idx', max=201), Checkbox(value=False, description='sav…

<function __main__.plot_same_night(idx, save=False)>

In [12]:
plot_targets = ['PTF13ayw', 'SN2004gc']
for plot_target in plot_targets:
    target_names = np.array([i.name for i in a.targets])
    plot_idx = np.where(target_names == plot_target)[0][0]

    plot_same_night(plot_idx, save=True)

FigureCanvasNbAgg()

FigureCanvasNbAgg()

FigureCanvasNbAgg()

FigureCanvasNbAgg()

FigureCanvasNbAgg()

FigureCanvasNbAgg()

## Interpolation model

In [14]:
phase_slope = a.interpolation_result['phase_slope']
phase_quadratic = a.interpolation_result['phase_quadratic']
phase_slope_x1 = a.interpolation_result['phase_slope_x1']
phase_quadratic_x1 = a.interpolation_result['phase_quadratic_x1']

def evaluate_phase_difference(phase, x1=0):
    phase_difference = (
        phase_slope * phase
        + phase_quadratic * phase * phase
        + phase_slope_x1 * x1 * phase
        + phase_quadratic_x1 * x1 * phase * phase
    )
    
    return phase_difference

# Look at change in phase for the same x1
max_phase = a.phase_width
min_phase = -a.phase_width
num_phases = 10
phases = np.linspace(min_phase, max_phase, num_phases)

plt.figure()
norm = plt.Normalize(vmin=min_phase, vmax=max_phase)
cmap = plt.cm.Spectral_r
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array(phases)

for phase in phases:
    plt.plot(a.wave, evaluate_phase_difference(phase), c=cmap(norm(phase)))
    
plt.colorbar(sm, label='Phase (days)')

# plt.xlim(-5.2, 5.2)
plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Applied difference (mag)')
plt.gca().invert_yaxis()
plt.tight_layout()
plt.savefig('./figures/interpolation_phase_difference.pdf')


def plot_x1_difference(phase):
    # Look at change in phase for the same x1
    min_x1 = -3
    max_x1 = 2
    num_x1s = 10
    x1s = np.linspace(min_x1, max_x1, num_x1s)

    plt.figure()
    norm = plt.Normalize(vmin=min_x1, vmax=max_x1)
    cmap = plt.cm.Spectral_r
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array(x1s)

    for x1 in x1s:
        plt.plot(a.wave, evaluate_phase_difference(phase, x1) - evaluate_phase_difference(phase, 0), c=cmap(norm(x1)))

    plt.colorbar(sm, label='SALT2 $x_1$')

    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Applied difference relative to $x_1=0$ (mag)')
    plt.title('Difference in interpolation for at %+d days' % phase)
    plt.gca().invert_yaxis()
    plt.tight_layout()
    plt.savefig('./figures/interpolation_x1_difference_phase_%d.pdf' % phase)
    
for phase in [-5, -3, -1, 1, 3, 5]:
    plot_x1_difference(phase)

FigureCanvasNbAgg()

FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()

## Interpolation uncertainty

In [15]:
try:
    coefs = a.interpolation_result['phase_dispersion_coefficients']
except KeyError:
    coefs = a.stan_data['phase_dispersion_coefficients']
num_phase_coefficients = len(coefs)

def evaluate_phase_dispersion(phase):
    phase_scale = np.abs((num_phase_coefficients / 2) * (phase / a.phase_width))
    full_bins = int(np.floor(phase_scale))
    remainder = phase_scale - full_bins
    
    phase_coefficients = np.zeros(num_phase_coefficients)
    
    for j in range(full_bins + 1):
        if j == full_bins:
            weight = remainder
        else:
            weight = 1
            
        if weight == 0:
            break
            
        if phase > 0:
            phase_bin = num_phase_coefficients // 2 + j
        else:
            phase_bin = num_phase_coefficients // 2 - 1 - j
            
        phase_coefficients[phase_bin] = weight
        
    fractional_dispersion = phase_coefficients.dot(coefs)
    
    # Convert to magnitudes
    mag_dispersion = frac_to_mag(fractional_dispersion)
    
    return mag_dispersion

phases = np.linspace(-a.phase_width, a.phase_width, 1 + num_phase_coefficients)

eval_coefs = np.array([evaluate_phase_dispersion(phase) for phase in phases])

# Uncertainties for different wavelengths
plt.figure()
num_wave = 10
for i in range(num_wave):
    min_wave = a.wave[0]
    max_wave = a.wave[-1]
    wave_range = max_wave - min_wave
    target_wave = min_wave + wave_range * i / (num_wave - 1)
    idx = np.argmin(np.abs(a.wave - target_wave))
    use_wave = a.wave[idx]
    color = plt.cm.rainbow((use_wave - min_wave) / wave_range)
    plt.plot(phases, eval_coefs[:, idx], label='%d $\AA$' % use_wave, c=color)
    
plt.xlim(-5.2, 5.2)
plt.xlabel('Phase (days)')
plt.ylabel('Interpolation uncertainty (mag)')
plt.legend()
plt.savefig('./figures/interpolation_uncertainty_phase.pdf')

plt.figure()
for i in range(len(phases)):
    plt.plot(a.wave, eval_coefs[i], label='%.2f days' % phases[i])
plt.legend()
plt.xlabel('Wavelength $(\AA$)')
plt.ylabel('Interpolation uncertainty (mag)')
plt.savefig('./figures/interpolation_uncertainty_wavelength.pdf')



FigureCanvasNbAgg()



FigureCanvasNbAgg()

## Model accuracy

In [16]:
max_flux = a.interpolation_result['maximum_flux']
max_fluxerr = a.interpolation_result['maximum_fluxerr']

max_magerr = frac_to_mag(max_fluxerr / max_flux)

rbtl_dispersion = frac_to_mag(a.rbtl_result['fractional_dispersion'])

def plot_uncertainties(show_rbtl=False):
    plt.figure()
    offset = 29
    
    # Make sure that we include the worst offender.
    max_loc = np.argmax(np.sum(max_magerr**2, axis=1))
    start = max_loc % offset
    
    for idx in range(start, len(a.targets), offset):
        plt.plot(a.wave, max_magerr[idx], label=a.targets[idx].name)
    plt.legend(ncol=2)
    
    plt.xlabel('Wavelength ($\AA$)')
    
    if show_rbtl:
        plt.plot(a.wave, rbtl_dispersion, label='Supernova intrinsic dispersion', c='k', lw=2, ls='--')
        plt.ylabel('Dispersion (magnitude)')
        path = './figures/interpolation_uncertainty_rbtl.pdf'
    else:
        plt.ylabel('Interpolation uncertainty (magnitude)')
        path = './figures/interpolation_uncertainty_norbtl.pdf'
        
    plt.legend(ncol=2)
    plt.tight_layout()
    plt.savefig(path)
        
plot_uncertainties(False)
plot_uncertainties(True)


plt.figure()
for idx in range(len(a.targets)):
    if idx == 0:
        label = 'Individual interpolation uncertainties'
    else:
        label = ''
    plt.plot(a.wave, max_magerr[idx], label=label, alpha=0.02, c='C0')
plt.plot(a.wave, rbtl_dispersion, label='Supernova intrinsic dispersion', lw=2, ls='--', c='k')
plt.plot(a.wave, np.median(max_magerr, axis=0), label='Median interpolation uncertainty', lw=2, ls='--', c='C0')
plt.legend()
plt.ylabel('Dispersion (magnitude)')
plt.xlabel('Wavelength ($\AA$)')
plt.tight_layout()
plt.savefig('./figures/interpolation_uncertainty_median.pdf')



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()

# Reading between the lines plots

## Show spectra before and after

In [17]:
plt.figure()
plt.plot(a.wave, a.maximum_flux[a.interp_mask][0], c='C0', alpha=1, lw=0.2, label='Individual spectra')
plt.plot(a.wave, a.maximum_flux[a.interp_mask][1:].T, c='C0', alpha=1., lw=0.2)
plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Flux (arbitrary units)')
plt.legend()
plt.tight_layout()
plt.savefig('./figures/spectra_at_maximum.pdf')

plt.figure()
plt.plot(a.wave, a.scale_flux[a.interp_mask][0], c='C0', alpha=1, lw=0.2, label='Individual spectra')
plt.plot(a.wave, a.scale_flux[a.interp_mask][1:].T, c='C0', alpha=0.5, lw=0.1)
plt.plot(a.wave, a.mean_flux, c='k', lw=2, ls='--', label='Mean spectrum')
plt.legend()
plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Flux (arbitrary units)')
plt.tight_layout()
plt.savefig('./figures/scale_spectra.pdf')

plt.figure()
fractional_dispersion = a.rbtl_result['fractional_dispersion']
plt.plot(a.wave, a.mean_flux, c='k', lw=2, ls='--', label='Mean spectrum')
plt.fill_between(a.wave, a.mean_flux * (1 - fractional_dispersion), a.mean_flux * (1 + fractional_dispersion), label='Supernova intrinsic dispersion', alpha=0.5)
plt.legend()
plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Flux (arbitrary units)')
plt.tight_layout()
plt.savefig('./figures/scale_spectra.pdf')



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()

## Magnitudes

In [18]:
plt.figure()

plt.scatter(a.redshifts[a.interp_mask], a.mags[a.interp_mask], s=15, c='C3', label='Supernovae rejected by cuts')
plt.scatter(a.redshifts[a.good_mag_mask], a.mags[a.good_mag_mask], s=15, c='C0', label='Supernovae passing cuts')

z_range = np.linspace(0.001, 0.09, 100)
pec_vel_disp = 0.00217 / z_range
plt.fill_between(z_range, -pec_vel_disp, pec_vel_disp, alpha=0.2, label='Peculiar velocity dispersion')
plt.axvline(0.02, lw=1, ls='--', c='k', label='Redshift cutoff')

plt.xlim(0.001, 0.09)
plt.ylim(-1, 1.5)
plt.xlabel('Redshift')
plt.ylabel('RBTL measured magnitude')
plt.legend()
plt.tight_layout()
plt.savefig('./figures/rbtl_magnitude.pdf')



FigureCanvasNbAgg()

In [19]:
plt.figure()

plt.scatter(a.redshifts[a.good_mag_mask], a.mags[a.good_mag_mask], s=15, c='C0', label='Supernovae passing cuts')
plt.xlabel('Redshift')
plt.ylabel('RBTL measured magnitude')


z_range = np.linspace(0.001, 0.09, 100)
pec_vel_disp = 0.00217 / z_range
plt.fill_between(z_range, -pec_vel_disp, pec_vel_disp, alpha=0.2, label='Peculiar velocity dispersion')

plt.legend()
plt.tight_layout()
plt.savefig('./figures/rbtl_magnitude_cut.pdf')
plt.xlim(0.01, 0.09)
plt.ylim(-0.5, 0.5)



FigureCanvasNbAgg()

(-0.5, 0.5)

In [20]:
print("Raw RBTL mag std:  %.3f mag" % np.std(a.mags[a.good_mag_mask]))
print("Raw RBTL mag NMAD: %.3f mag" % math.nmad(a.mags[a.good_mag_mask]))

Raw RBTL mag std:  0.122 mag
Raw RBTL mag NMAD: 0.111 mag


# Manifold learning plots

## Reconstruction uncertainty

In [21]:
from sklearn.preprocessing import KernelCenterer

num_show = 5

# Do an initial embedding with an absurd number of components to get the full variance.
num_ref = 100
a.do_embedding(n_components=num_ref)
variances = a.iso.kernel_pca_.lambdas_
ref_var = np.sum(variances)

plt.figure()
plt.scatter(np.arange(num_show), variances[:num_show] / ref_var, label='Explained variance of each component')
plt.axhline(0.1, label='Interpolation uncertainty cut', ls='--', c='C3')
plt.axhline(np.mean(a.interp_power_fraction[a.interp_mask]), label='Mean interpolation uncertainty of used spectra', ls='--', c='C2')
plt.ylim(0, None)
plt.xlabel('Component number')
plt.ylabel('Fraction of total variance')
plt.xticks(np.arange(num_show), np.arange(num_show) + 1)
plt.legend()
plt.tight_layout()

plt.savefig('./figures/isomap_component_variance.pdf')

  X_transformed = self.alphas_ * np.sqrt(self.lambdas_)


FigureCanvasNbAgg()

## Twin reconstruction

In [22]:
# Plot where twins and non-twins end up for different number of components.
# We also make a summary plot.
confused_fraction = []

for n_components in range(1, 6):
    a.do_embedding(n_components=n_components)
    leakage_matrix = a.plot_twin_distances()
    if n_components == 1:
        title = '1 Component + Color'
    else:
        title = '%d Components + Color' % n_components
    plt.title(title)
    plt.tight_layout()
    plt.savefig('./figures/twins_recovery_%d_components.pdf' % n_components)
    
    confused_fraction.append(leakage_matrix[3, 0] + leakage_matrix[3, 1])

plt.figure()
plt.scatter(np.arange(len(confused_fraction)) + 1, confused_fraction)
plt.ylim(0, 0.1)
plt.xlabel('Number of components (in addition to color)')
plt.ylabel('Fraction of non-twins confused as twins')
plt.savefig('./figures/twins_confusion.pdf')



FigureCanvasNbAgg()

Unnamed: 0,To Best 10% of twins,To 10-20%,To 20-50%,To Worst 50% of twins
From Best 10% of twins,0.300761,0.273477,0.420685,0.005076
From 10-20%,0.16381,0.179048,0.565079,0.092063
From 20-50%,0.111746,0.106455,0.415873,0.365926
From Worst 50% of twins,0.040122,0.045582,0.15325,0.761046




FigureCanvasNbAgg()

Unnamed: 0,To Best 10% of twins,To 10-20%,To 20-50%,To Worst 50% of twins
From Best 10% of twins,0.585025,0.296954,0.11802,0.0
From 10-20%,0.193651,0.308571,0.493333,0.004444
From 20-50%,0.055873,0.112381,0.597037,0.234709
From Worst 50% of twins,0.010792,0.011427,0.119477,0.858177




FigureCanvasNbAgg()

Unnamed: 0,To Best 10% of twins,To 10-20%,To 20-50%,To Worst 50% of twins
From Best 10% of twins,0.725888,0.241751,0.03236,0.0
From 10-20%,0.194921,0.427937,0.377143,0.0
From 20-50%,0.02455,0.104127,0.702434,0.168889
From Worst 50% of twins,0.001143,0.003555,0.096623,0.898553




FigureCanvasNbAgg()

Unnamed: 0,To Best 10% of twins,To 10-20%,To 20-50%,To Worst 50% of twins
From Best 10% of twins,0.744924,0.226523,0.028553,0.0
From 10-20%,0.194921,0.452063,0.353016,0.0
From 20-50%,0.018201,0.102222,0.723386,0.15619
From Worst 50% of twins,0.001143,0.00292,0.089639,0.906171




FigureCanvasNbAgg()

Unnamed: 0,To Best 10% of twins,To 10-20%,To 20-50%,To Worst 50% of twins
From Best 10% of twins,0.756345,0.214467,0.029188,0.0
From 10-20%,0.187937,0.463492,0.347937,0.000635
From 20-50%,0.017354,0.103915,0.722751,0.155979
From Worst 50% of twins,0.000762,0.002031,0.090909,0.906171




FigureCanvasNbAgg()

## Plot slices through the manifold

In [23]:
a.do_embedding()

def plot_slice(scan_component, closest_count=10, max_dist=1., loc=np.zeros(a.trans.shape[1] - 1)):
    loc = np.asarray(loc)
    mask = a.interp_mask

    use_trans = a.trans[mask]
    use_flux = a.scale_flux[mask]

    other_trans = np.delete(use_trans, scan_component, axis=1)
    dists = np.sqrt(np.sum((other_trans - loc)**2, axis=1))

    dist_limit = np.min([np.sort(dists)[closest_count], max_dist])
    scan_cut = dists < dist_limit
    
    scan_trans = use_trans[scan_cut, scan_component]
    
    sort_trans = np.sort(scan_trans)
    min_comp = sort_trans[0]
    max_comp = sort_trans[-1]
    # min_comp = np.min(scan_trans)
    # max_comp = np.max(scan_trans)
    # cmap = plt.cm.viridis
    cmap = plt.cm.coolwarm

    plt.figure(figsize=(8, 5))
    for spec, val in zip(use_flux[scan_cut], scan_trans):
        plt.plot(a.wave, spec, c=cmap((val - min_comp) / (max_comp - min_comp)))

    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Normalized flux')

    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=min_comp, vmax=max_comp))
    sm._A = []
    plt.colorbar(sm, label='Value of Component %d' % (scan_component + 1))
    plt.title('Component %d' % (scan_component + 1))
    
    plt.savefig('./figures/component_%d_effect.pdf' % (scan_component + 1))

In [24]:
plot_slice(0, loc=[0.5, -0.5])
plot_slice(1)
plot_slice(2, loc=[0, 1])



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()

## Plot steps through a components values

In [25]:
# a.do_embedding()

def plot_steps(component, num_steps=10, xlim=None):
    mask = a.interp_mask

    use_trans = a.trans[mask, component]
    use_flux = a.scale_flux[mask]
    
    min_trans = np.percentile(use_trans, 5)
    max_trans = np.percentile(use_trans, 95)
    
    bin_edges = np.linspace(min_trans, max_trans, num_steps+1)
    
    bin_edges[0] = -1e20
    bin_edges[-1] = 1e20
    
    plt.figure(figsize=(8, 5))
    
    cmap = plt.cm.coolwarm
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=min_trans, vmax=max_trans))
    sm._A = []

    if xlim is not None:
        wave_mask = (a.wave > xlim[0] - 50) & (a.wave < xlim[1] + 50)
    else:
        wave_mask = np.ones(len(a.wave), dtype=bool)
    
    for step in range(num_steps):
        step_mask = (use_trans >= bin_edges[step]) & (use_trans < bin_edges[step+1])
        step_trans = use_trans[step_mask]

        mean_val = np.mean(step_trans)
        step_flux = np.median(use_flux[step_mask], axis=0)
        
        if step == 0:
            label = 'Median spectra in each component bin'
        else:
            label = ''
        
        plt.plot(a.wave[wave_mask], step_flux[wave_mask], c=sm.to_rgba(mean_val), label=label)
        
    if xlim is not None:
        plt.xlim(*xlim)
        
    plt.colorbar(sm, label='Value of Component %d' % (component + 1))
    plt.title('Component %d' % (component + 1))
    
    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Normalized flux')
    plt.ylim(0, None)
    
    plt.legend()
    
    if xlim is None:
        plt.savefig('./figures/component_%d_steps.pdf' % (component + 1))
    else:
        plt.savefig('./figures/component_%d_steps_zoom_%d_%d.pdf' % (component + 1, xlim[0], xlim[1]))

In [26]:
plot_steps(0)
plot_steps(0, xlim=(3300, 4500))
plot_steps(0, xlim=(5200, 6700))



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()

In [27]:
plot_steps(1)
plot_steps(1, xlim=(5200, 6700))



FigureCanvasNbAgg()



FigureCanvasNbAgg()

In [28]:
plot_steps(2)
plot_steps(2, xlim=(4900, 6900))



FigureCanvasNbAgg()



FigureCanvasNbAgg()

## Comparison to original twins

In [29]:
a.plot_twin_pairings()
plt.ylim(0, None)
plt.savefig('./figures/twin_dispersion.pdf')

RMS  20%: 0.10353032011245762
NMAD 20%: 0.05477729735584065




FigureCanvasNbAgg()

## Comparision to previous spectral classifications

In [46]:
a.do_blondin_plot()



FigureCanvasNbAgg()

In [31]:
a.do_component_blondin_plot()



FigureCanvasNbAgg()

# Standardization 

## GP standardization

In [33]:
# Reset to defaults in case things got messed up
a.do_embedding()
a.fit_gp()

Fitting GP hyperparameters...
Fit result:
      fun: -60.0238198348283
 hess_inv: array([[ 4.95660432e-03, -1.57739048e-05, -6.42191574e-04,
        -3.09649052e-02],
       [-1.57739048e-05,  1.66184963e-04,  4.29648600e-05,
         1.06513050e-02],
       [-6.42191574e-04,  4.29648600e-05,  1.17599305e-03,
         4.21992810e-02],
       [-3.09649052e-02,  1.06513050e-02,  4.21992810e-02,
         3.25004757e+00]])
      jac: array([-8.58306885e-06,  4.29153442e-06,  8.10623169e-06,  0.00000000e+00])
  message: 'Optimization terminated successfully.'
     nfev: 192
      nit: 20
     njev: 32
   status: 0
  success: True
        x: array([-0.0493858 ,  0.07427882,  0.10368073,  2.59055417])
Fit NMAD:        0.08315816172162796
Fit std:         0.09993352315667943


In [50]:
a.scatter(a.salt_x1)



FigureCanvasNbAgg()

In [44]:
a.scatter(a.mags, mask=a.good_mag_mask, vmin=0.2, vmax=-0.2, marker_size=60)
plt.



FigureCanvasNbAgg()

In [35]:
a.plot_gp(show_mask=False)
plt.savefig('./figures/gp_mags.pdf')



FigureCanvasNbAgg()

## Check vs phases of original spectra

In [78]:
plt.figure()
plt.scatter(a.salt_phases[a.center_mask][a.good_mag_mask], a.corr_mags[a.good_mag_mask], label='Individual observations')
math.plot_binned_mean(a.salt_phases[a.center_mask][a.good_mag_mask], a.corr_mags[a.good_mag_mask], c='C2', label='Binned mean')
plt.xlabel('Phase of closest spectrum to maximum (days)')
plt.ylabel('Hubble residual (mag)')
plt.legend()



FigureCanvasNbAgg()

<matplotlib.legend.Legend at 0x7f4c3b955908>

# SALT2 comparison

In [79]:
# Load SALT2 Hubble residuals
a.calculate_salt_hubble_residuals()

Pass 0, MB=-19.149, alpha=0.129, beta=2.985
  -> new intrinsic_dispersion=0.119
Pass 1, MB=-19.149, alpha=0.129, beta=2.987
  -> new intrinsic_dispersion=0.119
Pass 2, MB=-19.149, alpha=0.129, beta=2.987
  -> new intrinsic_dispersion=0.119
Pass 3, MB=-19.149, alpha=0.129, beta=2.987
  -> new intrinsic_dispersion=0.119
Pass 4, MB=-19.149, alpha=0.129, beta=2.987
  -> new intrinsic_dispersion=0.119
SALT2 Hubble fit: 
    MB:    -19.14936035870967
    alpha: 0.12912169654736533
    beta:  2.986549046949107
    σ_int: 0.11893666801711794
    std:   0.1506544775204048
    NMAD:  0.10991458888441766


## SALT2 colors

In [80]:
plt.figure()
plt.scatter(a.salt_color, a.colors, s=5)
plt.xlabel('SALT2 Color ($c$)')
plt.ylabel('RBTL Color ($A_V$)')
plt.tight_layout()

plt.savefig('./figures/salt2_color_comparison.pdf')



FigureCanvasNbAgg()

## SALT2 X1

In [44]:
a.scatter(a.salt_x1, a.interp_mask, label='SALT $x_1$')
plt.savefig('./figures/salt2_x1_components.pdf')



FigureCanvasNbAgg()

In [62]:
# Find the best predictor of x1
def to_min(x):
    diff = a.salt_x1 - a.trans.dot(x)
    return np.nanstd(diff[a.good_salt_mask])

res = minimize(to_min, [0, 0, 0])

norm_x = res.x / np.sqrt(np.sum(res.x**2))
print(norm_x)

plt.figure()
plt.scatter(a.trans.dot(res.x), a.salt_x1)
plt.xlabel('Rotated Isomap components')
plt.ylabel('SALT2 $x_1$')
plt.savefig('./figures/rotated_isomap_salt_x1.pdf')

[ 0.30436401  0.93165044 -0.19846913]




FigureCanvasNbAgg()

## SALT2 outliers (Type Iax)

In [63]:
# Outlier spectra
mask = (a.trans[:, 0] > 5) & (a.trans[:, 1] > 2)
print(a.targets[mask])
print(a.colors[mask])
print(a.redshifts[mask])
print(a.mags[mask])

idx2 = np.where((a.trans[:, 0] > 5) & (a.trans[:, 1] < 2))[0][0]

plt.figure()
for i in np.where(mask)[0]:
    plt.plot(a.wave, a.scale_flux[i], label=a.targets[i].name)
    
plt.plot(a.wave, a.scale_flux[idx2], c='k', ls='--', label=a.targets[idx2].name)

plt.legend()

plt.xlabel('Wavelength ($\AA$)')
plt.ylabel('Flux (arbitrary units)')

plt.savefig('./figures/type_iax_comparison.pdf')

[Target(name="LSQ12fhs") Target(name="SN2005cc") Target(name="SN2011ay")]
[0.99106963 1.14867713 1.01389495]
[0.03202726 0.00785178 0.02125125]
[ 0.24880851  1.42649857 -0.45278761]




FigureCanvasNbAgg()

## SALT2 magnitudes vs components

In [70]:
a.scatter(a.salt_hr, mask=a.good_salt_mask, vmin=-0.3, vmax=0.3, label='SALT2 Hubble residuals')
plt.savefig('./figures/salt2_hr_components.pdf')



FigureCanvasNbAgg()

In [69]:
plt.figure()

use_x = a.trans[:, 0]

mask = a.salt_mask & a.redshift_color_mask & a.interp_mask
plt.scatter(use_x[mask], a.salt_hr[mask], label='Individual supernovae')
math.plot_binned_mean(use_x[mask], a.salt_hr[mask], c='C2', lw=2, label='Binned mean')
plt.legend()

plt.xlabel('Component 1')
plt.ylabel('SALT2 Hubble residual (mag)')

plt.tight_layout()
plt.savefig('./figures/salt2_hr_component_1.pdf')

plt.figure()
plt.hist(a.salt_hr[(use_x < 2) & mask], 10, (-0.6, 0.4), alpha=0.3, color='C0', label='Component 1 < 2', density=True)
plt.hist(a.salt_hr[(use_x < 2) & mask], 10, (-0.6, 0.4), histtype='step', lw=2, color='C0', density=True)
plt.hist(a.salt_hr[(use_x > 2) & mask], 10, (-0.6, 0.4), alpha=0.3, color='C1', label='Component 1 > 2', density=True)
plt.hist(a.salt_hr[(use_x > 2) & mask], 10, (-0.6, 0.4), histtype='step', lw=2, color='C1', density=True)

plt.xlabel('SALT2 Hubble residual (mag)')
plt.ylabel('Normalized counts')
plt.legend(loc=2)

plt.tight_layout()
plt.savefig('./figures/salt2_hr_hist.pdf')



FigureCanvasNbAgg()



FigureCanvasNbAgg()

# Host galaxy correlations

In [72]:
a.load_host_data()

In [73]:
a.fit_gp()
a.plot_host_variable('lssfr', threshold=-10.8, mag_type='twins', match_masks=True)

Fitting GP hyperparameters...
Fit result:
      fun: -60.0238198348283
 hess_inv: array([[ 4.95660432e-03, -1.57739048e-05, -6.42191574e-04,
        -3.09649052e-02],
       [-1.57739048e-05,  1.66184963e-04,  4.29648600e-05,
         1.06513050e-02],
       [-6.42191574e-04,  4.29648600e-05,  1.17599305e-03,
         4.21992810e-02],
       [-3.09649052e-02,  1.06513050e-02,  4.21992810e-02,
         3.25004757e+00]])
      jac: array([-8.58306885e-06,  4.29153442e-06,  8.10623169e-06,  0.00000000e+00])
  message: 'Optimization terminated successfully.'
     nfev: 192
      nit: 20
     njev: 32
   status: 0
  success: True
        x: array([-0.0493858 ,  0.07427882,  0.10368073,  2.59055417])
Fit NMAD:        0.08315816172162796
Fit std:         0.09993352315667943
Threshold:   -10.800
Mean diff:   -0.0242 ± 0.0246 mag
Median diff: -0.0050 mag




FigureCanvasNbAgg()

In [76]:
a.fit_gp(kind='salt_raw')
a.plot_host_variable('lssfr', threshold=-10.8, mag_type='twins', match_masks=True)

Fitting GP hyperparameters...
Fit result:
      fun: -81.71301705245749
 hess_inv: array([[ 2.30903743e-02, -1.41065728e-04, -2.00234329e-04,
        -9.51596122e-05],
       [-1.41065728e-04,  3.65133161e-05,  2.71106750e-05,
         2.43247052e-05],
       [-2.00234329e-04,  2.71106750e-05,  1.86129315e-03,
         1.83327085e-05],
       [-9.51596122e-05,  2.43247052e-05,  1.83327085e-05,
         1.64348039e-05]])
      jac: array([ 9.53674316e-07, -4.76837158e-06,  9.53674316e-07,  2.86102295e-06])
  message: 'Optimization terminated successfully.'
     nfev: 510
      nit: 47
     njev: 85
   status: 0
  success: True
        x: array([ 3.10156108, -0.08807445,  0.31346387,  6.53565387])
Fit NMAD:        0.14060603350646037
Fit std:         0.15148556191266227
Threshold:   -10.800
Mean diff:   -0.0121 ± 0.0407 mag
Median diff: 0.0202 mag




FigureCanvasNbAgg()

In [77]:
a.load_host_data()
a.calculate_salt_hubble_residuals()

a.fit_gp(kind='salt_raw')
# a.fit_gp()
a.plot_gp()
# a.apply_polynomial_standardization(kind='twins')

Pass 0, MB=-19.149, alpha=0.129, beta=2.985
  -> new intrinsic_dispersion=0.119
Pass 1, MB=-19.149, alpha=0.129, beta=2.987
  -> new intrinsic_dispersion=0.119
Pass 2, MB=-19.149, alpha=0.129, beta=2.987
  -> new intrinsic_dispersion=0.119
Pass 3, MB=-19.149, alpha=0.129, beta=2.987
  -> new intrinsic_dispersion=0.119
Pass 4, MB=-19.149, alpha=0.129, beta=2.987
  -> new intrinsic_dispersion=0.119
SALT2 Hubble fit: 
    MB:    -19.14936035870967
    alpha: 0.12912169654736533
    beta:  2.986549046949107
    σ_int: 0.11893666801711794
    std:   0.1506544775204048
    NMAD:  0.10991458888441766
Fitting GP hyperparameters...
Fit result:
      fun: -81.71301705245749
 hess_inv: array([[ 2.30903743e-02, -1.41065728e-04, -2.00234329e-04,
        -9.51596122e-05],
       [-1.41065728e-04,  3.65133161e-05,  2.71106750e-05,
         2.43247052e-05],
       [-2.00234329e-04,  2.71106750e-05,  1.86129315e-03,
         1.83327085e-05],
       [-9.51596122e-05,  2.43247052e-05,  1.83327085e-05,
  



FigureCanvasNbAgg()

In [71]:
a.plot_host_variable?

[0;31mSignature:[0m
[0ma[0m[0;34m.[0m[0mplot_host_variable[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mvariable[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmask[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmag_type[0m[0;34m=[0m[0;34m'twins'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmatch_masks[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mthreshold[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Plot diagnostics for some host variable.

Valid variable names are the keys in the host_data Table that comes
from load_host_data.

mag_type selects which magnitudes to plot. The options are:
- twins: use the manifold twins magnitudes (default)
- salt: use the SALT2 corrected Hubble residuals

If match_masks is True, then the masks required for both the twins
manifold and SALT2 are applied (leaving a smaller dataset).
[0;31mFile:[0m      /hom

In [38]:
a.plot_host(threshold=-10.8)
# a.plot_host(cut=a.mag_cut & (a.colors > -0.1))
# a.plot_host()

interactive(children=(Dropdown(description='variable', options=('lmass', 'lmass_up', 'lmass_low', 'lsfr', 'lsf…

In [23]:
plt.figure()
plt.scatter(a.salt_x1[a.good_salt_mask], a.corr_mags[a.good_salt_mask])



FigureCanvasNbAgg()

<matplotlib.collections.PathCollection at 0x7f1b63092dd8>

In [25]:
plt.figure()
plt.scatter(a.host_data['lssfr'][a.salt_mask], a.salt_hr[a.salt_mask])



FigureCanvasNbAgg()

<matplotlib.collections.PathCollection at 0x7f1b6304a4e0>

In [97]:
host_results = {
    # 'Local Mass':  [0.008,  0.04,   -0.02,  0.028],
    # 'Local SFR':   [0.013,  -0.007, -0.018, 0.015],
    # 'Local SSFR':  [0.058,  0.0956, 0.029,  0.070],
    # 'Global Mass': [0.0258,  0.061, -0.0136,-0.008],
    'Local Mass':  [-0.0041,  0.0488],
    'Local SSFR':  [0.0164,  0.0744],
    'Global Mass': [-0.0121,  0.0517],
}

err = 0.025

plt.figure()
labels = []
for idx, (key, values) in enumerate(host_results.items()):
    for val_idx, value in enumerate(values):
        if val_idx >= 2:
            continue
        marker = 'oovv'[val_idx]
        # color = 'C%d' % (val_idx % 2)
        color = 'C%d' % (val_idx % 2)
        if idx == 0:
            label = ['Manifold twins', 'SALT2', 'Manifold twins color cut', 'SALT2 color cut'][val_idx]
        else:
            label = None
        gap = 0.1
        # xpos = idx - 3.*gap + 2*gap * val_idx + gap * (val_idx // 2)
        # xpos = idx - 1.5*gap + gap * val_idx
        xpos = idx - 0.5*gap + gap * val_idx
        plt.errorbar(xpos, value, err, c=color, alpha=1.)
        plt.plot(xpos, value, marker=marker, c=color, label=label)
        
    labels.append(key)

plt.xticks(np.arange(len(labels)), labels=labels)
plt.axhline(0., c='k')
plt.xlim(-0.5, 2.5)
# plt.ylim(-0.05, 0.13)

plt.ylabel('Step size (mag)')
plt.legend(loc=2)



FigureCanvasNbAgg()

<matplotlib.legend.Legend at 0x7f74a8bfd400>

In [231]:
plt.savefig('host_correlations.eps')

In [223]:
host_results = {
    # 'Local Mass':  [0.008,  0.04,   -0.02,  0.028],
    # 'Local SFR':   [0.013,  -0.007, -0.018, 0.015],
    # 'Local SSFR':  [0.058,  0.0956, 0.029,  0.070],
    # 'Global Mass': [0.0258,  0.061, -0.0136,-0.008],
    'Local Mass':  [0.008,  0.04,   -0.004, 0.051],
    'Local SFR':   [0.013,  -0.007, 0.0047, -0.031],
    'Local SSFR':  [0.058,  0.0956, 0.027, 0.065],
    'Global Mass': [0.0258,  0.061, -0.001, 0.040],
}

err = 0.025

plt.figure()
labels = []
for idx, (key, values) in enumerate(host_results.items()):
    for val_idx, value in enumerate(values):
        if val_idx >= 2:
            continue
        marker = 'oovv'[val_idx]
        # color = 'C%d' % (val_idx % 2)
        color = 'C%d' % (val_idx % 2)
        if idx == 0:
            label = ['Manifold twins', 'SALT2', 'Manifold twins color cut', 'SALT2 color cut'][val_idx]
        else:
            label = None
        gap = 0.1
        # xpos = idx - 3.*gap + 2*gap * val_idx + gap * (val_idx // 2)
        # xpos = idx - 1.5*gap + gap * val_idx
        xpos = idx - 0.5*gap + gap * val_idx
        plt.errorbar(xpos, value, err, c=color, alpha=0.3)
        plt.plot(xpos, value, marker=marker, c=color, label=label)
        
    labels.append(key)

plt.xticks(np.arange(len(labels)), labels=labels)
plt.axhline(0., c='k')
# plt.ylim(-0.05, 0.13)

plt.ylabel('Step size (mag)')
plt.legend(loc=2)

<matplotlib.legend.Legend at 0x7f22a7efd4a8>

In [28]:
plt.figure()

c1 = (a.host_data['lssfr'][a.train_cut[a.host_mask]] > -11)[:, 0]
c2 = (a.corr_mags[a.host_mask[a.train_cut]] > 0.05)
cc = c1 + 2*c2

x = a.trans[:, 0][a.train_cut & a.host_mask]
y = a.trans[:, 1][a.train_cut & a.host_mask]
plt.scatter(x, y, c=cc, cmap=plt.cm.jet)
plt.colorbar()



FigureCanvasNbAgg()

<matplotlib.colorbar.Colorbar at 0x7fa0bdacec88>

In [32]:
a.host_mask & a.train_cut

array([False,  True, False,  True, False, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False,  True,  True,  True, False,
        True, False,  True, False, False,  True, False, False, False,
        True, False, False, False, False, False, False,  True,  True,
        True,  True, False,  True,  True, False, False, False, False,
        True,  True, False, False,  True, False, False,  True, False,
       False, False, False,  True, False, False, False, False, False,
       False,  True,  True, False, False,  True, False, False,  True,
        True, False, False, False, False, False, False,  True, False,
        True, False,  True, False, False,  True, False, False, False,
       False, False,  True,  True,  True, False,  True,  True, False,
        True,  True,  True, False,  True, False, False,  True,  True,
       False, False,  True,  True, False, False,  True,  True, False,
       False, False,

In [140]:
plt.figure()

lssfr = a.host_data['lssfr'][a.train_cut[a.host_mask]]
mag1 = a.salt_hr[a.host_mask & a.train_cut]
mag2 = a.corr_mags[a.host_mask[a.train_cut]]

for i in range(len(lssfr)):
    bigger = mag1[i] > mag2[i]
    if bigger:
        c = 'C3'
    else:
        c = 'C0'
    
    plt.arrow(lssfr[i], mag1[i], 0, mag2[i] - mag1[i], width=0.01, head_width=0.1, head_length=0.03, edgecolor=c, facecolor=c)
    
plt.xlim(np.min(lssfr) - 0.5, np.max(lssfr) + 0.5)
plt.ylim(-0.4, 0.4)

plt.figure()
plt.scatter(lssfr, mag1)

plt.figure()
plt.scatter(lssfr, mag2)



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()

<matplotlib.collections.PathCollection at 0x7f56c1a7bb38>

# Modeling the full timeseries

In [30]:
all_phases = []
all_trans = []
all_fluxes = []
all_spectra = []

for idx in tqdm.tqdm(range(len(a.targets))):
    target = a.targets[idx]
    # scale = a.applied_scale[idx]
    # scale = a.scale_flux[idx] / a.interpolation_result['target_flux'][idx]
    scale = 1 / a.model_scales[idx]
    trans = a.trans[idx]

    for raw_spectrum in target.spectra:
        spectrum = raw_spectrum.bin_by_velocity(1000).apply_scale(scale)
        
        all_phases.append(spectrum.phase)
        all_fluxes.append(spectrum.flux)
        all_trans.append(trans)
        all_spectra.append(spectrum)
        
all_phases = np.array(all_phases)
all_fluxes = np.array(all_fluxes)
all_trans = np.array(all_trans)
all_spectra = np.array(all_spectra)

100%|██████████| 191/191 [02:17<00:00,  2.30it/s]


In [31]:
plt.figure()
plt.scatter(all_phases, all_fluxes[:, 20], c=all_trans[:, 0])

FigureCanvasNbAgg()

<matplotlib.collections.PathCollection at 0x7fbaaaea0fd0>

In [32]:
wave_idx = 50

a.scatter(a.scale_flux[:, wave_idx], label='Flux')
plt.title('%d $\AA$' % a.wave[wave_idx])

FigureCanvasNbAgg()

Text(0.5, 1.0, '3903 $\\AA$')

In [33]:
interp_x = np.hstack([all_phases[:, None], all_trans])

In [34]:
# Take ratios to the median spectrum and see how components affect things.
def do_plot(trans_idx, wave_idx):
    order = np.argsort(all_phases)
    x = all_trans[order, trans_idx]
    y = all_fluxes[order, wave_idx]

    mean_func = math.windowed_median(y)
    scale_y = y / mean_func

    plt.figure()
    plt.scatter(x, scale_y, s=10, alpha=0.2)
    math.plot_binned_median(x, scale_y, mode='error')
    plt.ylim(0, 2)
    # plt.ylim(-1, 2)
    # plt.xlim(-30, 50)
    
from ipywidgets import interact
interact(do_plot, trans_idx=(0, all_trans.shape[1]-1), wave_idx=(0, all_fluxes.shape[1]-1))

interactive(children=(IntSlider(value=1, description='trans_idx', max=2), IntSlider(value=143, description='wa…

<function __main__.do_plot(trans_idx, wave_idx)>

## Spline model

In [35]:
from scipy.interpolate import BSpline

min_phase = -15
max_phase = 60

t = np.arange(min_phase, max_phase, 5)
t = np.hstack([t[0], t[0], t, t[-1], t[-1]])
k = 3
len_c = len(t) - k - 1

def super_spline(all_c, phases, trans):
    use_c = all_c

    center_spl = BSpline(t, use_c[0], k)
    x0_spl = BSpline(t, use_c[1], k)
    x1_spl = BSpline(t, use_c[2], k)
    x2_spl = BSpline(t, use_c[3], k)
    x00_spl = BSpline(t, use_c[4], k)
    x01_spl = BSpline(t, use_c[5], k)
    x02_spl = BSpline(t, use_c[6], k)
    x11_spl = BSpline(t, use_c[7], k)
    x12_spl = BSpline(t, use_c[8], k)
    x22_spl = BSpline(t, use_c[9], k)
    
    c0 = trans[:, 0] / 10.
    c1 = trans[:, 1] / 10.
    c2 = trans[:, 2] / 10.
    
    model = (
        center_spl(phases) +
        c0 * x0_spl(phases) +
        c1 * x1_spl(phases) +
        c2 * x2_spl(phases) +
        c0 * c0 * x00_spl(phases) +
        c0 * c1 * x01_spl(phases) +
        c0 * c2 * x02_spl(phases) +
        c1 * c1 * x11_spl(phases) +
        c1 * c2 * x12_spl(phases) +
        c2 * c2 * x22_spl(phases)
    )
    
    # print(np.max(mag_model), np.min(mag_model))
    # model = 10**(-0.4*mag_model)
    
    return model

In [36]:
def build_spline_basis(phases, trans):
    basis = []
    
    for i in range(10):
        for j in range(len_c):
            impulse_coeffs = np.zeros((10, len_c))
            impulse_coeffs[i, j] = 1.
            spl = super_spline(impulse_coeffs, use_phases, use_trans)
            basis.append(spl)
            
    basis = np.array(basis)
    
    return basis

In [37]:
cut = (all_phases > min_phase) & (all_phases < max_phase)
use_phases = all_phases[cut]
use_trans = all_trans[cut]
basis = build_spline_basis(use_phases, use_trans)
print(np.max(basis), np.min(basis))

6.715901288398874 -9.521467539915262


In [38]:
all_spl_c = []
for wave_idx in range(len(a.wave)):
    use_flux = all_fluxes[cut, wave_idx]
    coef, residuals, rank, s = np.linalg.lstsq(basis.T, use_flux, rcond=None)
    spl_c = coef.reshape((10, len_c))
    all_spl_c.append(spl_c)
all_spl_c = np.array(all_spl_c)

In [39]:
np.max(a.trans)

8.41951815955247

In [40]:
vary_idx = 1
center_loc = [0, 0, 0.]

center_loc = np.array(center_loc, dtype=float)
deltas = all_trans - center_loc
diff = np.delete(deltas, vary_idx, axis=1)
dist = np.sqrt(np.sum(diff**2, axis=1))
close_cut = dist < 1.

close_trans = all_trans[close_cut, vary_idx]

min_vary = np.min(close_trans)
max_vary = np.max(close_trans)
diff = max_vary - min_vary
min_vary -= 0.1*diff
max_vary += 0.1*diff


plt.figure()
for plot_loc in np.linspace(min_vary, max_vary, 10):
    plot_t = np.linspace(-15, 60, 1000)
    loc = center_loc.copy()
    loc[vary_idx] = plot_loc
    pred_trans = np.array([loc] * len(plot_t))
    
    model = super_spline(spl_c, plot_t, pred_trans)
    
    plt.plot(
        plot_t, model,
        c=plt.cm.coolwarm((plot_loc - min_vary) / (max_vary - min_vary)),
    )

plt.scatter(all_phases[close_cut], all_fluxes[close_cut, wave_idx], c=all_trans[close_cut, vary_idx],
            vmin=min_vary, vmax=max_vary, cmap=plt.cm.coolwarm)
plt.xlim(-20, 40)
# plt.ylim(-5, 25)

FigureCanvasNbAgg()

(-20, 40)

In [41]:
# Residuals
# phases = np.array([i.phase for i in a.spectra])
phases = np.zeros(len(a.targets))
a.scatter(a.scale_flux[:, wave_idx], cmap=plt.cm.coolwarm)
residuals = a.scale_flux[:, wave_idx] - super_spline(spl_c, phases, a.trans)
scale = np.max(np.abs(residuals))
a.scatter(residuals, cmap=plt.cm.coolwarm, vmin=-scale, vmax=scale)

plt.figure()
plt.scatter(a.trans[:, 2], residuals)



FigureCanvasNbAgg()



FigureCanvasNbAgg()



FigureCanvasNbAgg()

<matplotlib.collections.PathCollection at 0x7fbb41114a58>

In [42]:
plt.figure()
plt.scatter(residuals[a.train_cut & a.mag_cut], a.corr_mags[a.mag_cut[a.train_cut]])



FigureCanvasNbAgg()

<matplotlib.collections.PathCollection at 0x7fbb4105a438>

In [49]:
plt.figure()
plt.scatter(all_trans[:, 1], all_trans[:, 2])



FigureCanvasNbAgg()

<matplotlib.collections.PathCollection at 0x7fbaaa7b3b38>

In [53]:
# Reproduced spectra
for vary_idx in range(3):
    # center_loc = [-1.4, -0.7, 0.]
    # center_loc = [-0.5, 0.5, 0]
    center_loc = [0, 0.5, 0]
    phase = 0

    center_loc = np.array(center_loc, dtype=float)
    deltas = all_trans - center_loc
    diff = np.delete(deltas, vary_idx, axis=1)
    dist = np.sqrt(np.sum(diff**2, axis=1))
    close_cut = dist < 1.

    close_trans = all_trans[close_cut, vary_idx]

    # min_vary = np.clip(np.min(close_trans), -5, -2)
    # max_vary = np.clip(np.max(close_trans), 2, 5)
    min_vary = np.min(close_trans)
    max_vary = np.max(close_trans)
    diff = max_vary - min_vary
    min_vary += 0.1*diff
    max_vary -= 0.1*diff
    vary_loc = np.linspace(min_vary, max_vary, 10)

    print(min_vary, max_vary)

    plt.figure(figsize=(8, 6))
    models = []
    for wave_idx in range(len(a.wave)):
        plot_t = np.ones(len(vary_loc)) * phase
        plot_loc = np.tile(center_loc, (10, 1))
        plot_loc[:, vary_idx] = vary_loc

        model = super_spline(all_spl_c[wave_idx], plot_t, plot_loc)

        models.append(model)

    models = np.array(models)
    for model, model_loc in zip(models.T, vary_loc):
        plt.plot(a.wave, model, c=plt.cm.coolwarm((model_loc - min_vary) / (max_vary - min_vary)))

    plt.xlabel('Wavelength ($\AA$)')
    plt.ylabel('Flux')
    plt.title('Component %d' % vary_idx)


        # plt.plot(
            # plot_t, model,
            # c=plt.cm.coolwarm((plot_loc - min_vary) / (max_vary - min_vary)),
        # )

    # plt.scatter(all_phases[close_cut], all_fluxes[close_cut, wave_idx], c=all_trans[close_cut, vary_idx],
                # vmin=min_vary, vmax=max_vary, cmap=plt.cm.coolwarm)
    # plt.xlim(-20, 40)
    # plt.ylim(-5, 25)

-3.446814751127695 3.675216663579458




FigureCanvasNbAgg()

-3.356471377253979 1.793067745943505




FigureCanvasNbAgg()

-1.0119235169273137 3.1224048892665506




FigureCanvasNbAgg()

In [224]:
from idrtools.tools import snf_filters
from idrtools.spectrum import _get_snf_magnitude

lc_phases = np.linspace(min_phase, max_phase, 100)

def calc_lc(band, loc):
    models = []
    min_wave, max_wave = snf_filters[band]

    min_wave_idx = np.min(np.where(a.wave > min_wave))
    max_wave_idx = np.max(np.where(a.wave < max_wave))
    for wave_idx in range(min_wave_idx, max_wave_idx+1):
        plot_loc = np.tile(loc, (len(lc_phases), 1))
        model = super_spline(all_spl_c[wave_idx], lc_phases, plot_loc)
        models.append(model)

    models = np.array(models)
    bandmag = []
    for model in models.T:
        bandmag.append(_get_snf_magnitude(a.wave[min_wave_idx:max_wave_idx+1], model, band))
    bandmag = np.array(bandmag)
    # bandflux = np.sum(models, axis=0) / (max_wave - min_wave)
    # bandmag = 2.5*np.log10(bandflux)
    
    return bandmag

In [227]:
colors = {'u': 'C4', 'b': 'C0', 'v': 'C2', 'r': 'C3'}

def plot_lc(idx):
    loc = a.trans[idx]

    target = a.targets[idx]
    scale = 1 / a.model_scales[idx]
    
    p = []
    m = []
    e = []
    c = []
    for raw_spectrum in target.spectra:
        spectrum = raw_spectrum.bin_by_velocity(1000).apply_scale(scale)
        for band in ['u', 'b', 'v', 'r']:
            p.append(spectrum.phase)
            mag, err = spectrum.get_snf_magnitude(band, calculate_error=True)
            m.append(mag)
            e.append(err)
            c.append(colors[band])
            
    print(target)
    print(target.subset)
    print(a.trans[idx])
            
    plt.figure()
    plt.scatter(p, m, c=c)
    plt.errorbar(p, m, e, c=c)
        
    for band in ['u', 'b', 'v', 'r']:
        plt.plot(lc_phases, calc_lc(band, loc), c=colors[band])

    plt.xlim(min_phase, max_phase)
    plt.ylim(-15, -20)
    plt.xlabel('Phase')
    plt.ylabel('Mag')
        
interact(plot_lc, idx=(0, len(a.targets)-1))
    
    
# loc = [0, 0, 0]

# u = calc_lc('u', loc)
# b = calc_lc('b', loc)
# v = calc_lc('v', loc)
# r = calc_lc('r', loc)

# plt.figure()
# plt.plot(lc_phases, u, c='C4', label='u')
# plt.plot(lc_phases, b, c='C0', label='b')
# plt.plot(lc_phases, v, c='C2', label='v')
# plt.plot(lc_phases, r, c='C3', label='r')

# plt.legend()


interactive(children=(IntSlider(value=95, description='idx', max=190), Output()), _dom_classes=('widget-intera…

<function __main__.plot_lc(idx)>

In [213]:
a.spectra[13]

ModifiedSpectrum(target="PTF09dnp", name="PTF09dnp_M001293")

In [214]:
dists = np.sum((a.trans - a.trans[13])**2, axis=1)

In [215]:
np.argsort(dists)

array([ 13,  72, 111, 119,  49, 129,  59,   9, 140,  75,  34,  50,  45,
        92, 102, 138, 113,  38,   3,  37, 106,  27,  87,  17, 154, 131,
        51,  64,  94,  70,  54, 150, 151,  77,  18,  80, 110,  48, 133,
       146, 114, 103,  55,  63,  97,  53, 155, 153,  86,  24, 132,  82,
        21,  57, 144,  79,  28,   5,  66, 125,  41,   4, 130,  56,  58,
       123,  19,  96, 147,  84,   8, 109, 127,   1, 139,  14,   0,  67,
       117,  42, 108, 120,  35, 115,  90,  81, 118, 122,  85,  89,  76,
        62, 101,  36,  10,  93,  46,  12,  98,  99,  30, 141,  65,  39,
        32,  22, 116,   2,  60, 137,  15, 128,   7, 112, 136,  83, 121,
        29,  78,  25,  52, 143,  61,  71,  47,  69,  16,  33, 124,  23,
        26,  11, 126,  40,   6,  68, 149,  95, 104,  73, 152,  43,  31,
       145, 107,  20, 148,  88,  91, 142, 134, 100, 135,  44, 105,  74])

In [248]:
vary_idx = 0
band = 'v'

# center_loc = [-1.5, -2.1, 0.]
center_loc = [0, 1, -1.]

center_loc = np.array(center_loc, dtype=float)
deltas = all_trans - center_loc
diff = np.delete(deltas, vary_idx, axis=1)
dist = np.sqrt(np.sum(diff**2, axis=1))
close_cut = dist < 0.5

close_trans = all_trans[close_cut, vary_idx]

if len(close_trans) > 0:
    min_vary = np.clip(np.min(close_trans), -5, -2)
    max_vary = np.clip(np.max(close_trans), 2, 5)
else:
    print("WARNING: NO DATA IN RANGE")
    min_vary = -2
    max_vary = 2
    
diff = max_vary - min_vary
min_vary -= 0.1*diff
max_vary += 0.1*diff
vary_loc = np.linspace(min_vary, max_vary, 10)

print(min_vary, max_vary)

plt.figure()

for plot_loc in vary_loc:
    use_loc = center_loc.copy()
    use_loc[vary_idx] = plot_loc
    mag = calc_lc(band, use_loc)
    
    plt.plot(
        lc_phases, mag,
        c=plt.cm.coolwarm((plot_loc - min_vary) / (max_vary - min_vary)),
        zorder=-1,
    )
    
# Overplot real data
plot_phases = []
plot_mags = []
plot_colors = []
for idx in np.where(close_cut)[0]:
    phase = all_phases[idx]
    spectrum = all_spectra[idx]
    val = all_trans[idx, vary_idx]
    
    mag = spectrum.get_snf_magnitude(band)
    
    color = plt.cm.coolwarm((val - min_vary) / (max_vary - min_vary))
    
    plot_phases.append(phase)
    plot_mags.append(mag)
    plot_colors.append(color)
    
plt.scatter(plot_phases, plot_mags, c=plot_colors, s=30, edgecolors='k')

plt.ylim(np.nanpercentile(plot_mags, 90) + 0.5, np.nanpercentile(plot_mags, 10) - 0.5)
plt.xlim(min_phase, max_phase)
# plt.ylim(-4, 0)

# plt.legend()

-2.7 5.7


(-15, 60)

In [246]:
# Reproduced spectra
vary_idx = 1
center_loc = [0., 0., 0.]
phase = 0

center_loc = np.array(center_loc, dtype=float)

all_model = []

for wave_idx in range(len(a.wave)):
    loc = center_loc.copy()
    loc[vary_idx] = plot_loc
    model = super_spline(all_spl_c[wave_idx], phase, loc)
    all_model.append(model)
    
plt.figure()
plt.plot(a.wave, all_model)

vary_idx = 1
center_loc = [0., 0., 0.]

center_loc = np.array(center_loc, dtype=float)
deltas = all_trans - center_loc
diff = np.delete(deltas, vary_idx, axis=1)
dist = np.sqrt(np.sum(diff**2, axis=1))
close_cut = dist < 1.0

close_trans = all_trans[close_cut, vary_idx]

min_vary = np.min(close_trans)
max_vary = np.max(close_trans)
diff = max_vary - min_vary
min_vary -= 0.1*diff
max_vary += 0.1*diff


plt.figure()
for plot_loc in np.linspace(min_vary, max_vary, 10):
    plot_t = np.linspace(-15, 60, 1000)
    loc = center_loc.copy()
    loc[vary_idx] = plot_loc
    pred_trans = np.array([loc] * len(plot_t))
    
    model = super_spline(spl_c, plot_t, pred_trans)
    
    plt.plot(
        plot_t, model,
        c=plt.cm.coolwarm((plot_loc - min_vary) / (max_vary - min_vary)),
    )

plt.scatter(all_phases[close_cut], all_fluxes[close_cut, wave_idx], c=all_trans[close_cut, vary_idx],
            vmin=min_vary, vmax=max_vary, cmap=plt.cm.coolwarm)
plt.xlim(-20, 40)
# plt.ylim(-5, 25)

ValueError: setting an array element with a sequence.

In [159]:
a.spectra[(np.abs(a.trans[:, 0] - 0.73) < 0.1) & (np.abs(a.trans[:, 1] - 1.8) < 0.1)]

array([], dtype=object)

## GP model

In [223]:
import george
from george import kernels

def build_gp(x):
    kernel = x[0]**2 * kernels.Matern32Kernel([x[1]**2, x[2]**2, x[2]**2], ndim=3)
    gp = george.GP(kernel)
    yerr = x[3] * np.ones(all_fluxes.shape[0])
    gp.compute(interp_x, yerr)
    
    return gp

def to_min(x):
    gp = build_gp(x)
    
    nll = -gp.log_likelihood(all_fluxes[:, 20])
    # gnll = - gp.grad_log_likelihood(all_fluxes[:, 20])

    print(nll, x)
    return nll#, gnll

res = minimize(to_min, [10, 5, 5, 1])#, jac=True)

gp = build_gp([20, 5, 5, 1])

ValueError: Dimension mismatch

In [395]:
gp = build_gp([-7.3473681, 28.69758488,-2.5888035,  0.65813375])

In [396]:
res

      fun: 3484.2829979161615
 hess_inv: array([[ 2.01096038e+00, -5.12541530e+00,  5.82737148e-01,
        -3.63339994e-03],
       [-5.12541530e+00,  1.40694362e+01, -1.51420966e+00,
         1.44636561e-02],
       [ 5.82737148e-01, -1.51420966e+00,  1.78785616e-01,
        -2.02041828e-03],
       [-3.63339994e-03,  1.44636561e-02, -2.02041828e-03,
         2.69526648e-04]])
      jac: array([ 0.003479  ,  0.00067139, -0.00296021,  0.02093506])
  message: 'Desired error not necessarily achieved due to precision loss.'
     nfev: 618
      nit: 23
     njev: 101
   status: 2
  success: False
        x: array([-7.3473681 , 28.69758487, -2.5888035 ,  0.65813375])

In [397]:
pred_phase = 0
wave_idx = 110

min_x = np.min(a.trans[:, 0]) - 0.5
max_x = np.max(a.trans[:, 0]) + 0.5
min_y = np.min(a.trans[:, 1]) - 0.5
max_y = np.max(a.trans[:, 1]) + 0.5

num_points = 50

plot_x, plot_y = np.meshgrid(np.linspace(min_x, max_x, num_points),
                             np.linspace(min_y, max_y, num_points))

flat_plot_x = plot_x.flatten()
flat_plot_y = plot_y.flatten()
flat_phases = pred_phase * np.ones(len(flat_plot_x))

plot_coords = np.array([flat_phases, flat_plot_x, flat_plot_y]).T

pred = gp.predict(all_fluxes[:, wave_idx], plot_coords, return_cov=False)
pred = pred.reshape(plot_x.shape)

cut = (all_phases - pred_phase) < 2.5
scatter_fluxes = all_fluxes[cut, wave_idx]

vmin = np.percentile(scatter_fluxes, 5)
vmax = np.percentile(scatter_fluxes, 95)

plt.figure()
plt.imshow(pred[::-1], extent=(min_x, max_x, min_y, max_y), vmin=vmin, vmax=vmax)

plt.scatter(all_trans[cut, 0], all_trans[cut, 1], c=scatter_fluxes,
            edgecolors='k', vmin=vmin, vmax=vmax)


plt.colorbar()



<matplotlib.colorbar.Colorbar at 0x7f55bab5eeb8>

In [297]:
interp_x.shape

(2474, 3)

In [298]:
all_fluxes.shape

(2474, 144)

In [301]:
pred_x.shape

(1, 3)

In [302]:
plot_coords.shape

(3, 2500)

In [300]:
for transval in np.linspace(-3, 3, 10):
    pred_phases = np.arange(0, 1)
    loc = [0, transval]
    pred_trans = np.array([loc] * len(pred_phases))
    pred_x = np.hstack([pred_phases[:, None], pred_trans])

    all_vals = []
    for i in range(all_fluxes.shape[1]):
        vals, cov = gp.predict(all_fluxes[:, i], pred_x)
        all_vals.append(vals)

    all_vals = np.array(all_vals)
    plt.plot(all_vals, label=loc)
    plt.legend()

KeyboardInterrupt: 

In [290]:
plt.figure()



<Figure size 640x480 with 0 Axes>

In [204]:
plt.figure()
all_vals = np.array(all_vals)
plt.plot(all_vals)
# plt.plot(pred_phases, all_vals)

[<matplotlib.lines.Line2D at 0x7f55c153b550>]

In [215]:
x = np.arange(100)
y1 = (x < 50).astype(float)
y2 = (x >= 50).astype(float)

in_coefs = np.random.normal(size=(1000, 2))
in_coefs[:, 1] *= 0.5
in_funcs = in_coefs.dot([y1, y2])

in_funcs += np.random.normal(0, 0.1, size=in_funcs.shape)

iso = Isomap(n_components=2, n_neighbors=10)
trans = iso.fit_transform(in_funcs)

In [218]:
plt.figure()
plt.scatter(in_coefs[:, 1], trans[:, 1])



FigureCanvasNbAgg()

<matplotlib.collections.PathCollection at 0x7fef79b4a358>

In [221]:
iso.kernel_pca_.lambdas_

array([141058.61555927,  45836.24781285])

In [223]:
print(np.std(in_coefs, axis=0))
print(np.std(trans, axis=0))

[1.01873123 0.50343313]
[11.87680999  6.77024725]
