# eidein

Interactive tool for dimensionality reduction of astronomical data and their visualisation.

In [1]:
%matplotlib widget

from astropy.modeling import models, fitting
import h5py
import ipywidgets
import matplotlib.pyplot as plt
import numpy as np

from eidein import Eidein

## Data

In [2]:
# Size of the working sample; a later cell uses it as the upper bound
# when drawing a random spectrum index.
SIZE = 2048

In [3]:
# TODO should I take only those with high S/N?
# Load the SIZE spectra with the highest mutual information from the superset file.
with h5py.File("data/dr16q_superset.hdf5", "r") as datafile:
    print(datafile.keys())
    n = datafile["id"].shape[0]
    mi = datafile["mutual_information"][:]
    # argsort is ascending, so the last SIZE positions hold the largest values.
    # The positions are sorted again so the index list is increasing
    # (h5py expects increasing indices for list-based selection).
    idx = np.sort(np.argsort(mi)[-SIZE:])
    mi = mi[idx]
    ids = datafile["id"][idx]
    X = datafile["X"][idx]
    z_pred = datafile["z_pred"][idx]
# Show the dtypes of the loaded arrays as the cell output.
ids.dtype, X.dtype, mi.dtype, z_pred.dtype

<KeysViewHDF5 ['X', 'entropy', 'entropy_std', 'id', 'mutual_information', 'pipe_corr_10k', 'sn_median_all', 'source_z', 'variation_ratio', 'z', 'z_10k', 'z_pca', 'z_pipe', 'z_pred', 'z_pred_std', 'z_qn', 'z_vi', 'zs_pred']>


(dtype('int32'), dtype('float32'), dtype('float64'), dtype('float32'))

In [4]:
# Histogram of the mutual information of the selected spectra.
fig, ax = plt.subplots()
ax.hist(mi)
ax.set_xlabel("Mutual Information")
ax.set_ylabel("Count")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'Count')

In [5]:
# Wavelength grid: N_FEATURES points evenly spaced in log10(wavelength)
# between LOGLAMMIN and LOGLAMMAX, converted back to linear wavelength.
LOGLAMMIN, LOGLAMMAX = 3.5832, 3.9583
N_FEATURES = 3752
WAVE = np.power(10, np.linspace(LOGLAMMIN, LOGLAMMAX, N_FEATURES))

def plot_sdss_spectrum(ax, identifier, flux, y, label, wave=WAVE):
    """Draw one spectrum on ``ax`` with a legend naming the file and redshifts.

    Parameters
    ----------
    ax
        Axes-like object; ``plot``, ``legend``, ``set_xlabel`` and
        ``set_ylabel`` are called on it.
    identifier
        Sequence unpacked as ``(plate, mjd, fiberid)``; used to build the
        ``spec-PLATE-MJD-FIBERID.fits`` name shown in the legend.
    flux
        Flux values plotted against ``wave``.
    y
        Value rendered in the legend as ``\\hat{z}`` with two decimals.
    label
        Value rendered in the legend as ``z`` with two decimals.
    wave
        Wavelength grid for the x axis; defaults to the module-level ``WAVE``.
    """
    plate, mjd, fiberid = identifier
    # The backslash of \hat is escaped explicitly: "\h" is an invalid escape
    # sequence in a normal string literal, while "\n" must stay a real newline.
    label_str = "spec-{:04d}-{}-{:04d}.fits\n$\\hat{{z}} = {:.2f}$\n$z = {:.2f}$".format(
        plate, mjd, fiberid, y, label)
    ax.plot(wave, flux, label=label_str)
    ax.legend()
    ax.set_xlabel("Observed Wavelength [Å]")
    ax.set_ylabel("Flux [10$^{-17}$ erg cm$^{-2}$ s$^{-1}$ Å$^{-1}$]")

In [12]:
# Spectral lines as (wavelength, LaTeX label) pairs.
# NOTE(review): wavelengths are presumably in Å; the labels use LaTeX macros
# (\textsc, \Updelta, \upgamma, ...) that likely need a full LaTeX renderer.
# Raw strings keep every backslash literal without relying on invalid escape
# sequences such as "\," and "\l".
lines = [
    (3727.092, r"O\,\textsc{\lowercase{II}}"),
    (4102.89, r"H\,$\Updelta$"),
    (4341.68, r"H\,$\upgamma$"),
    (4862.68, r"H\,$\upbeta$"),
    (5008.240, r"O\,\textsc{\lowercase{III}}"),
    (6564.61, r"H\,$\upalpha$")]
lines

[(3727.092, 'O\\,\\textsc{\\lowercase{II}}'),
 (4102.89, 'H\\,$\\Updelta$'),
 (4341.68, 'H\\,$\\upgamma$'),
 (4862.68, 'H\\,$\\upbeta$'),
 (5008.24, 'O\\,\\textsc{\\lowercase{III}}'),
 (6564.61, 'H\\,$\\upalpha$')]

In [6]:
# Plot one randomly chosen spectrum from the loaded sample.
# The index is drawn from the number of rows actually loaded, so it stays
# valid even if fewer than SIZE spectra were available in the file.
rnd_i = np.random.randint(len(ids))
fig, ax = plt.subplots()
# 0.0 is passed as the `label` argument, which the legend shows as z.
plot_sdss_spectrum(ax, ids[rnd_i], X[rnd_i], z_pred[rnd_i], 0.0)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [7]:
# Numeric input box captioned "redshift:" that changes in steps of 0.01.
widget_label = ipywidgets.FloatText(description="redshift:", step=0.01)
display(widget_label)

FloatText(value=0.0, description='redshift:', step=0.01)

In [8]:
# Build the Eidein tool from the identifiers, spectra and mutual information,
# passing the spectrum-plotting function and the redshift input widget.
eidein = Eidein(ids, X, mi, plot_sdss_spectrum, widget_label)

In [9]:
# Render the Eidein widget in the notebook.
display(eidein)

Eidein(children=(Tab(children=(interactive(children=(Checkbox(value=False, description='whiten'), Dropdown(des…

In [10]:
# Inspect the identifiers held by the tool (rows of plate, mjd, fiberid).
eidein.ids

array([[  266, 51630,   161],
       [  268, 51633,   160],
       [  270, 51909,    83],
       ...,
       [ 9611, 58107,   527],
       [ 9611, 58107,   934],
       [ 9611, 58136,   932]], dtype=int32)