In [None]:
from idrtools import Dataset
import numpy as np
import os
from sklearn.manifold import TSNE, Isomap
import skdim
import matplotlib.pyplot as plt
import specind
from astropy import table
import matplotlib as mpl
import scienceplots
import importlib

In [None]:
# plt.style.use('science')

In [None]:
def load_all_spectra(spectra_path):
    """Load every spectrum file in ``spectra_path`` into one stacked array.

    Each regular file in the directory is parsed with ``np.loadtxt`` after
    skipping its first line. Files are read in sorted path order so the row
    order of the result is reproducible (``os.scandir`` yields entries in
    arbitrary order), and sub-directories such as ``.ipynb_checkpoints`` are
    skipped instead of being handed to ``np.loadtxt``.

    Parameters
    ----------
    spectra_path : str or os.PathLike
        Directory containing one text file per spectrum.

    Returns
    -------
    numpy.ndarray
        The per-file arrays stacked along a new leading axis, i.e. shape
        ``(n_files,) + per_file_shape``.

    Raises
    ------
    ValueError
        If the directory contains no regular files, or if the files do not
        all parse to the same shape.
    """
    # The context manager closes the directory handle deterministically.
    with os.scandir(spectra_path) as entries:
        file_paths = sorted(entry.path for entry in entries if entry.is_file())
    if not file_paths:
        raise ValueError(f"No spectrum files found in {spectra_path!r}")
    return np.stack([np.loadtxt(path, skiprows=1) for path in file_paths], axis=0)
    
# Stack every file found in the SNfactory spectra directory. The last axis is
# used by later cells as: 0 = wavelengths, 1 = spectra, 2 = uncertainties.
ALL_SPECTRA = load_all_spectra("../SNfactory/spectra")

In [61]:
# Sanity check of the stacked array's dimensions (files, rows per file, columns per row).
print(ALL_SPECTRA.shape)

(203, 288, 3)


In [None]:
def filter_spectra(spectra, uncertainties, max_uncertainty_ratio=0.1):
    """Print how many spectra have a small peak uncertainty.

    For each spectrum (row) the bin with the largest uncertainty is located.
    That uncertainty is compared with the standard deviation, taken across
    all spectra, of the values in the same bin. A spectrum is counted when
    its peak uncertainty is strictly below ``max_uncertainty_ratio`` times
    that standard deviation.

    Despite its name, this function does not return filtered data: the count
    is printed and the return value is ``None``.

    Parameters
    ----------
    spectra : numpy.ndarray
        2-D array indexed as ``(spectrum, bin)``.
    uncertainties : numpy.ndarray
        2-D array of per-bin uncertainties, indexed like ``spectra``.
    max_uncertainty_ratio : float, optional
        Largest allowed ratio of peak uncertainty to intrinsic standard
        deviation. Defaults to 0.1.
    """
    intrinsic_sd = np.std(spectra, axis=0)
    # Bin index, per spectrum, where the uncertainty peaks.
    noisiest_bin = np.argmax(uncertainties, axis=1)
    peak_uncertainty = np.max(uncertainties, axis=1)
    threshold = intrinsic_sd[noisiest_bin] * max_uncertainty_ratio
    print((peak_uncertainty < threshold).sum())
    
# Prints the number of spectra whose largest uncertainty is below 10% of the
# across-sample standard deviation in that same bin.
filter_spectra(ALL_SPECTRA[:, :, 1], ALL_SPECTRA[:, :, 2])

In [None]:
def get_distance_matrix(spectra):
    """Return the matrix of pairwise distances between spectra.

    The distance between two spectra is the Euclidean norm of their
    difference after each bin has been divided by the mean spectrum (the
    mean over the first axis).

    Parameters
    ----------
    spectra : numpy.ndarray
        2-D array with one spectrum per row.

    Returns
    -------
    numpy.ndarray
        Square array of shape ``(n_spectra, n_spectra)``; entry ``[i, j]`` is
        the distance between spectrum ``i`` and spectrum ``j``.
    """
    n_spectra, n_bins = spectra.shape[0], spectra.shape[1]
    bin_scale = np.mean(spectra, axis=0)
    # Broadcasting builds every difference at once: pairwise[i, j] = spectra[j] - spectra[i].
    pairwise = spectra[np.newaxis, :, :] - spectra[:, np.newaxis, :]
    scaled = (pairwise / bin_scale).reshape(-1, n_bins)
    return np.linalg.norm(scaled, axis=1).reshape(n_spectra, n_spectra)
    
# Pairwise mean-normalised distances between all spectra.
# NOTE(review): not referenced again in the cells visible here.
SPECTRAL_DISTANCES = get_distance_matrix(ALL_SPECTRA[:, :, 1])

In [None]:
def calculate_spectral_indicators(spectra, spectra_err, wavelengths):
    """Measure spectral indicators for every spectrum and add a Branch class.

    One ``specind.Spectrum`` is built per spectrum and its
    ``get_spin_dict()`` result becomes one row of a masked astropy table.
    A ``branch_classification`` column is then derived from the
    ``EWSiII5972`` and ``EWSiII6355`` indicators:

    * ``EWSiII5972 >= 30``                          -> "Cool"
    * ``EWSiII5972 < 30`` and ``EWSiII6355 < 70``   -> "Shallow Silicon"
    * ``EWSiII5972 < 30`` and ``70 <= EWSiII6355 < 100`` -> "Core Normal"
    * ``EWSiII5972 < 30`` and ``EWSiII6355 >= 100`` -> "Broad Line"
    * anything else (e.g. a NaN or masked indicator) -> "Unclassified"

    The final fallback keeps the label list the same length as the table;
    without it a single unmeasurable spectrum would make the column
    assignment fail with a length mismatch. Callers that map labels to
    something else (colours, markers) should expect "Unclassified".

    Parameters
    ----------
    spectra : sequence of array-like
        One spectrum per entry.
    spectra_err : sequence of array-like
        Uncertainty of each spectrum, indexed like ``spectra``.
    wavelengths : array-like
        Wavelength grid shared by all spectra.

    Returns
    -------
    astropy.table.Table
        Masked table with one row per spectrum. Every column whose name does
        not contain ``"branch"`` is renamed with a ``spectrum_`` prefix.
    """
    indicator_rows = []
    for idx, flux in enumerate(spectra):
        # The uncertainty is squared before being handed to specind
        # (presumably it expects a variance -- confirm against the specind API).
        spec = specind.Spectrum(wavelengths, flux, spectra_err[idx] ** 2)
        indicator_rows.append(spec.get_spin_dict())

    spectral_indicators = table.Table(indicator_rows, masked=True)

    # Figure out Branch classifications
    branch_classifications = []
    for si6355, si5972 in zip(
            spectral_indicators["EWSiII6355"], spectral_indicators["EWSiII5972"]
    ):
        if si5972 >= 30:
            label = "Cool"
        elif si5972 < 30 and si6355 < 70:
            label = "Shallow Silicon"
        elif si5972 < 30 and 70 <= si6355 < 100:
            label = "Core Normal"
        elif si5972 < 30 and si6355 >= 100:
            label = "Broad Line"
        else:
            # Every comparison above is false for NaN / masked values.
            label = "Unclassified"
        branch_classifications.append(label)

    spectral_indicators['branch_classification'] = branch_classifications

    # Iterate over a snapshot of the names because columns are renamed in the loop.
    for colname in list(spectral_indicators.colnames):
        if 'branch' not in colname:
            spectral_indicators.rename_column(colname, f'spectrum_{colname}')

    return spectral_indicators

# The wavelength column of the first spectrum is used for every spectrum --
# assumes all files share one wavelength grid (TODO confirm).
SPECTRAL_INDICATORS = calculate_spectral_indicators(ALL_SPECTRA[:, :, 1], ALL_SPECTRA[:, :, 2], ALL_SPECTRA[0, :, 0])

In [None]:
# Matplotlib colour for each of the four Branch classes assigned by the
# threshold rules in calculate_spectral_indicators. The plotting functions
# iterate over this dict to draw and label one scatter group per class.
BRANCH_CLASSIFICATION_COLORMAP = {
    "Core Normal": "tab:gray",
    "Shallow Silicon": "tab:orange",
    "Cool": "tab:blue",
    "Broad Line": "tab:green"
}
# SPECTRAL_INDICATORS['branch_classification']

In [49]:
def t_sne_embedding(spectra, n_components=3, perplexity=30, random_state=42, init='pca'):
    """Embed spectra with exact t-SNE under a mean-normalised distance.

    The pairwise metric is the Euclidean norm of the difference between two
    spectra after dividing each bin by the mean spectrum (mean over the
    first axis of ``spectra``). The optimiser is run for up to 10000
    iterations with ``method='exact'``.

    Parameters
    ----------
    spectra : numpy.ndarray
        2-D array with one spectrum per row.
    n_components : int, optional
        Dimension of the embedding. Defaults to 3.
    perplexity : float, optional
        t-SNE perplexity. Defaults to 30.
    random_state : int, optional
        Seed passed to ``TSNE``. Defaults to 42.
    init : str, optional
        Initialisation passed to ``TSNE``. Defaults to ``'pca'``.

    Returns
    -------
    projection : numpy.ndarray
        Result of ``TSNE.fit_transform`` on ``spectra``.
    kl_divergence : float
        The fitted estimator's ``kl_divergence_`` attribute.
    """
    import inspect

    mean_spectrum = np.mean(spectra, axis=0)

    def spectral_distance(x, y):
        return np.linalg.norm((x - y) / mean_spectrum)

    # scikit-learn 1.5 renamed TSNE's ``n_iter`` to ``max_iter`` and newer
    # releases no longer accept the old name, so use whichever keyword the
    # installed version exposes.
    tsne_parameters = inspect.signature(TSNE).parameters
    iteration_kwarg = "max_iter" if "max_iter" in tsne_parameters else "n_iter"

    t_sne = TSNE(
        n_components=n_components, method='exact', random_state=random_state, verbose=1, n_jobs=6, init=init,
        perplexity=perplexity, metric=spectral_distance, **{iteration_kwarg: 10000}
    )
    projection = t_sne.fit_transform(spectra)
    return projection, t_sne.kl_divergence_

# EMBEDDING = t_sne_embedding(ALL_SPECTRA[:, :, 1])
# plt.scatter(EMBEDDING[:, 0], EMBEDDING[:, 1])

In [50]:
# Preview of a 16-point log-spaced grid from 5 to 203; plot_t_sne builds its
# perplexity grid the same way, with n_spectra - 1 as the upper bound.
np.round(np.logspace(np.log10(5), np.log10(203), 16))

array([  5.,   6.,   8.,  10.,  13.,  17.,  22.,  28.,  36.,  46.,  59.,
        76.,  97., 124., 159., 203.])

In [60]:
def plot_t_sne(
        spectra, save_path, n_components=2, init='pca', random_state=42, grid_dim=5, f_name="t_sne_projection.png",
        branch_classification=None
):
    """Save a grid of t-SNE embeddings computed over a range of perplexities.

    A ``grid_dim`` x ``grid_dim`` figure is drawn with the 'science' style.
    Each panel shows the first two embedding coordinates returned by
    ``t_sne_embedding`` for one perplexity. Perplexities are log-spaced from
    5 to ``n_spectra - 1`` and rounded. The figure is written to
    ``{save_path}/{f_name}`` and then closed.

    Parameters
    ----------
    spectra : numpy.ndarray
        2-D array with one spectrum per row.
    save_path : str
        Output directory; created if it does not exist.
    n_components, init, random_state
        Forwarded to ``t_sne_embedding``.
    grid_dim : int, optional
        Number of panels per side. Defaults to 5.
    f_name : str, optional
        File name of the saved figure.
    branch_classification : sequence of str, optional
        One label per spectrum. When given, points are coloured per
        ``BRANCH_CLASSIFICATION_COLORMAP`` and a legend is drawn. Points
        whose label is not a key of that mapping are drawn in black under
        the legend entry "Other".
    """
    # Create the output directory up front so a missing folder cannot discard
    # the slow embeddings when the figure is finally saved.
    if save_path:
        os.makedirs(save_path, exist_ok=True)
    # A plain ndarray makes ``labels == label`` elementwise even for lists.
    labels = None if branch_classification is None else np.asarray(branch_classification)

    with plt.style.context('science'):
        # squeeze=False keeps ``axs`` a 2-D array even when grid_dim == 1.
        fig, axs = plt.subplots(grid_dim, grid_dim, figsize=(16, 16), squeeze=False)
        perplexities = np.round(np.logspace(np.log10(5), np.log10(spectra.shape[0]-1), grid_dim ** 2))
        for ax, perplexity in zip(axs.flat, perplexities):
            print(f"Perplexity: {perplexity}")
            embedding, kl_divergence = t_sne_embedding(
                spectra, n_components=n_components, perplexity=perplexity, random_state=random_state, init=init
            )
            if labels is not None:
                for label, color in BRANCH_CLASSIFICATION_COLORMAP.items():
                    selected = labels == label
                    ax.scatter(
                        embedding[selected, 0], embedding[selected, 1],
                        alpha=0.75, c=color, label=label
                    )
                unmapped = ~np.isin(labels, list(BRANCH_CLASSIFICATION_COLORMAP))
                if unmapped.any():
                    ax.scatter(
                        embedding[unmapped, 0], embedding[unmapped, 1],
                        alpha=0.75, c="black", label="Other"
                    )
                ax.legend(frameon=True, framealpha=0.7, edgecolor='black', fancybox=True)
            else:
                ax.scatter(embedding[:, 0], embedding[:, 1], alpha=0.75)
            ax.set_xlabel(r"$\xi_1$")
            ax.set_ylabel(r"$\xi_2$")
            ax.set_title(f"Perplexity: {int(perplexity)}, KL divergence: {kl_divergence:.5f}")
        # The rect leaves a band at the top of the figure for the suptitle.
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        fig.suptitle(f"Initialization: {init}, Random seed: {random_state}")
        fig.savefig(f"{save_path}/{f_name}", dpi=150)
        plt.close(fig)

In [52]:
# 4x4 perplexity grid with the default PCA initialisation and seed.
plot_t_sne(
    ALL_SPECTRA[:, :, 1], "./misc/plots/t-sne", f_name="t_sne_pca.png", grid_dim=4,
    branch_classification=SPECTRAL_INDICATORS['branch_classification']
)

Perplexity: 5.0
[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 203 / 203
[t-SNE] Mean sigma: 0.476403
[t-SNE] KL divergence after 250 iterations with early exaggeration: 65.986088
[t-SNE] KL divergence after 10000 iterations: 0.526770
Perplexity: 6.0
[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 203 / 203
[t-SNE] Mean sigma: 0.511257
[t-SNE] KL divergence after 250 iterations with early exaggeration: 63.805736
[t-SNE] KL divergence after 10000 iterations: 0.532502
Perplexity: 8.0
[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 203 / 203
[t-SNE] Mean sigma: 0.565429
[t-SNE] KL divergence after 250 iterations with early exaggeration: 63.174715
[t-SNE] KL divergence after 10000 iterations: 0.535181
Perplexity: 10.0
[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 203 / 203
[t-SNE] Mean sigma: 0.609174
[t-S

In [53]:
# Same grid with random initialisation (seed 42), for comparison with the PCA-initialised run.
plot_t_sne(
    ALL_SPECTRA[:, :, 1], "./misc/plots/t-sne", random_state=42, init="random", f_name="t_sne_random_42.png", grid_dim=4,
    branch_classification=SPECTRAL_INDICATORS['branch_classification']
)

Perplexity: 5.0
[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 203 / 203
[t-SNE] Mean sigma: 0.476403
[t-SNE] KL divergence after 250 iterations with early exaggeration: 67.367185
[t-SNE] KL divergence after 10000 iterations: 0.525036
Perplexity: 6.0
[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 203 / 203
[t-SNE] Mean sigma: 0.511257
[t-SNE] KL divergence after 250 iterations with early exaggeration: 66.071128
[t-SNE] KL divergence after 10000 iterations: 0.549924
Perplexity: 8.0
[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 203 / 203
[t-SNE] Mean sigma: 0.565429
[t-SNE] KL divergence after 250 iterations with early exaggeration: 62.540098
[t-SNE] KL divergence after 10000 iterations: 0.537167
Perplexity: 10.0
[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 203 / 203
[t-SNE] Mean sigma: 0.609174
[t-S

In [54]:
# Random initialisation again with a different seed (123), to see how much the layout depends on the seed.
plot_t_sne(
    ALL_SPECTRA[:, :, 1], "./misc/plots/t-sne", random_state=123, init="random", f_name="t_sne_random_123.png", grid_dim=4,
    branch_classification=SPECTRAL_INDICATORS['branch_classification']
)

Perplexity: 5.0
[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 203 / 203
[t-SNE] Mean sigma: 0.476403
[t-SNE] KL divergence after 250 iterations with early exaggeration: 65.489327
[t-SNE] KL divergence after 10000 iterations: 0.526400
Perplexity: 6.0
[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 203 / 203
[t-SNE] Mean sigma: 0.511257
[t-SNE] KL divergence after 250 iterations with early exaggeration: 68.100297
[t-SNE] KL divergence after 10000 iterations: 0.522161
Perplexity: 8.0
[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 203 / 203
[t-SNE] Mean sigma: 0.565429
[t-SNE] KL divergence after 250 iterations with early exaggeration: 62.470808
[t-SNE] KL divergence after 10000 iterations: 0.538567
Perplexity: 10.0
[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 203 / 203
[t-SNE] Mean sigma: 0.609174
[t-S

In [55]:
def isomap_embedding(spectra, n_components=2, n_neighbors=5):
    """Project spectra with Isomap using a mean-normalised distance.

    The neighbour metric is the Euclidean norm of the difference between two
    spectra after dividing each bin by the mean spectrum (mean over the
    first axis of ``spectra``).

    Parameters
    ----------
    spectra : numpy.ndarray
        2-D array with one spectrum per row.
    n_components : int, optional
        Dimension of the embedding. Defaults to 2.
    n_neighbors : int, optional
        Number of neighbours passed to ``Isomap``. Defaults to 5.

    Returns
    -------
    numpy.ndarray
        Result of ``Isomap.fit_transform`` on ``spectra``.
    """
    bin_means = np.mean(spectra, axis=0)

    def spectral_distance(x, y):
        scaled_difference = (x - y) / bin_means
        return np.linalg.norm(scaled_difference)

    return Isomap(
        n_components=n_components,
        n_neighbors=n_neighbors,
        metric=spectral_distance,
        n_jobs=-1,
    ).fit_transform(spectra)

In [56]:
def plot_isomap(spectra, save_path, n_components=2, grid_dim=5, f_name="isomap.png", branch_classification=None):
    """Save a grid of Isomap embeddings computed over a range of neighbour counts.

    A ``grid_dim`` x ``grid_dim`` figure is drawn with the 'science' style.
    Each panel shows the first two coordinates returned by
    ``isomap_embedding`` for one neighbour count. Neighbour counts are
    log-spaced from 5 to ``n_spectra - 1``, rounded and cast to int. The
    figure is written to ``{save_path}/{f_name}`` and then closed.

    Parameters
    ----------
    spectra : numpy.ndarray
        2-D array with one spectrum per row.
    save_path : str
        Output directory; created if it does not exist.
    n_components : int, optional
        Embedding dimension forwarded to ``isomap_embedding``. Only the
        first two coordinates are plotted.
    grid_dim : int, optional
        Number of panels per side. Defaults to 5.
    f_name : str, optional
        File name of the saved figure.
    branch_classification : sequence of str, optional
        One label per spectrum. When given, points are coloured per
        ``BRANCH_CLASSIFICATION_COLORMAP`` and a legend is drawn. Points
        whose label is not a key of that mapping are drawn in black under
        the legend entry "Other". When omitted, all points share the default
        colour.
    """
    # Create the output directory up front so a missing folder cannot discard
    # the embeddings when the figure is finally saved.
    if save_path:
        os.makedirs(save_path, exist_ok=True)
    # Always bound, so the unlabelled case no longer raises UnboundLocalError.
    # A plain ndarray makes ``labels == label`` elementwise even for lists.
    labels = None if branch_classification is None else np.asarray(branch_classification)

    with plt.style.context('science'):
        # squeeze=False keeps ``axs`` a 2-D array even when grid_dim == 1.
        fig, axs = plt.subplots(grid_dim, grid_dim, figsize=(16, 16), squeeze=False)
        n_neighbors_ls = np.round(np.logspace(np.log10(5), np.log10(spectra.shape[0]-1), grid_dim ** 2)).astype(int)
        for ax, n_neighbors in zip(axs.flat, n_neighbors_ls):
            print(f"Neighbors: {n_neighbors}")
            embedding = isomap_embedding(spectra, n_neighbors=n_neighbors, n_components=n_components)
            if labels is not None:
                for label, color in BRANCH_CLASSIFICATION_COLORMAP.items():
                    selected = labels == label
                    ax.scatter(
                        embedding[selected, 0], embedding[selected, 1],
                        alpha=0.75, c=color, label=label
                    )
                unmapped = ~np.isin(labels, list(BRANCH_CLASSIFICATION_COLORMAP))
                if unmapped.any():
                    ax.scatter(
                        embedding[unmapped, 0], embedding[unmapped, 1],
                        alpha=0.75, c="black", label="Other"
                    )
                ax.legend(frameon=True, framealpha=0.8, edgecolor='black', fancybox=True)
            else:
                ax.scatter(embedding[:, 0], embedding[:, 1], alpha=0.75)
            ax.set_xlabel(r"$\xi_1$")
            ax.set_ylabel(r"$\xi_2$")
            ax.set_title(f"Neighbors: {int(n_neighbors)}")

        fig.suptitle(f"Total components: {n_components}")
        # The rect leaves a band at the top of the figure for the suptitle.
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        fig.savefig(f"{save_path}/{f_name}", dpi=150)
        plt.close(fig)

In [57]:
# 2-component Isomap over a 4x4 grid of neighbour counts.
# NOTE(review): the figure is written into the t-SNE plots directory -- confirm that is intended.
plot_isomap(
    ALL_SPECTRA[:, :, 1], "./misc/plots/t-sne", f_name="isomap_2.png", grid_dim=4, n_components=2,
    branch_classification=SPECTRAL_INDICATORS['branch_classification'],
)

Neighbors: 5
Neighbors: 6
Neighbors: 8
Neighbors: 10
Neighbors: 13
Neighbors: 17
Neighbors: 22
Neighbors: 28
Neighbors: 36
Neighbors: 46
Neighbors: 59
Neighbors: 75
Neighbors: 96
Neighbors: 123
Neighbors: 158
Neighbors: 202


In [58]:
# 3-component Isomap; plot_isomap still draws only the first two coordinates of each embedding.
plot_isomap(
    ALL_SPECTRA[:, :, 1], "./misc/plots/t-sne", f_name="isomap_3.png", grid_dim=4, n_components=3,
    branch_classification=SPECTRAL_INDICATORS['branch_classification']
)

Neighbors: 5
Neighbors: 6
Neighbors: 8
Neighbors: 10
Neighbors: 13
Neighbors: 17
Neighbors: 22
Neighbors: 28
Neighbors: 36
Neighbors: 46
Neighbors: 59
Neighbors: 75
Neighbors: 96
Neighbors: 123
Neighbors: 158
Neighbors: 202


In [59]:
# Intrinsic-dimension estimate of the spectra with skdim's FisherS estimator;
# the cell's displayed value is the estimate.
skdim.id.FisherS(verbose=True, conditional_number=100).fit_transform(ALL_SPECTRA[:, :, 1])

15 components are retained using conditional_number=100.00


8.419870958592943

In [None]:
# Second intrinsic-dimension estimate on the same data, using skdim's lPCA
# estimator (presumably for comparison with FisherS above).
skdim.id.lPCA(verbose=True).fit_transform(ALL_SPECTRA[:, :, 1])

In [None]:
# EMBEDDING is not assigned by any earlier cell (the only assignment is
# commented out), so this cell failed on a fresh kernel. Compute a 2-D
# embedding here; t_sne_embedding returns (projection, kl_divergence).
EMBEDDING, EMBEDDING_KL_DIVERGENCE = t_sne_embedding(ALL_SPECTRA[:, :, 1], n_components=2)
plt.scatter(EMBEDDING[:, 0], EMBEDDING[:, 1])

In [None]:
# Load one spectrum file on its own, parsed the same way as in load_all_spectra.
# NOTE(review): `sn` is not used in the cells visible here.
sn = np.loadtxt("../SNfactory/spectra/AT2015bp.dat", skiprows=1)
