In [3]:
import os, glob, re, sys
import socket
import torch
import wandb
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sb
import pandas as pd
import umap
from sklearn.manifold import TSNE
from tqdm import tqdm_notebook

sys.path.append('../')
from src.vae_models import *
from src.datasets import Astro_lightcurves
from src.utils import *

import warnings
warnings.filterwarnings('ignore')

# Project root: the parent of the directory the notebook is run from.
main_path = os.path.split(os.getcwd())[0]

# Output switches; both are off, so nothing is written to disk by default.
save_plots = save_tables = False

In [4]:
# Weights & Biases run identifier of the trained model analysed below.
ID = 'yp4qdw1r'
# Set to True to allow CUDA when it is available.
gpu = False

# Seed every random number generator used in this notebook with the same value.
rnd_seed = 13
for seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_fn(rnd_seed)

In [5]:
# Local folder that mirrors the W&B run; the files below are stored in it.
run_dir = '%s/wandb/run--%s/' % (main_path, ID)

# Only the model checkpoint is checked; when it is missing, both the
# checkpoint and the run configuration are fetched again.
if not os.path.exists(run_dir + 'VAE_model_None.pt'):
    print('Downloading files from Weight & Biases')

    api = wandb.Api()
    run = api.run('jorgemarpa/Phy-VAE/%s' % (ID))
    for fname in ('VAE_model_None.pt', 'config.yaml'):
        run.file(fname).download(replace=True, root=run_dir)

# Use the first CUDA device only when one is available AND `gpu` is enabled.
use_cuda = torch.cuda.is_available() and gpu
device = torch.device("cuda:0" if use_cuda else "cpu")

## Load model variables into VAE model

In [6]:
# Load the trained model and its configuration for run `ID`.
# `load_model_list` is not defined in this notebook; presumably it comes from
# one of the `src` star imports — confirm.
vae, config = load_model_list(ID=ID)
# Echo the configuration as the cell output.
config

Loading from... 
 /Users/jorgetil/Astro/PELS-VAE/wandb/run--yp4qdw1r/VAE_model_None.pt
Is model in cuda?  False


{'architecture': 'tcn',
 'batch_size': 128,
 'beta_vae': '0.75',
 'classes': 'all',
 'data': 'OGLE3',
 'dropout': 0.2,
 'epochs': 150,
 'feed_pp': 'F',
 'hidden_size': 48,
 'kernel_size': 5,
 'label_dim': 8,
 'latent_dim': 4,
 'latent_mode': 'repeat',
 'learning_rate': 0.001,
 'learning_rate_scheduler': 'cos',
 'n_feats': 3,
 'n_train_params': 300945,
 'num_layers': 9,
 'phys_params': 'PTA',
 'physics_dim': 3,
 'sequence_lenght': 600,
 'transpose': False,
 'normed': True,
 'folded': True,
 'date': '',
 'ID': 'yp4qdw1r'}

In [7]:
# Build the light-curve dataset from the settings stored in the run config.
# NOTE(review): the original condition was `if config['data']`, which is true
# for any non-empty survey name, so band 'B' could never be selected.
# Comparing against 'OGLE3' is assumed to be the intent (the OGLE3 file loaded
# for this run is the I-band one) — confirm for other surveys.
dataset = Astro_lightcurves(survey=config['data'],
                            band='I' if config['data'] == 'OGLE3' else 'B',
                            use_time=True,
                            use_err=True,
                            norm=config['normed'],
                            folded=config['folded'],
                            machine=socket.gethostname(),
                            seq_len=config['sequence_lenght'],
                            phy_params=config['phys_params'])

# `classes` is either a plain value (e.g. 'all') or '<drop|only>_<class>'.
class_spec = config['classes'].split('_')
if class_spec[0] == 'drop':
    dataset.drop_class(class_spec[1])
elif class_spec[0] == 'only':
    dataset.only_class(class_spec[1])
print('Using physical parameters: ', dataset.phy_names)
dataset.remove_nan()
# class_value_counts() prints the table itself and returns None (see the
# recorded output), so wrapping it in print() only added a stray "None" line.
dataset.class_value_counts()
print('Total: ', len(dataset))
# Number of classes = width of the one-hot label matrix.
num_cls = dataset.labels_onehot.shape[1]

# Seeded split so the test subset is the same on every run.
train_load, test_load = dataset.get_dataloader(batch_size=config['batch_size'],
                                               shuffle=True,
                                               test_split=.2,
                                               random_seed=rnd_seed)

Loading from:
 /Users/jorgetil/Google Drive/Colab_Notebooks/data/time_series/real/OGLE3_lcs_I_meta_snr5_augmented_folded_trim600.npy.gz
Using physical parameters:  ['Period', 'teff_val', 'abs_Gmag']
ECL      8817
ELL      8637
LPV      8201
CEP      6906
T2CEP    3734
DSCT     3704
RRLYR    2962
ACEP       70
Name: Type, dtype: int64
None
Total:  43031


In [None]:
# Run the encoder of the loaded model over the test loader.
# NOTE(review): later cells use mu.loc[:, 'class'] and mu.iloc[:, :-1], so `mu`
# appears to be a DataFrame whose last column is 'class' — confirm against
# evaluate_encoder. The meaning of `force=True` is not visible here.
mu, std = evaluate_encoder(vae, test_load, config, 
                           n_classes=num_cls, force=True)

Evaluating Encoder...


HBox(children=(FloatProgress(value=0.0, max=68.0), HTML(value='')))

In [None]:
# Positions of the samples drawn by the test loader's sampler.
test_idx = test_load.sampler.indices
# Metadata rows and light curves of those samples.
test_meta = dataset.meta.iloc[test_idx]
test_lcs = dataset.lcs[test_idx]
# Show the four shapes side by side as a quick consistency check.
tuple(obj.shape for obj in (mu, std, test_meta, test_lcs))

# Plot LC reconstruction

# Joint distributions of latent variables

In [None]:
# Pairwise scatter / histogram grid of the columns of `mu`, colored by 'class'.
class_order = sorted(set(mu.loc[:, 'class'].values))
g = sb.PairGrid(mu,
                hue='class', hue_order=class_order,
                corner=False, despine=True, palette='Dark2_r')
g = g.map_diag(plt.hist, histtype='step', lw=1.5)
g = g.map_offdiag(plt.scatter, marker='.', s=20, alpha=.5, edgecolors='none')
# Hide the upper triangle: it mirrors the lower one.
for i, j in zip(*np.triu_indices_from(g.axes, 1)):
    g.axes[i, j].set_visible(False)
g = g.add_legend(loc='upper center', fontsize=20, title='',
                 markerscale=4, ncol=2)
for ax in g.axes.flat:
    # This only works for the left ylabels
    ax.set_ylabel(ax.get_ylabel(), fontsize='x-large')
    ax.set_xlabel(ax.get_xlabel(), fontsize='x-large')

if save_plots:
    # `main_path` is the project root defined in this notebook; the original
    # used `path`, which is never defined here.
    plt.savefig('%s/imgs/z_pairplot_%s.pdf' % (main_path, ID),
                format='pdf', bbox_inches='tight')

plt.show()

## t-SNE projection to 2D

In [None]:
# Work on copies so the encoder output and the test metadata stay untouched.
mu_to_tsne = mu.copy()
meta_to_tsne = test_meta.copy()
print(meta_to_tsne['Type'].value_counts())

# (A sweep over perplexity in [2,5,8,10,15,20,25,30,40,50,100] is left disabled.)
tsne_kwargs = dict(n_components=2, perplexity=40, random_state=10, verbose=0)
tsne = TSNE(**tsne_kwargs)
# Every column except the last one is passed to t-SNE.
latent_values = mu_to_tsne.iloc[:, :-1].values
mu_embed = tsne.fit_transform(latent_values)
print('Embeding shape: ', mu_embed.shape)

In [None]:
# Print a summary of the test-set metadata copy before its columns are used to color the projections.
meta_to_tsne.info()

In [None]:
# t-SNE embedding colored by class, then by each physical/metadata column.
tsne_x, tsne_y = mu_embed[:, 0], mu_embed[:, 1]

scatter_hue(tsne_x, tsne_y,
            mu_to_tsne.loc[:, 'class'].values,
            disc=True)

# (metadata column, apply log10?, colorbar label), plotted in this order.
# The T_eff values are log10-transformed, so the label says so (the original
# label read plain T_eff while the Period plot was already labelled log(P)).
tsne_hues = [
    ('teff_val', True, r'log($T_{eff}$)'),
    ('bp_rp', False, 'bp-rp'),
    ('abs_Gmag', False, r'$M_g$'),
    ('Period', True, 'log(P)'),
    ('[Fe/H]_J95', False, '[Fe/H]_J95'),
]
for column, use_log, label in tsne_hues:
    hue_values = meta_to_tsne.loc[:, column].values
    if use_log:
        hue_values = np.log10(hue_values)
    scatter_hue(tsne_x, tsne_y, hue_values, disc=False, c_label=label)

## UMAP projection

In [None]:
# 2-D UMAP of the same columns that were passed to t-SNE (all but the last).
umap_kwargs = dict(n_neighbors=100, min_dist=0.05,
                   n_components=2, metric='euclidean')
umapper = umap.UMAP(**umap_kwargs)
latent_for_umap = mu_to_tsne.iloc[:, :-1].values
embedding = umapper.fit_transform(latent_for_umap)

In [None]:
# UMAP embedding colored by class ('Type'), then by each metadata column.
umap_x, umap_y = embedding[:, 0], embedding[:, 1]

scatter_hue(umap_x, umap_y,
            meta_to_tsne.loc[:, 'Type'].values,
            disc=True)

# (metadata column, apply log10?, colorbar label), plotted in this order.
umap_hues = [
    ('bp_rp', False, 'bp-rp'),
    ('abs_Gmag', False, r'$M_g$'),
    ('Period', True, 'log(P)'),
    ('[Fe/H]_J95', False, '[Fe/H]_J95'),
]
for column, use_log, label in umap_hues:
    hue_values = meta_to_tsne.loc[:, column].values
    if use_log:
        hue_values = np.log10(hue_values)
    scatter_hue(umap_x, umap_y, hue_values, disc=False, c_label=label)

# Evaluate VAE with perturbed $T_{eff}$ (resampled from a normal with scale `teff_e`)

In [None]:
# Re-encode the test light curves N_resamp times, each time with teff_val
# redrawn from a normal centred on the catalogued value with scale teff_e,
# and collect the resulting latent means and standard deviations.
N_resamp = 5
# Number of test light curves encoded per resample; None uses the whole test
# set. The original sliced [:100] in only one of the three encoder branches.
N_eval = 100
eval_slice = slice(None, N_eval)

new_onehot = torch.from_numpy(dataset[test_load.sampler.indices][-2])
new_lcs_t = torch.from_numpy(test_lcs)
new_latent = []

# NOTE(review): config has dropout=0.2; this assumes the model is already in
# eval mode after loading/evaluation — confirm, otherwise encodings are noisy.
for k in range(N_resamp):
    new_meta = test_meta.copy()
    new_meta['teff_val'] = np.random.normal(loc=test_meta.teff_val,
                                            scale=test_meta.teff_e)
    phy_values = new_meta.loc[:, dataset.phy_aux].values.astype(np.float32)
    new_meta_pp = torch.from_numpy(dataset.mm_scaler.transform(phy_values))
    if k == 0:
        # Shapes do not change between resamples; report them once.
        print(new_lcs_t.shape, new_meta_pp.shape, new_onehot.shape)

    # Conditioning inputs depend on how the model was configured.
    encoder_kwargs = {}
    if config['label_dim'] > 0:
        encoder_kwargs['label'] = new_onehot[eval_slice]
        if config['physics_dim'] > 0:
            encoder_kwargs['phy'] = new_meta_pp[eval_slice]

    # Inference only: without no_grad/detach the outputs track gradients and
    # Tensor.numpy() raises a RuntimeError.
    with torch.no_grad():
        mu_, logvar_ = vae.encoder(new_lcs_t[eval_slice], **encoder_kwargs)
    new_mu = mu_.detach().cpu().numpy()
    # std = exp(0.5 * logvar)
    new_std = np.exp(0.5 * logvar_.detach().cpu().numpy())
    new_latent.append([new_mu, new_std])
    # The original ended the loop body with an unconditional `break`, so only
    # one of the N_resamp resamples was ever produced; it has been removed.

In [None]:
# Display the [new_mu, new_std] pairs collected by the resampling loop.
new_latent