In [None]:
from itertools import product
import numpy as np
import pandas as pd
from astroquery.vizier import Vizier
import matplotlib.pyplot as plt
import astropy as ap
import george
from george import kernels
import pymc3 as pm
import theano
import theano.tensor as tt
import sklearn
from sklearn.neighbors import KernelDensity as KD
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA, KernelPCA
import corner

In [None]:
# Re-bind the imported `Vizier` name to a query object built with row_limit=20000.
# NOTE(review): presumably this raises the per-table row cap so that large tables
# are not truncated — confirm against the astroquery default.
Vizier = Vizier(row_limit=20000)

In [None]:
# VizieR identifier of the catalogue to download.
CATALOG_ID = "J/A+A/618/A93"
catalog = Vizier.get_catalogs(CATALOG_ID)

# Keep the second table of the returned list; the following cells group it by its
# 'Cluster' column.
clucata = catalog[1]

In [None]:
# Group the rows by the cluster they belong to.
newc = clucata.group_by('Cluster')

# Group sizes are the differences between consecutive group boundary indices;
# `maxcluster` is the position of the most populous group.
group_sizes = np.diff(newc.groups.indices)
maxcluster = np.argmax(group_sizes)

# NOTE(review): the group actually used is 6 positions after the largest one;
# the reason for the +6 offset is not visible in this notebook — confirm.
bigcluster = newc.groups[maxcluster+6]
print(bigcluster.colnames)
bigcluster['Cluster'][0]

Good `'Cluster'` values to choose from: Alessi\_24, ASCC\_99, Alessi\_12

In [None]:
# Keep only rows whose membership probability exceeds 0.8.
likely_members = bigcluster[bigcluster['PMemb'] > .8]

# Drop rows whose BP-RP value is NaN.
with_colour = likely_members[~np.isnan(likely_members["BP-RP"])]

# We should cut by lines away from main sequence; for now two rectangular regions
# of the (BP-RP, Gmag) plane are removed, one after the other.
high_colour_low_mag = np.logical_and(with_colour["BP-RP"] > 1.0, with_colour["Gmag"] < 10.)
after_first_cut = with_colour[~high_colour_low_mag]
low_colour_high_mag = np.logical_and(after_first_cut["BP-RP"] < .7, after_first_cut["Gmag"] > 13.8)
cutcluster = after_first_cut[~low_colour_high_mag]

# Sky positions of the surviving rows.
plt.plot(cutcluster['RA_ICRS'], cutcluster['DE_ICRS'], '+')
plt.title('angular coordinates of ' + cutcluster['Cluster'][0])

In [None]:
# Distribution of the membership probabilities that survived the cuts.
cluster_name = cutcluster['Cluster'][0]
plt.hist(cutcluster['PMemb'])
plt.xlabel('PMemb')
plt.title('cluster membership probability of ' + cluster_name)

In [None]:
# Colour-magnitude diagram of the selected rows.
colour, magnitude = cutcluster['BP-RP'], cutcluster['Gmag']
plt.plot(colour, magnitude, '+')
# Limits are given as (19, 7), so smaller Gmag values are drawn towards the top.
plt.ylim(19, 7)
plt.xlabel('BP-RP')
plt.ylabel('Gmag')
plt.title('color-magnitude diagram of ' + cutcluster['Cluster'][0])

Plan: PCA on the colour-magnitude diagram, using the inverse of the variance as the metric -> sqrt(inverse variance) = transformation; apply it -> PCA (test) -> KDE -> inverse transform, on both the left (L) and the right (R), of the kernel widths

LLE (locally linear embedding)? -> for non-MS (non-main-sequence) stars

remove outliers before PCA

In [None]:
# Stack the two columns as rows, then transpose so that each row of X is one
# (BP-RP, Gmag) pair.
stacked = np.vstack((cutcluster['BP-RP'], cutcluster['Gmag']))
X = np.asarray(stacked).T

In [None]:
# Show the shape of the (BP-RP, Gmag) matrix. The former second line, `cmd.shape`,
# referred to a variable that is only created much further down, so this cell
# raised NameError on a fresh kernel and never displayed X.shape.
X.shape

In [None]:
# Two-component PCA fitted to the (BP-RP, Gmag) rows of X.
pca = PCA(n_components=2)
# NOTE(review): this uses `fit`, not `fit_transform`; by scikit-learn convention
# `fit` returns the estimator itself, so X_pca here is the fitted PCA object and
# not an array of projected points. A later cell re-binds X_pca to
# `pca.transform(X)`.
X_pca = pca.fit(X)

In [None]:
# Plot the data in principal-component coordinates.
# Use the `pca` estimator directly: X_pca is re-bound to a plain array further
# down, after which `X_pca.transform` no longer exists and this cell could not be
# re-run. Projecting once also avoids transforming the data twice.
pc_scores = pca.transform(X)
plt.plot(pc_scores[:, 0], pc_scores[:, 1], '+')

In [None]:
# Regular 50x50 grid over the (BP-RP, Gmag) plane; the second axis runs 18 -> 7.
colour_axis = np.linspace(0, 3, 50)
magnitude_axis = np.linspace(18, 7, 50)
X1, X2 = np.meshgrid(colour_axis, magnitude_axis)
X_grid = np.column_stack((X1.ravel(), X2.ravel()))
X_transform = pca.transform(X)

# First principal-component coordinate of every grid node, shaped like the grid.
first_pc_on_grid = pca.transform(X_grid)[:, 0]
Z_grid = first_pc_on_grid.reshape(X1.shape)
plt.contour(X1, X2, Z_grid);

In [None]:
# Data expressed in principal-component coordinates, drawn with equal axis scaling.
X_transform = pca.transform(X)
pc_first, pc_second = X_transform[:, 0], X_transform[:, 1]
plt.scatter(pc_first, pc_second)
plt.axis("equal")

In [None]:
# Display the fitted principal directions; a later cell draws each row of this
# array as an arrow from the data mean.
pca.components_

In [None]:
def draw_vector(v0, v1, ax=None):
    """Draw an arrow between two points using an empty-text annotation.

    Parameters
    ----------
    v0 : point passed to ``annotate`` as the text position (arrow tail).
    v1 : point passed to ``annotate`` as the annotated position (arrow head).
    ax : object providing ``annotate``; when falsy, the current axes
        (``plt.gca()``) are used.
    """
    target_ax = ax if ax else plt.gca()
    arrow_style = {
        'arrowstyle': '->',
        'linewidth': 2,
        'shrinkA': 0,
        'shrinkB': 0,
    }
    # Empty label: only the arrow itself is drawn.
    target_ax.annotate('', v1, v0, arrowprops=arrow_style)

# plot data
plt.scatter(X[:, 0], X[:, 1], alpha=0.2)

# One arrow per principal direction, starting at the data mean and scaled to
# three times the square root of that component's explained variance.
for variance, direction in zip(pca.explained_variance_, pca.components_):
    tip = pca.mean_ + direction * 3 * np.sqrt(variance)
    draw_vector(pca.mean_, tip)

plt.axis('equal');
plt.ylim(18,7)
plt.xlim(-2,4)

In [None]:
# From here on X_pca is the array of projected points (it previously held the
# fitted estimator).
X_pca = pca.transform(X)
first_pc, second_pc = X_pca[:, 0], X_pca[:, 1]
plt.scatter(first_pc, second_pc)
plt.axis('equal')
X_pca.shape

In [None]:
# I couldn't immediately find a KDE code that enabled different bandwidth in each dimension
# which we want because the errors in color are much greater than the errors in magnitude
# Cross-validate a single bandwidth over 200 log-spaced candidates in [1e-2, 1].
bandwidth_candidates = np.logspace(-2, 0, 200)
params = {'bandwidth': bandwidth_candidates}
grid = GridSearchCV(KD(kernel='linear'), params, cv=5)
grid.fit(X_pca)

best_kde = grid.best_estimator_
print("best bandwidth: {0}".format(best_kde.bandwidth))
# first attempt obviously too fine a bandwidth because it allows for double stars
# we could fix it here or say this is just what the data is and fit an HRD model that doesn't permit those
# so now the data is the KDE evaluated on a grid

kde = best_kde.fit(X_pca)
# 50x50 evaluation grid in PCA coordinates; the first coordinate varies slowest.
pc1_axis = np.linspace(-4, 6, 50)
pc2_axis = np.linspace(-1, 1, 50)
eval_where = np.array(list(product(pc1_axis, pc2_axis)))
log_dens = kde.score_samples(eval_where)

# Transpose so rows follow the second coordinate, then reverse the rows so the
# largest second-coordinate value is the first image row.
density_image = np.exp(log_dens.reshape(50, 50).T)[::-1]
plt.imshow(density_image, extent=[-4, 6, -1, 1])
#plt.scatter(cutcluster['BP-RP'], cutcluster['Gmag'], marker='.', color='r', s=1)

In [None]:
# Histogram of the second PCA coordinate; the mixture model defined below is
# fitted to this same column.
plt.hist(X_pca[:,1], bins=50)

In [None]:
# Three-component Normal mixture for the second PCA coordinate.
# W is only used for its shape/size (number of mixture components).
W = np.ones(3)
with pm.Model() as model:
    # Mixture weights: Dirichlet prior with all concentration parameters equal to 1.
    w = pm.Dirichlet('w', np.ones_like(W))

    # Normal priors on the three component means.
    # NOTE(review): the second positional argument is presumably the prior
    # standard deviation — confirm against the installed pymc3 version.
    mu1 = pm.Normal('mu1', -.09, .1)
    mu2 = pm.Normal('mu2', 0.19, .05)
    mu3 = pm.Normal('mu3', .4, .1)
    
    mu = [mu1, mu2, mu3]
    # One Gamma(alpha=1, beta=1) variable per component, passed below as `tau`.
    tau = pm.Gamma('tau', alpha=1, beta=1, shape=W.size)


    # Disabled constraints (kept for reference): a minimum mixture weight and an
    # ordering of the means.
    #p_min_potential = pm.Potential('p_min_potential', tt.switch(tt.min(w) < .1, -np.inf, 0))
    # break symmetry
    #order_means_potential = pm.Potential('order_means_potential',
     #                                    tt.switch(mu[1]-mu[0] < 0, -np.inf, 0)
      #                                   + tt.switch(mu[2]-mu[1] < 0, -np.inf, 0))


    # Likelihood: the observed data are the second PCA coordinate of every row.
    x_obs = pm.NormalMixture('x_obs', w, mu, tau=tau, observed=X_pca[:,1])

In [None]:
# Sample the posterior: 2 chains of 50000 draws after 1000 tuning steps, then
# drop the first 1000 retained draws of each chain as additional burn-in.
# A fixed random_seed makes the chains reproducible under Restart & Run All;
# previously every run produced different samples.
with model:
    trace = pm.sample(50000, n_init=1000, tune=1000, chains=2, random_seed=42)[1000:]

In [None]:
# Trace plots for the sampled variables; the trailing ';' suppresses the repr output.
pm.traceplot(trace);

In [None]:
# Posterior summaries for the mixture weights, the three means and tau.
# NOTE(review): the keyword is spelled `varnames` here; newer pymc3 releases may
# expect `var_names` — confirm against the installed version.
pm.plot_posterior(trace, varnames=['w', 'mu1', 'mu2','mu3','tau']);

In [None]:
# Draw 5000 posterior-predictive samples of the observed variable.
# A fixed random_seed keeps the predictive draws (and the histogram built from
# them) reproducible between runs; previously the draws were unseeded.
with model:
    ppc_trace = pm.sample_posterior_predictive(trace, 5000, random_seed=42)

In [None]:
# Keep the posterior-predictive draws (rows) whose first column lies strictly
# inside (-1, 1).
ppc_samples = ppc_trace['x_obs']
first_column = ppc_samples[:, 0]
inside_window = np.logical_and(first_column > -1, first_column < 1)
newthing = ppc_samples[inside_window]

In [None]:
# Inspect the first column of the retained posterior-predictive draws (the values
# that are histogrammed in the next cell).
newthing[:,0]

In [None]:
# Compare the observed second PCA coordinate with the posterior-predictive draws.
# Both curves now carry a label and the legend is drawn; before, the only label
# was never shown because plt.legend() was not called.
plt.hist(X_pca[:,1], bins=20, lw=2, histtype='step', density=True,
        label='Observed data')
plt.hist(newthing[:,0], bins=20, density=True,
        histtype='step', lw=2,
        label='Posterior predictive distribution')
plt.legend();

In [None]:
# Posterior mean and variance (along the sample axis) of every variable in the trace.
for varname in trace.varnames:
    samples = trace[varname]
    posterior_mean = samples.mean(axis=0)
    posterior_var = np.var(samples, axis=0)
    print(varname, posterior_mean, posterior_var)

In [None]:
# Map the four corners of the PCA-space KDE evaluation window (first coordinate
# in [-4, 6], second in [-1, 1]) back to the original (BP-RP, Gmag) coordinates.
pca.inverse_transform([[-4,-1],[-4,1],[6,-1],[6,1]])

In [None]:
# Overlay the PCA-space KDE on the data in the original (BP-RP, Gmag) plane.
# The previous `plt.imshow(..., extent=[])` raised an error, and no rectangular
# extent can be correct here: the KDE grid is a rectangle in PCA coordinates,
# which maps to a rotated rectangle in the original plane. Instead, every grid
# node is mapped back with the inverse PCA transform and the density is drawn on
# that (non axis-aligned) mesh.
# Relies on `eval_where` / `log_dens` from the PCA-space KDE cell above (50x50 grid).
X_new = pca.inverse_transform(X_pca)

grid_shape = (50, 50)
grid_nodes = pca.inverse_transform(eval_where).reshape(grid_shape + (2,))
density = np.exp(log_dens).reshape(grid_shape)

# 'gouraud' shading takes node coordinates with the same shape as the values.
plt.pcolormesh(grid_nodes[..., 0], grid_nodes[..., 1], density, shading='gouraud')
plt.scatter(X_new[:, 0], X_new[:, 1], color='g', alpha=0.4)
plt.ylim(18,7)
In [None]:
# Corner plot of the data in principal-component coordinates.
corner.corner(X_transform);

In [None]:
# `X_back` was used without ever being defined, so this cell raised NameError.
# Define it as the PCA-projected points mapped back to the original
# (BP-RP, Gmag) coordinates, then plot them.
X_back = pca.inverse_transform(X_pca)
plt.scatter(X_back[:, 0], X_back[:, 1], c="red",
            s=20, edgecolor='k')

In [None]:
# (BP-RP, Gmag) pairs, one row per table row, plus the range of each column.
colour, magnitude = cutcluster['BP-RP'], cutcluster['Gmag']
cmd = np.asarray(np.vstack((colour, magnitude))).T
column_ranges = (np.min(colour), np.max(colour), np.min(magnitude), np.max(magnitude))
print(column_ranges)

In [None]:
# Shape of the (BP-RP, Gmag) matrix built in the previous cell.
cmd.shape

In [None]:
# KDE of the colour-magnitude diagram in the original (BP-RP, Gmag) coordinates.
# I couldn't immediately find a KDE code that enabled different bandwidth in each dimension
# which we want because the errors in color are much greater than the errors in magnitude
params = {'bandwidth': np.logspace(-5, -2, 200)}
grid = GridSearchCV(KD(kernel='exponential'), params, cv=5)
# Cross-validate on the same data the estimator is used for. The search used to
# run on X_pca while the final fit used cmd; because the PCA here is only a
# rotation and shift the selected bandwidth should be the same, but fitting on
# cmd directly removes the mismatch.
grid.fit(cmd)

print("best bandwidth: {0}".format(grid.best_estimator_.bandwidth))
# so now the data is the KDE evaluated on a grid

# GridSearchCV refits the best estimator on the full data set by default, so no
# second fit is needed.
kde = grid.best_estimator_
# 50x50 grid: BP-RP from 0 to 2.7 (varies slowest), Gmag from 18 down to 7.
eval_where = np.array(list(product(np.linspace(0., 2.7, 50), np.linspace(18., 7., 50))))
log_dens = kde.score_samples(eval_where)

plt.imshow(np.flip(np.exp(log_dens.reshape(50, 50).T), axis=0), extent=[0., 2.7, 18., 7.], aspect=0.25)
#plt.scatter(cutcluster['BP-RP'], cutcluster['Gmag'], marker='.', color='r', s=1)

In [None]:
# GP training set: grid nodes as inputs, KDE log-density at those nodes as targets.
x, y = eval_where, log_dens
x.shape

In [None]:
# Gaussian-process model for the log-density surface: a Matern-5/2 kernel over the
# two input dimensions, multiplied by a constant factor of 5.
# NOTE(review): the first Matern52Kernel argument (5.) is presumably the metric /
# squared length scale — confirm against the george documentation.
kernel = 5*kernels.Matern52Kernel(5., ndim=2)
# The mean starts at the average of the targets and is flagged with fit_mean=True.
gp = george.GP(kernel, mean=np.mean(y), fit_mean=True)

In [None]:
# Pre-compute the GP on the training inputs.
gp.compute(x)

# 100x100 prediction grid: first coordinate 0..3 (varies slowest), second 7..18.
colour_nodes = np.linspace(0, 3, 100)
magnitude_nodes = np.linspace(7, 18, 100)
test_x = np.array(list(product(colour_nodes, magnitude_nodes)))

In [None]:
# Predict the log-density on the 100x100 grid.
# With george's defaults, `predict` returns the full predictive covariance
# (10000 x 10000 for this grid) as its second value; return_var=True requests the
# per-point variance instead, which is what the name `pred_var` implies and is
# far cheaper in memory.
pred, pred_var = gp.predict(y, test_x, return_var=True)

In [None]:
# Show exp(prediction) as an image. The grid's first coordinate varies slowest, so
# the transpose puts the second coordinate (7..18) along the image rows; the extent
# places 7 at the top and 18 at the bottom.
plt.imshow(np.exp(pred.reshape(100, 100).T), extent=[0., 3, 18., 7.], aspect=0.25)

In [None]:
import scipy.optimize as op

# Objective for the optimizer: the negative ln-likelihood of the GP.
def nll(p):
    """Return the negative GP ln-likelihood of ``y`` for parameter vector ``p``.

    Sets ``p`` on the module-level ``gp`` as a side effect. A non-finite
    likelihood is replaced by the large penalty value 1e25.
    """
    gp.set_parameter_vector(p)
    log_like = gp.log_likelihood(y, quiet=True)
    if np.isfinite(log_like):
        return -log_like
    return 1e25

# Gradient of the objective above.
def grad_nll(p):
    """Return the negated gradient of the GP ln-likelihood of ``y`` at ``p``.

    Sets ``p`` on the module-level ``gp`` as a side effect.
    """
    gp.set_parameter_vector(p)
    gradient = gp.grad_log_likelihood(y, quiet=True)
    return -gradient

# The GP has to be computed once before the optimization starts.
gp.compute(x)

# ln-likelihood at the initial parameters.
initial_ll = gp.log_likelihood(y)
print(initial_ll)

# Minimize the negative ln-likelihood from the current parameter vector,
# supplying the analytic gradient.
p0 = gp.get_parameter_vector()
results = op.minimize(nll, p0, jac=grad_nll, method="L-BFGS-B")

# Store the optimized parameters on the GP and report the final ln-likelihood.
gp.set_parameter_vector(results.x)
final_ll = gp.log_likelihood(y)
print(final_ll)



In [None]:
# Predict again with the optimized hyper-parameters.
# return_var=True asks george for the per-point predictive variance; the default
# second return value is the full 10000 x 10000 covariance matrix for this grid,
# which is never used here and is very expensive in memory.
pred, pred_var = gp.predict(y, test_x, return_var=True)


# Data points plus the exp(prediction) image on the same axes.
plt.scatter(cutcluster['BP-RP'], cutcluster['Gmag'], marker='.', color='r', s=1)
plt.imshow(np.exp(pred.reshape(100, 100).T), extent=[0., 3, 18., 7.], aspect=0.25)