In [None]:
import numpy as np
from sklearn.decomposition import PCA
import h5py
import pickle
import matplotlib.pyplot as plt
import GPy

## PCA decomposition

Dimensionality reduction using unsupervised PCA. The fitted PCA model is pickled to `pcaModel.pickle` so that it can be reloaded later.

In [None]:
def pca_reduction(X, ncomp=10, model_path='pcaModel.pickle'):
    """
    Fit a PCA model to X, plot the explained variance of each component,
    and pickle the fitted model.

    Input parameters :
    - X : 2-D array, one flattened sample per row (nsamp * imsize)
    - ncomp : Dimension of the subspace
    - model_path : File the fitted PCA model is pickled to

    Output :
    - pca : Fitted PCA model
    - weights : Projection of X onto the principal components (nsamp * ncomp)
    - basis : Principal components, i.e. pca.components_ (ncomp * imsize)
    """

    print('Performing dimensionality reduction ...')

    # PCA fitting
    pca = PCA(n_components=ncomp)
    weights = pca.fit_transform(X)
    basis = pca.components_

    # Plot the explained variance of each individual component (not cumulative)
    variance_ratio = pca.explained_variance_ratio_
    plt.figure(234)
    plt.semilogy(variance_ratio*100, 's-')
    plt.ylabel('Explained Variance (%)', size=20)
    plt.xticks(size=20)
    plt.xlabel('Component', size=20)
    plt.yticks(size=20)
    plt.show()

    # Total variance captured by the ncomp retained components
    total_variance = np.sum(variance_ratio)
    print('Explained variance: '+str(round(total_variance*100, 2))+' %.')

    # Persist the fitted model so it can be reloaded without refitting
    with open(model_path, 'wb') as handle:
        pickle.dump(pca, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return pca, weights, basis


## GP fitting

A GP mapping x_values to y_values is fitted. The trained model is then saved to disk as a compressed file.

In [None]:


def gp_fit(weights, params):
    """
    Fits a GP regression that maps the x-values onto the PCA weights matrix.

    Input :
    - weights : Weights from PCA, used as regression targets
    - params : x-values, used as regression inputs after rescale()

    Output :
    - model : Optimized GP model
    - tmean, tmult : Rescaling factors returned by rescale()

    The optimized model is also written to disk with save_model(), under a
    base name built from weights.shape[1] and the number of input columns.
    """
    scaled_params, tmean, tmult = rescale(params)
    n_inputs = scaled_params.shape[1]
    n_outputs = weights.shape[1]

    # Matern 5/2 kernel, constructed with GPy's default hyperparameters
    matern = GPy.kern.Matern52(input_dim=n_inputs)

    # Build the regression model and optimize it
    gp = GPy.models.GPRegression(scaled_params, weights, kernel=matern)
    gp.optimize()

    # Store the trained model together with its training data, compressed
    out_name = 'gpfit_' + str(n_outputs) + '_' + str(n_inputs)
    gp.save_model(out_name, compress=True, save_data=True)

    return gp, tmean, tmult


def gp_predict(model, params):
    """
    Evaluates the GP at the given parameters and returns the predicted weights
    (the input expected by the inverse PCA).

    Input :
    - model : GP model
    - params : physical parameters (flux, radius, shear profile, psf fwhm)

    Output :
    - First element of model.predict(params), i.e. the predicted weights
    """
    # predict() returns several items; only the first one is needed here
    return model.predict(params)[0]


def emulator(pca_model, gp_model, params):
    """
    Emulates a new image for one set of physical parameters.

    Input :
    - pca_model : PCA model
    - gp_model : GP model
    - params : physical parameters (flux, radius, shear profile, psf fwhm)

    Output :
    - Emulated image, obtained by inverse-transforming the predicted weights
    """
    # Add a leading axis so the single parameter vector is passed as a batch of one
    batch = np.expand_dims(params, axis=0)

    # GP prediction of the PCA weights, then inverse PCA (weights * basis + mean)
    return pca_model.inverse_transform(gp_predict(gp_model, batch))



In [None]:
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
save_loc = "/Users/nramachandra/Desktop/Projects/AGNfinder/FSPS/"

# Read the 'X' and 'Y' datasets of the 'fsps_cache' group fully into memory
with h5py.File(save_loc + 'fsps_cache.hdf5', 'r') as f:
    cache = f['fsps_cache']
    X_loaded = cache['X'][...]
    Y_loaded = cache['Y'][...]


In [None]:
## logging and clipping
# X_loaded = np.log10(X_loaded)
# Drop the first 100 columns of Y and take log10 of the rest.
# NOTE: this overwrites Y_loaded, so re-running the cell clips and logs it again.
Y_loaded = np.log10(Y_loaded[:, 100:])


def _center_and_scale(arr):
    """Subtract the column means, then divide by the column-wise max of the centred data."""
    offset = np.mean(arr, axis=0)
    centred = arr - offset
    scale = np.max(centred, axis=0)
    return centred / scale, offset, scale


# ### rescaling
y_train, y_mean, y_mult = _center_and_scale(Y_loaded)
x_train, x_mean, x_mult = _center_and_scale(X_loaded)

In [None]:
# Reduce y_train with PCA (default ncomp=10); this also shows the variance plot
# and writes the fitted model to 'pcaModel.pickle'.
pca_model, pca_weights, pca_basis = pca_reduction(y_train)

## Plotting PC weights 

Scatter plots of pairs of PCA weights, coloured by the input parameters (x_values), to see how the weights depend on them.

In [None]:
# PCA weight 0 (x-axis) against weight 1 (y-axis), coloured by log10 of input column 0.
# The axis labels follow the order of the scatter() arguments.
plt.figure(figsize=(7, 6))
plt.scatter(pca_weights[:, 0], pca_weights[:, 1], s=1, c=np.log10(X_loaded[:, 0]))
plt.xlabel('Weight '+str(0), size=25)
plt.ylabel('Weight '+str(1), size=25)
plt.colorbar(label='log10(X[:, 0])')
plt.show()

In [None]:
# PCA weight 0 (x-axis) against weight 2 (y-axis), coloured by log10 of input column 1.
# The axis labels follow the order of the scatter() arguments.
plt.figure(figsize=(7, 6))
plt.scatter(pca_weights[:, 0], pca_weights[:, 2], s=1, c=np.log10(X_loaded[:, 1]))
plt.xlabel('Weight '+str(0), size=25)
plt.ylabel('Weight '+str(2), size=25)
plt.colorbar(label='log10(X[:, 1])')
plt.show()


In [None]:
# PCA weight 0 (x-axis) against weight 3 (y-axis), coloured by log10 of input column 2.
# The axis labels follow the order of the scatter() arguments.
plt.figure(figsize=(7, 6))
plt.scatter(pca_weights[:, 0], pca_weights[:, 3], s=1, c=np.log10(X_loaded[:, 2]))
plt.xlabel('Weight '+str(0), size=25)
plt.ylabel('Weight '+str(3), size=25)
plt.colorbar(label='log10(X[:, 2])')
plt.show()

In [None]:


import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d    

# %matplotlib notebook


# 3-D scatter of PCA weights 0, 2 and 4, coloured by log10 of input column 0.
# scatter() is called on the 3-D axes itself: plt.scatter(x, y, z) would pass the
# third array as the marker size `s`, leaving every point at z = 0.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(pca_weights[:, 0], pca_weights[:, 2], pca_weights[:, 4], c=np.log10(X_loaded[:, 0]))
ax.set_xlabel('Weight 0')
ax.set_ylabel('Weight 2')
ax.set_zlabel('Weight 4')
plt.show()

In [None]:

# Overlay every row of y_train as its own curve (plot() draws one line per column,
# hence the transpose). Log axes are left disabled below.
plt.plot(y_train.T)
# plt.yscale('log')
# plt.xscale('log')
plt.show()

In [None]:
# Reload the PCA model that pca_reduction() pickled to 'pcaModel.pickle'.
# NOTE(review): the cells that follow use `pca_model`, not this `pca`.
# pickle.load executes arbitrary code, so only load files you created yourself.
with open('pcaModel.pickle', 'rb') as handle:
    pca = pickle.load(handle)

## GP fitting

In [None]:
# Fit the GP (x_train as inputs, PCA weights as targets); gp_fit also saves the model to disk.
# NOTE(review): the returned (model, tmean, tmult) are not stored, so the rescaling
# factors are lost; confirm whether prediction inputs need the same rescaling.
gp_fit(pca_weights, x_train)

In [None]:
# Size of the training inputs (rows are indexed as samples, columns counted as parameters below)
x_train.shape

In [None]:
# Rebuild the file name that gp_fit() saved under, then reload the trained GP
ntrain = pca_weights.shape[1]
nparams = x_train.shape[1]
model_file = 'gpfit_'+str(ntrain)+'_'+str(nparams) + '.zip'
gp_model = GPy.models.GPRegression.load_model(model_file)

# One row of the training inputs, used as the test parameter vector
new_params = x_train[433]


In [None]:
# Emulate the output for the single parameter vector `new_params`
y_emu = emulator(pca_model, gp_model, new_params)

In [None]:
# emulator() adds a leading sample axis to its input, so y_emu is presumably
# (1, n_features); transposing makes plot() draw it as a single curve.
plt.plot(y_emu.T)

In [None]:
# Compare emulated and true outputs for 100 randomly drawn training samples.
plt.figure(1212)

# Rows are drawn from [100, 2000); cap the upper bound so a smaller training
# set cannot be indexed out of range.
idx_high = min(2000, x_train.shape[0])

for i in range(100):
    rand_idx = np.random.randint(100, idx_high)
    rand_params = x_train[rand_idx]

    # Emulate the randomly drawn sample itself, so that y_emu and y_true
    # belong to the same row of the training set.
    y_emu = emulator(pca_model, gp_model, rand_params)
    y_true = y_train[rand_idx]

    # Flatten the emulated output before dividing: a column vector divided by
    # a 1-D array would broadcast to a square matrix instead of a per-element ratio.
    plt.plot(np.ravel(y_emu) / y_true)

plt.xlabel('Feature index')
plt.ylabel('Emulated / true')
plt.show()