In [1]:
from populations.bbh_models import get_models


PyCBC.libutils: pkg-config call failed, setting NO_PKGCONFIG=1


In [None]:
# Call get_models (imported from populations.bbh_models in the first cell) on the
# reduced models file. NOTE(review): ['CE'] and ['mchirp'] are presumably the channel
# and parameter selections -- confirm against the get_models signature.
get_models('../OneChannel_Flows/models_reduced.hdf5',['CE'],['mchirp'])

In [1]:
import pandas as pd
import numpy as np
import h5py

from scipy.special import logit
from scipy.special import expit

def logistic(data,rescaling=False, wholedataset=True, max =1, rescale_max=1):
    """Optionally rescale ``data``, apply the logit transform, then normalise it.

    The input is never modified in place: every step builds a new array, so it
    is safe to pass a view (e.g. a column slice) of a larger array.

    Parameters
    ----------
    data : array-like supporting arithmetic (e.g. numpy array)
        Values to transform.
    rescaling : bool
        If True, divide ``data`` by a rescale factor before the logit.
    wholedataset : bool
        If True, the rescale factor is ``np.max(data) + 0.01`` and the
        normalisation constant is the maximum of the logit-transformed data.
        If False, the supplied ``rescale_max`` and ``max`` are used instead.
    max : float
        Normalisation constant used when ``wholedataset`` is False.
        (The name shadows the builtin; kept for interface compatibility.)
    rescale_max : float
        Rescale factor used when ``rescaling`` is True and ``wholedataset`` is False.

    Returns
    -------
    list
        ``[transformed_data, max, rescale_max]``; ``rescale_max`` is None when
        ``rescaling`` is False.
    """
    if rescaling:
        if wholedataset:
            # small offset keeps the largest sample strictly below 1 after rescaling
            rescale_max = np.max(data) + 0.01
        # out-of-place division: does not write through to the caller's array
        data = data / rescale_max
    else:
        rescale_max = None

    data = logit(data)

    if wholedataset:
        max = np.max(data)
    data = data / max
    return [data, max, rescale_max]

def expistic(data, max, rescale_max=None):
    """Undo the normalise / logit / rescale sequence: multiply by ``max``,
    apply expit, and optionally multiply by ``rescale_max``.

    The input is never modified in place.

    Parameters
    ----------
    data : array-like supporting arithmetic
        Normalised logit-space values.
    max : float
        Factor the values are multiplied by before expit.
        (The name shadows the builtin; kept for interface compatibility.)
    rescale_max : float or array-like or None
        If not None, the expit output is multiplied by this factor.

    Returns
    -------
    The transformed values.
    """
    data = expit(data * max)
    # identity test: `!= None` is evaluated elementwise for numpy values
    if rescale_max is not None:
        data = data * rescale_max
    return data

def get_model_keys(path):
    """Collect the HDF5 keys of the groups that directly contain datasets.

    Every dataset in the file at ``path`` contributes the part of its name
    before the last '/'. The unique names are sorted; names containing 'alpha'
    are kept only if they also contain 'alpha10'; each kept name is prefixed
    with '/'. The resulting array is split into 5 equal parts.

    Returns
    -------
    list of numpy arrays
        The output of ``np.split(keys, 5)``. ``np.split`` raises ValueError if
        the number of keys is not divisible by 5.
    """
    alpha_val = '10'
    dataset_parents = set()

    def find_submodels(name, obj):
        if isinstance(obj, h5py.Dataset):
            dataset_parents.add(name.rsplit('/', 1)[0])

    # context manager closes the file even if the traversal raises
    with h5py.File(path, 'r') as f:
        f.visititems(find_submodels)

    # use only models with the given alpha value (models without 'alpha' are always kept)
    models = [
        '/' + model
        for model in sorted(dataset_parents)
        if 'alpha' not in model or 'alpha' + alpha_val in model
    ]
    return np.split(np.array(models), 5)

def get_model_keys_CE(path):
    """Collect the HDF5 keys of dataset-containing groups whose name contains 'CE'.

    Every dataset in the file at ``path`` contributes the part of its name
    before the last '/'. The unique names are sorted, those containing the
    substring 'CE' are kept (all alpha values included) and prefixed with '/'.
    The resulting array is split into 4 equal parts.

    Returns
    -------
    list of numpy arrays
        The output of ``np.split(keys, 4)``. ``np.split`` raises ValueError if
        the number of keys is not divisible by 4.
    """
    dataset_parents = set()

    def find_submodels(name, obj):
        if isinstance(obj, h5py.Dataset):
            dataset_parents.add(name.rsplit('/', 1)[0])

    # context manager closes the file even if the traversal raises
    with h5py.File(path, 'r') as f:
        f.visititems(find_submodels)

    # use only models whose key contains 'CE'
    models = ['/' + model for model in sorted(dataset_parents) if 'CE' in model]
    return np.split(np.array(models), 4)

def read_hdf5(path, all_alpha=False):
    """Load every selected model table from the HDF5 file at ``path``.

    The grid of keys comes from ``get_model_keys_CE`` when ``all_alpha`` is
    True and from ``get_model_keys`` otherwise. Each key is read with
    ``pd.read_hdf`` and stored under its ``(row, column)`` position in the grid.

    Returns
    -------
    dict
        Maps ``(i, j)`` index tuples to the object returned by ``pd.read_hdf``.
    """
    key_loader = get_model_keys_CE if all_alpha else get_model_keys
    key_grid = np.asarray(key_loader(path))

    return {
        (row, col): pd.read_hdf(path, key=key_grid[row, col])
        for row in range(key_grid.shape[0])
        for col in range(key_grid.shape[1])
    }

def plot_histogram(model_id, param, axes, models_path):
    """Histogram one parameter of one model onto ``axes``.

    All model tables are loaded with ``read_hdf5(models_path)``; the table
    stored under ``model_id`` is indexed by ``param`` and the result is passed
    to ``axes.hist``. Nothing is returned.
    """
    tables = read_hdf5(models_path)
    values = tables[model_id][param]
    axes.hist(values)

In [None]:
# NOTE(review): absolute, machine-specific path -- consider a path relative to the
# notebook or a configurable data directory so the notebook runs elsewhere.
models_path ='/Users/stormcolloms/Documents/PhD/Project_work/OneChannel_Flows/models_reduced.hdf5'
# all_alpha=False: keys come from get_model_keys (names containing 'alpha' are restricted to 'alpha10')
samples = read_hdf5(models_path, all_alpha=False)

# 1 selects the single-hyperparameter (chi_b) branch of the preparation cell;
# any other value selects the CE branch with (chi_b, alpha)
cond_inputs = 1

channel_label = 'SMT'
params = ['mchirp','q', 'chieff', 'z']
# derived from params so the two can never get out of sync
no_params = len(params)

# hyperparameter grids
chi_b = [0.,0.1,0.2,0.5]
alpha = [0.2,0.5,1.,2.,5.]


In [22]:

channel_ids = {'CE':0, 'CHE':1,'GC':2,'NSC':3, 'SMT':4}
channel_id = channel_ids[channel_label]
#number of data points (total) for each channel
#NOTE(review): the CE branch below uses no_binaries as a per-model row count, not a total -- confirm the CE entry
channel_samples = [1e6,864124,896611,582961, 4e6]
no_binaries = int(channel_samples[channel_id])

#read in weights as well; guarded so that re-running this cell does not append 'weight' a second time
if 'weight' not in params:
    params = params + ['weight']

n_chib = len(chi_b)
n_alpha = len(alpha)

#rescale factor applied to q before its logit; stays None unless the GC branch sets it
q_rescale = None

if cond_inputs == 1:
    #Channels with 1D hyperparameters: SMT, GC, NSC, CHE

    #put data from required parameters for all chi_bs into models_stack
    models = np.zeros((no_binaries,no_params+1))
    #one entry per chi_b value (row count of each model, and running total)
    model_size = np.zeros(n_chib)
    cumulsize = np.zeros(n_chib)

    #stack data: each chi_b model fills the next contiguous block of rows
    row_start = 0
    for chib_id, xb in enumerate(chi_b):
        model_rows = np.asarray(samples[(channel_id,chib_id)][params])
        model_size[chib_id] = np.shape(model_rows)[0]
        row_stop = row_start + int(model_size[chib_id])
        cumulsize[chib_id] = row_stop
        models[row_start:row_stop] = model_rows
        row_start = row_stop

    #unfilled rows would silently stay zero and break the stacking with the hyperparameters below
    if row_start != no_binaries:
        raise ValueError('channel %s: read %d samples but channel_samples expects %d'
                         % (channel_label, row_start, no_binaries))

    #copy once, after every model has been written
    models_stack = np.copy(models)

    #logit and renormalise distributions pre-batching
    models_stack[:,0], max_logit_mchirp, max_mchirp = logistic(models_stack[:,0], True)
    if channel_id == 2: #add extra tiny amount to GC mass ratios as q=1 samples exist
        models_stack[:,1], max_q, extra_scale = logistic(models_stack[:,1], True)
        #keep the factor so the q transform can be inverted
        q_rescale = extra_scale
    else:
        models_stack[:,1], max_q, _ = logistic(models_stack[:,1])
    models_stack[:,2] = np.arctanh(models_stack[:,2])
    models_stack[:,3],max_logit_z, max_z = logistic(models_stack[:,3], True)

    #one chi_b label per sample row
    training_hps_stack = np.repeat(chi_b, (model_size).astype(int), axis=0)
    training_hps_stack = np.reshape(training_hps_stack,(-1,1))
    #this branch validates on the same samples and labels it trains on
    validation_hps_stack = np.reshape(training_hps_stack,(-1,1))
    train_models_stack = models_stack
    validation_models_stack = models_stack

else:
    #CE channel with alpha parameter treatment
    #NOTE(review): samples is indexed here as (chi_b id, alpha id), which presumably needs
    #read_hdf5(..., all_alpha=True) -- confirm how samples was loaded

    #put data from required parameters for all alphas and chi_bs into models
    models = np.zeros((n_chib,n_alpha,no_binaries,no_params+1))
    #flat (chi_b, alpha) model ids held out for validation, and their hyperparameter values
    removed_model_id =[7,11]
    val_hps = [[0.1,1],[0.2,.5]]

    #format which chi_bs and alphas match which parameter values being read in
    chi_b_alpha_pairs= np.zeros((n_chib*n_alpha,2))
    chi_b_alpha_pairs[:,0] = np.repeat(chi_b,n_alpha)
    chi_b_alpha_pairs[:,1] = np.tile(alpha, n_chib)

    training_hp_pairs = np.delete(chi_b_alpha_pairs, removed_model_id, 0) #removes [0.1,1] and [0.2,0.5] point
    training_hps_stack = np.repeat(training_hp_pairs, no_binaries, axis=0) #repeats to cover all samples in each population
    validation_hps_stack = np.repeat(val_hps, no_binaries, axis=0)
    all_chi_b_alphas = np.repeat(chi_b_alpha_pairs, no_binaries, axis=0)

    #stack data
    for chib_id in range(n_chib):
        for alpha_id in range(n_alpha):
            models[chib_id, alpha_id]=np.asarray(samples[(chib_id, alpha_id)][params])

    #removing the separation of chi_b and alpha into axes and just stringing them all together instead
    joined_chib_samples = np.concatenate(models, axis=0)
    #taken before the transforms below, so models_stack holds the untransformed samples
    models_stack = np.concatenate(joined_chib_samples, axis=0) #all models if needed

    #logit and renormalise distributions pre-batching
    #chirp mass original range 0 to inf
    joined_chib_samples[:,:,0], max_logit_mchirp, max_mchirp = logistic(joined_chib_samples[:,:,0], True)

    #mass ratio - original range 0 to 1
    joined_chib_samples[:,:,1], max_q, _ = logistic(joined_chib_samples[:,:,1])

    #chieff - original range -0.5 to +1
    joined_chib_samples[:,:,2] = np.arctanh(joined_chib_samples[:,:,2])

    #redshift - original range 0 to inf
    joined_chib_samples[:,:,3], max_logit_z, max_z = logistic(joined_chib_samples[:,:,3], True)

    #keep samples separated by model id (combined chi_b and alpha id) until validation samples are removed, then concatenate
    train_models = np.delete(joined_chib_samples, removed_model_id, 0) #removes samples from validation models
    train_models_stack = np.concatenate(train_models, axis=0)

    validation_model = joined_chib_samples[removed_model_id,:,:]
    validation_models_stack = np.concatenate(validation_model, axis=0)

#concatenate data plus weights with hyperparams
training_data = np.concatenate((train_models_stack, training_hps_stack), axis=1)
val_data = np.concatenate((validation_models_stack, validation_hps_stack), axis=1)
#(normalisation max, rescale factor) pairs for mchirp, q and z, as taken by expistic(data, max, rescale_max);
#the q rescale factor is None unless the GC branch rescaled q
mappings = np.asarray([max_logit_mchirp, max_mchirp, max_q, q_rescale, max_logit_z, max_z], dtype=object)

[1000000.       0.       0.       0.] [1000000.       0.       0.       0.] 4 4000000
[1000000. 1000000.       0.       0.] [1000000. 2000000.       0.       0.] 4 4000000
[1000000. 1000000. 1000000.       0.] [1000000. 2000000. 3000000.       0.] 4 4000000
[1000000. 1000000. 1000000. 1000000.] [1000000. 2000000. 3000000. 4000000.] 4 4000000


In [19]:
# Row counts of the stacked samples and of their hyperparameter labels must agree
# for the column-wise concatenation into training_data.
print(train_models_stack.shape)
print(training_hps_stack.shape)

(896611, 5)
(896611, 1)
