In [1]:
from os.path import exists
import sys
import numpy as np

from cosmoprimo import *
from pycorr import TwoPointCorrelationFunction, setup_logging
from mockfactory import EulerianLinearMock, LagrangianLinearMock, RandomBoxCatalog, setup_logging

from densitysplit import catalog_data, density_split
from bin.density_split_mocks_functions import generate_N_mocks, generate_batch_2PCF, generate_batch_densitySplit_CCF

# Set up logging
setup_logging()

In [2]:
def generate_densitySplit_CCF(catalog, nmocks, nmesh, bias,
                                    cellsize, resampler, nsplits, use_rsd,
                                    randoms_size, edges, los, f=None, rsd=False, use_weights=False, nbar=None,
                                    nthreads=128,
                                    batch_size=None, batch_index=0,
                                    save_each=False, output_dir='', mpi=False, overwrite=True):
    """Compute density-split correlation functions for one batch of mocks.

    For every mock index in the batch, the mock is loaded from
    ``output_dir`` (when the file exists and ``overwrite`` is False) or
    generated with ``generate_mock``; it is then split by density with
    ``split_density`` and correlated with ``compute_densitySplit_CCF``.

    Parameters
    ----------
    catalog : object
        Reference catalog; its ``name``, ``size``, ``boxsize``, ``boxcenter``
        and ``redshift`` attributes are read.
    nmocks : int
        Only used as the batch size when ``batch_size`` is None.
    nmesh, bias, f, mpi :
        Forwarded unchanged to ``generate_mock``.
    cellsize, resampler, nsplits, use_rsd :
        Forwarded to ``split_density``.
    randoms_size, edges, los, nthreads :
        Forwarded to ``compute_densitySplit_CCF`` (``los`` also goes to
        ``generate_mock``).
    rsd : bool
        Forwarded to ``generate_mock`` and, as ``use_rsd``, to
        ``compute_densitySplit_CCF``.
    use_weights : bool
        Forwarded to both ``split_density`` and ``compute_densitySplit_CCF``.
    nbar : float, optional
        Number density handed to ``generate_mock``. Defaults to
        ``catalog.size / catalog.boxsize**3``.
    batch_size : int, optional
        Number of mocks in the batch; defaults to ``nmocks``.
    batch_index : int
        The batch covers mock indices
        ``[batch_index*batch_size, (batch_index+1)*batch_size)``.
    save_each : bool
        Forwarded to ``generate_mock`` as ``save``.
    output_dir : str
        Prefix (plain string concatenation) for the mock files.
    overwrite : bool
        When True, mocks are regenerated even if a file already exists.

    Returns
    -------
    tuple of list
        ``(results_hh_auto, results_hh_cross, results_rh)``, each holding one
        entry per processed mock (the corresponding value of the dict
        returned by ``compute_densitySplit_CCF``).
    """
    if nbar is None:
        nbar = catalog.size/catalog.boxsize**3

    results_hh_auto = list()
    results_hh_cross = list()
    results_rh = list()

    if batch_size is None:
        batch_size = nmocks

    mocks_indices = range(batch_index*batch_size, (batch_index+1)*batch_size)

    for i in mocks_indices:
        print('Mock '+str(i))
        filename = catalog.name+'_gaussianMock{}_truncatedPk'.format(i)
        mock_path = output_dir+filename+'.npy'
        if exists(mock_path) and not overwrite:
            print('Mock already exists. Loading mock...')
            mock_catalog = catalog_data.Data.load(mock_path)
        else:
            print('Mock does not exist. Generating mock...')
            # NOTE(review): `generate_mock` is not defined or explicitly imported in
            # the visible notebook cells -- confirm it is in scope before relying on
            # this branch.
            # The mock index doubles as the random seed. The resolved `nbar` is
            # forwarded so that a caller-supplied density is honoured instead of
            # being silently replaced by the catalog density.
            mock_catalog = generate_mock(nmesh=nmesh, boxsize=catalog.boxsize, boxcenter=catalog.boxcenter, seed=i,
                                         cosmology=fiducial.AbacusSummitBase(), nbar=nbar,
                                         z=catalog.redshift, bias=bias,
                                         rsd=rsd, los=los, f=f,
                                         save=save_each, output_dir=output_dir, name=filename,
                                         mpi=mpi)

        # Mocks for which no catalog came back are skipped.
        if mock_catalog is None:
            continue

        print('Computing density splits...')
        mock_density = split_density(mock_catalog, cellsize, resampler, nsplits, use_rsd=use_rsd, use_weights=use_weights, save=False)
        print('Computing correlation function...')
        # NOTE(review): the correlation step receives `rsd`, not `use_rsd` --
        # confirm this asymmetry with the density step is intended.
        mock_CCFs = compute_densitySplit_CCF(mock_density, edges, los, use_rsd=rsd, use_weights=use_weights, randoms_size=randoms_size, nthreads=nthreads)

        results_hh_auto.append(mock_CCFs['hh_auto'])
        results_hh_cross.append(mock_CCFs['hh_cross'])
        results_rh.append(mock_CCFs['rh'])

    return results_hh_auto, results_hh_cross, results_rh

In [3]:
def split_density(catalog, cellsize, resampler, nsplits, use_rsd=False, use_weights=False, save=False, output_dir=''):
    """Build a ``DensitySplit`` for ``catalog`` and divide it into ``nsplits`` splits.

    The density is computed with the given ``cellsize``, ``resampler``,
    ``use_rsd`` and ``use_weights`` options. When ``save`` is true, the
    resulting object is saved to ``output_dir + catalog.name + '_density'``.

    Returns the ``DensitySplit`` instance.
    """
    splitter = density_split.DensitySplit(catalog)

    mesh_options = dict(cellsize=cellsize, resampler=resampler,
                        use_rsd=use_rsd, use_weights=use_weights)
    splitter.compute_density(**mesh_options)
    splitter.split_density(nsplits)

    if not save:
        return splitter

    splitter.save(output_dir + catalog.name + '_density')
    return splitter


def compute_densitySplit_CCF(data_density_splits, edges, los, use_rsd=False, use_weights=False, save=False, output_dir='', name='mock', randoms_size=1, nthreads=128):
    """Compute the per-split 'smu' correlation functions of a density split.

    For each split ``i`` three estimators are built, in this order:

    * ``hh_auto``: autocorrelation of the positions in split ``i``;
    * ``hh_cross``: cross-correlation of split ``i`` with the concatenation
      of the positions of every other split;
    * ``rh``: cross-correlation of the random samples drawn for split ``i``
      (via ``data_density_splits.sample_splits``) with the full catalog.

    Parameters
    ----------
    data_density_splits : object
        Density-split object; its ``data``, ``split_positions``,
        ``split_positions_rsd``, ``split_indices``, ``nsplits``, ``cellsize``,
        ``boxsize`` attributes and ``sample_splits`` method are used.
    edges, los :
        Forwarded to ``TwoPointCorrelationFunction``.
    use_rsd : bool
        Use the RSD positions when ``data.positions_rsd`` is available.
    use_weights : bool
        Use ``data.weights`` when it is not None.
    save : bool
        When True, the three result lists are written with ``np.save`` under
        ``output_dir + name + ...``.
    randoms_size : int
        ``sample_splits`` is asked for ``randoms_size * data.size`` points.
    nthreads : int
        Thread count passed to every correlation-function call.

    Returns
    -------
    dict
        ``{'hh_auto': [...], 'hh_cross': [...], 'rh': [...]}`` with one
        estimator per split in each list.
    """
    data = data_density_splits.data
    nsplits = data_density_splits.nsplits

    if use_rsd and data.positions_rsd is not None:
        rsd_info = '_RSD'
        positions = data.positions_rsd
        split_positions = data_density_splits.split_positions_rsd
    else:
        rsd_info = ''
        positions = data.positions
        split_positions = data_density_splits.split_positions

    weighted = use_weights and (data.weights is not None)
    if weighted:
        weights = data.weights
        split_weights = [weights[data_density_splits.split_indices[split]] for split in range(nsplits)]
    else:
        weights = None
        split_weights = [None for split in range(nsplits)]

    # Fixed seed: the same random samples are drawn on every call.
    split_samples = data_density_splits.sample_splits(size=randoms_size*data_density_splits.data.size, seed=0, update=False)
    cellsize = data_density_splits.cellsize

    # Options shared by every estimator; `nthreads` is honoured everywhere
    # (the autocorrelation previously hard-coded 128 threads).
    common_options = dict(boxsize=data_density_splits.boxsize,
                          engine='corrfunc', nthreads=nthreads,
                          los=los)

    results_hh_auto = list()
    results_hh_cross = list()
    results_rh = list()

    for i in range(nsplits):
        result_hh_auto = TwoPointCorrelationFunction('smu', edges,
                                                     data_positions1=split_positions[i], data_weights1=split_weights[i],
                                                     **common_options)

        # Concatenation along axis 1: positions are presumably laid out as
        # (ndim, N) -- TODO confirm against the density-split class.
        cross_indices = [j for j in range(nsplits) if j != i]
        cross_positions = np.concatenate([split_positions[j] for j in cross_indices], axis=1)
        if weighted:
            cross_weights = np.concatenate([split_weights[j] for j in cross_indices])
        else:
            cross_weights = None
        result_hh_cross = TwoPointCorrelationFunction('smu', edges,
                                                      data_positions1=split_positions[i], data_positions2=cross_positions,
                                                      data_weights1=split_weights[i], data_weights2=cross_weights,
                                                      **common_options)

        # The random samples carry no weights; the catalog keeps its own.
        result_rh = TwoPointCorrelationFunction('smu', edges,
                                                data_positions1=split_samples[i], data_positions2=positions,
                                                data_weights1=None, data_weights2=weights,
                                                **common_options)

        results_hh_auto.append(result_hh_auto)
        results_hh_cross.append(result_hh_cross)
        results_rh.append(result_rh)

    if save:
        np.save(output_dir+name+'_densitySplit_hh_autoCFs_cellsize'+str(cellsize)+'_randomsize'+str(randoms_size)+rsd_info, results_hh_auto)
        np.save(output_dir+name+'_densitySplit_hh_crossCFs_cellsize'+str(cellsize)+'_randomsize'+str(randoms_size)+rsd_info, results_hh_cross)
        np.save(output_dir+name+'_densitySplit_rh_CCFs_cellsize'+str(cellsize)+'_randomsize'+str(randoms_size)+rsd_info, results_rh)

    return {'hh_auto': results_hh_auto, 'hh_cross': results_hh_cross, 'rh': results_rh}


In [4]:
# Mock batch (only relevant when the notebook is run as a batch script)
#batch_index = int(sys.argv[1])
#batch_index = 0

# Input catalog and output locations
data_dir = '/feynman/work/dphp/mp270220/data/'
output_dir = '/feynman/work/dphp/mp270220/outputs/'

# Catalog selection and matching linear bias
catalog_name = 'AbacusSummit_1Gpc_z1.175'
bias = 1.8

# Alternative catalogs:
#catalog_name = 'AbacusSummit_2Gpc_z1.175'
#bias = 3.

#catalog_name = 'AbacusSummit_2Gpc_z0.800'
#catalog_name = 'mock'

# Load the catalog and shift its box center by minus its offset
catalog = catalog_data.Data.load(f'{data_dir}{catalog_name}.npy')
catalog.shift_boxcenter(-catalog.offset)

# --- Parameters ---

# Density mesh
cellsize = 10
resampler = 'tsc'
nsplits = 3

# Correlation function: 51 edges on [0, 150] and 201 edges on [-1, 1]
randoms_size = 4
edges = (np.linspace(0., 150., 51),
         np.linspace(-1, 1, 201))
los = 'x'

# Mocks
nmocks = 4000
nmesh = 512
nbar = catalog.size / catalog.boxsize**3

# Growth rate at the catalog redshift (needed for RSD)
cosmology = fiducial.AbacusSummitBase()
bg = cosmology.get_background()
f = bg.growth_rate(catalog.redshift)

# Apply RSD to the catalog
hz = 100 * bg.efunc(catalog.redshift)
catalog.set_rsd(hz=hz)

# Run the density-split pipeline on the first 10 mocks; existing mock files
# under 'mocks_rsd/' are reused because overwrite=False.
results_hh_auto, results_hh_cross, results_rh = generate_densitySplit_CCF(
    catalog, nmocks=10, nmesh=nmesh, bias=bias,
    cellsize=cellsize, resampler=resampler, nsplits=nsplits, use_rsd=False,
    randoms_size=randoms_size, edges=edges, los=los,
    f=f, rsd=False, use_weights=True, nbar=nbar,
    nthreads=128, batch_size=None, batch_index=0,
    save_each=True, output_dir=output_dir+'mocks_rsd/', mpi=False, overwrite=False)

# Earlier output naming schemes, kept for reference:
# np.save(output_dir+catalog.name+'_10_gaussianMocks_2PCF', results)
# np.save(output_dir+catalog.name+'_4000_mocks_2PCF_batch'+str(batch_index), results)
# np.save(output_dir+catalog.name+'_4000_mocks_densitySplit_hh_autoCF_cellsize'+str(cellsize)+'_randomsize'+str(randoms_size)+'_batch'+str(batch_index), results_hh_auto)
# np.save(output_dir+catalog.name+'_4000_mocks_densitySplit_hh_crossCF_cellsize'+str(cellsize)+'_randomsize'+str(randoms_size)+'_batch'+str(batch_index), results_hh_cross)
# np.save(output_dir+catalog.name+'_4000_mocks_densitySplit_rh_CCF_cellsize'+str(cellsize)+'_randomsize'+str(randoms_size)+'_batch'+str(batch_index), results_rh)

# Save the three result lists; the file names share a prefix and a suffix
output_prefix = output_dir+catalog.name+'_10_gaussianMocks_truncatedPk_densitySplit_'
output_suffix = '_cellsize'+str(cellsize)+'_randomsize'+str(randoms_size)
np.save(output_prefix+'hh_autoCF'+output_suffix, results_hh_auto)
np.save(output_prefix+'hh_crossCF'+output_suffix, results_hh_cross)
np.save(output_prefix+'rh_CCF'+output_suffix, results_rh)

Mock 0
Mock already exists. Loading mock...
Computing density splits...
[000000.88] [0/1] 06-14 10:24  CatalogMesh               INFO     Slab 0 ~ 4194304 / 2934922.
[000001.66] [0/1] 06-14 10:24  CatalogMesh               INFO     Painted 2934922 out of 2934922 objects to mesh.
Computing correlation function...
[000003.80] [0/1] 06-14 10:24  TwoPointCorrelationFunction INFO     Using estimator <class 'pycorr.twopoint_estimator.NaturalTwoPointEstimator'>.
[000003.80] [0/1] 06-14 10:24  TwoPointCorrelationFunction INFO     Computing two-point counts D1D2.
[000008.77] [0/1] 06-14 10:24  TwoPointCorrelationFunction INFO     Analytically computing two-point counts R1R2.
[000008.77] [0/1] 06-14 10:24  TwoPointCorrelationFunction INFO     Using estimator <class 'pycorr.twopoint_estimator.NaturalTwoPointEstimator'>.
[000008.77] [0/1] 06-14 10:24  TwoPointCorrelationFunction INFO     Computing two-point counts D1D2.
[000024.31] [0/1] 06-14 10:25  TwoPointCorrelationFunction INFO     Analytical

In [5]:
# Display the 'rh' results: one inner list per mock, one estimator per density split.
results_rh

[[<pycorr.twopoint_estimator.NaturalTwoPointEstimator at 0x7ffecf3caa00>,
  <pycorr.twopoint_estimator.NaturalTwoPointEstimator at 0x7ffecb775d90>,
  <pycorr.twopoint_estimator.NaturalTwoPointEstimator at 0x7ffecb8ef760>],
 [<pycorr.twopoint_estimator.NaturalTwoPointEstimator at 0x7ffecb775c40>,
  <pycorr.twopoint_estimator.NaturalTwoPointEstimator at 0x7ffece9f40d0>,
  <pycorr.twopoint_estimator.NaturalTwoPointEstimator at 0x7ffecb8efb80>],
 [<pycorr.twopoint_estimator.NaturalTwoPointEstimator at 0x7ffecb8ef520>,
  <pycorr.twopoint_estimator.NaturalTwoPointEstimator at 0x7ffece9f48b0>,
  <pycorr.twopoint_estimator.NaturalTwoPointEstimator at 0x7ffecb775100>],
 [<pycorr.twopoint_estimator.NaturalTwoPointEstimator at 0x7ffecfa0f100>,
  <pycorr.twopoint_estimator.NaturalTwoPointEstimator at 0x7ffecb775580>,
  <pycorr.twopoint_estimator.NaturalTwoPointEstimator at 0x7ffecb7de7f0>],
 [<pycorr.twopoint_estimator.NaturalTwoPointEstimator at 0x7ffecb7de0a0>,
  <pycorr.twopoint_estimator.Natur