From 67f0b8e9ef8e9119d12aef24bfcf48b756588fd3 Mon Sep 17 00:00:00 2001 From: Manodeep Sinha Date: Sat, 22 Jun 2019 10:57:58 +1000 Subject: [PATCH 01/14] Apparently wp was not a target. Cleaned up the Makefiles --- mocks/Makefile | 36 ++++++++++++++++-------------------- theory/Makefile | 41 +++++++++++++++-------------------------- 2 files changed, 31 insertions(+), 46 deletions(-) diff --git a/mocks/Makefile b/mocks/Makefile index 44853a5a..5c4fe15d 100644 --- a/mocks/Makefile +++ b/mocks/Makefile @@ -1,6 +1,6 @@ include ../mocks.options ../common.mk -TARGETS:= dirs DDrppi_mocks DDtheta_mocks DDsmu_mocks vpf_mocks examples +TARGETS:= DDrppi_mocks DDtheta_mocks DDsmu_mocks vpf_mocks examples ifneq ($(COMPILE_PYTHON_EXT), 0) TARGETS += python_bindings else @@ -16,7 +16,7 @@ dirs: | ../lib ../bin ../include .PHONY: clean celna clena celan $(TARGETS) tests distclean realclean distclena realclena dirs test python_bindings libs all -DDrppi_mocks DDtheta_mocks vpf_mocks DDsmu_mocks: +DDrppi_mocks DDtheta_mocks vpf_mocks DDsmu_mocks: | dirs $(MAKE) -C $@ examples: libs @@ -31,20 +31,16 @@ realclena:realclean realclean: $(MAKE) clean - $(MAKE) -C DDrppi_mocks distclean - $(MAKE) -C DDsmu_mocks distclean - $(MAKE) -C DDtheta_mocks distclean - $(MAKE) -C vpf_mocks distclean - $(MAKE) -C python_bindings distclean + for dir in $(TARGETS); do \ + $(MAKE) -C $$dir distclean; \ + done $(MAKE) -C ../utils clean $(MAKE) -C ../io clean clean: - $(MAKE) -C DDrppi_mocks clean - $(MAKE) -C DDsmu_mocks clean - $(MAKE) -C DDtheta_mocks clean - $(MAKE) -C vpf_mocks clean - $(MAKE) -C examples clean + for dir in $(TARGETS); do \ + $(MAKE) -C $$dir clean; \ + done $(MAKE) -C tests clean $(MAKE) -C python_bindings clean @@ -53,17 +49,17 @@ celan: clean celna: clean install: examples | dirs - $(MAKE) -C DDrppi_mocks install - $(MAKE) -C DDsmu_mocks install - $(MAKE) -C DDtheta_mocks install - $(MAKE) -C vpf_mocks install + for dir in $(TARGETS); do \ + $(MAKE) -C $$dir install; \ + done +ifneq ($(COMPILE_PYTHON_EXT), 0) $(MAKE) -C python_bindings install +endif libs: | dirs - $(MAKE) -C DDrppi_mocks lib - $(MAKE) -C DDsmu_mocks lib - $(MAKE) -C DDtheta_mocks lib - $(MAKE) -C vpf_mocks lib + for dir in $(TARGETS); do \ + $(MAKE) -C $$dir lib; \ + done test: tests tests: diff --git a/theory/Makefile b/theory/Makefile index f6160eb8..8e52579f 100644 --- a/theory/Makefile +++ b/theory/Makefile @@ -18,7 +18,7 @@ logbins: logbins.c .PHONY: $(TARGETS) libs install tests distclean realclean distclena realclena clean celna -DD DDrppi DDsmu xi vpf: +DD DDrppi DDsmu xi wp vpf: $(MAKE) -C $@ python_bindings: libs @@ -38,38 +38,30 @@ realclena:realclean realclean: $(RM) $(ROOT_DIR)/bin/logbins $(RM) -R *.dSYM - $(MAKE) -C DD distclean - $(MAKE) -C DDrppi distclean - $(MAKE) -C DDsmu distclean - $(MAKE) -C wp distclean - $(MAKE) -C xi distclean - $(MAKE) -C vpf distclean + for dir in $(TARGETS); do \ + $(MAKE) -C $$dir distclean; \ + done $(MAKE) -C python_bindings distclean $(MAKE) -C tests clean clean: $(RM) logbins $(RM) -R *.dSYM - $(MAKE) -C DD clean - $(MAKE) -C DDrppi clean - $(MAKE) -C DDsmu clean - $(MAKE) -C wp clean - $(MAKE) -C xi clean - $(MAKE) -C vpf clean - $(MAKE) -C examples clean + for dir in $(TARGETS); do \ + $(MAKE) -C $$dir clean; \ + done $(MAKE) -C python_bindings clean $(MAKE) -C tests clean $(MAKE) -C ../io clean $(MAKE) -C ../utils clean install: $(ROOT_DIR)/bin/logbins examples $(TARGETS) $(PYTHON_EXT_DIR) - $(MAKE) -C DD install - $(MAKE) -C DDrppi install - $(MAKE) -C DDsmu install - $(MAKE) -C wp install - $(MAKE) -C xi install - $(MAKE) -C vpf install + for dir in $(TARGETS); do \ + $(MAKE) -C $$dir install; \ + done +ifneq ($(COMPILE_PYTHON_EXT), 0) $(MAKE) -C python_bindings install +endif $(ROOT_DIR)/bin/logbins: logbins cp -p logbins $(ROOT_DIR)/bin @@ -77,12 +69,9 @@ $(ROOT_DIR)/bin/logbins: logbins lib: libs libs: - $(MAKE) -C DD lib - $(MAKE) -C DDrppi lib - $(MAKE) -C DDsmu lib - $(MAKE) -C wp lib - $(MAKE) -C xi lib - $(MAKE) -C vpf lib + for dir in $(TARGETS); do \ + $(MAKE) -C $$dir lib; \ + done test: tests tests: From f3992fe06bdb7f1c1f686b2b5558bc9f960cf281 Mon Sep 17 00:00:00 2001 From: Manodeep Sinha Date: Sat, 22 Jun 2019 07:48:43 +1000 Subject: [PATCH 02/14] Fixing errors reported by codacy --- Corrfunc/mocks/DDrppi_mocks.py | 2 +- Corrfunc/mocks/DDsmu_mocks.py | 5 +- Corrfunc/mocks/DDtheta_mocks.py | 2 +- Corrfunc/mocks/vpf_mocks.py | 3 +- Corrfunc/theory/DD.py | 2 +- Corrfunc/theory/DDrppi.py | 2 +- Corrfunc/theory/DDsmu.py | 4 +- Corrfunc/theory/vpf.py | 2 +- Corrfunc/theory/wp.py | 2 +- Corrfunc/theory/xi.py | 2 +- Corrfunc/utils.py | 90 ++++++++++++++++----------------- 11 files changed, 57 insertions(+), 59 deletions(-) diff --git a/Corrfunc/mocks/DDrppi_mocks.py b/Corrfunc/mocks/DDrppi_mocks.py index 08c2e088..2346a3d0 100644 --- a/Corrfunc/mocks/DDrppi_mocks.py +++ b/Corrfunc/mocks/DDrppi_mocks.py @@ -328,7 +328,7 @@ def DDrppi_mocks(autocorr, cosmology, nthreads, pimax, binfile, import numpy as np from Corrfunc.utils import translate_isa_string_to_enum, fix_ra_dec,\ return_file_with_rbins, convert_to_native_endian,\ - is_native_endian, sys_pipes, process_weights + sys_pipes, process_weights from future.utils import bytes_to_native_str if not autocorr: diff --git a/Corrfunc/mocks/DDsmu_mocks.py b/Corrfunc/mocks/DDsmu_mocks.py index f51f70ee..6a118f04 100644 --- a/Corrfunc/mocks/DDsmu_mocks.py +++ b/Corrfunc/mocks/DDsmu_mocks.py @@ -248,7 +248,8 @@ def DDsmu_mocks(autocorr, cosmology, nthreads, mu_max, nmu_bins, binfile, import numpy as np from Corrfunc.utils import translate_isa_string_to_enum, fix_ra_dec,\ - return_file_with_rbins, sys_pipes, process_weights, convert_to_native_endian + return_file_with_rbins, convert_to_native_endian,\ + sys_pipes, process_weights from future.utils import bytes_to_native_str # Check if mu_max is scalar @@ -260,7 +261,7 @@ def DDsmu_mocks(autocorr, cosmology, nthreads, mu_max, nmu_bins, binfile, # Check that mu_max is within (0.0, 1.0] if mu_max <= 0.0 or mu_max > 1.0: - msg = "The parameter `mu_max` = {0}, is the max. of cosine of an " + msg = "The parameter `mu_max` = {0}, is the max. of cosine of an "\ "angle and should be within (0.0, 1.0]".format(mu_max) raise ValueError(msg) diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py index 7d1d3713..fe8040a5 100644 --- a/Corrfunc/mocks/DDtheta_mocks.py +++ b/Corrfunc/mocks/DDtheta_mocks.py @@ -277,7 +277,7 @@ def DDtheta_mocks(autocorr, nthreads, binfile, import numpy as np from Corrfunc.utils import translate_isa_string_to_enum, fix_ra_dec,\ return_file_with_rbins, convert_to_native_endian,\ - is_native_endian, sys_pipes, process_weights + sys_pipes, process_weights from future.utils import bytes_to_native_str if autocorr == 0: diff --git a/Corrfunc/mocks/vpf_mocks.py b/Corrfunc/mocks/vpf_mocks.py index fc96c647..346d5e80 100644 --- a/Corrfunc/mocks/vpf_mocks.py +++ b/Corrfunc/mocks/vpf_mocks.py @@ -282,8 +282,7 @@ def vpf_mocks(rmax, nbins, nspheres, numpN, import numpy as np from future.utils import bytes_to_native_str from Corrfunc.utils import translate_isa_string_to_enum,\ - return_file_with_rbins, convert_to_native_endian,\ - is_native_endian, sys_pipes, process_weights + convert_to_native_endian, sys_pipes _locals = locals() diff --git a/Corrfunc/theory/DD.py b/Corrfunc/theory/DD.py index 7ea82209..61e51600 100644 --- a/Corrfunc/theory/DD.py +++ b/Corrfunc/theory/DD.py @@ -201,7 +201,7 @@ def DD(autocorr, nthreads, binfile, X1, Y1, Z1, weights1=None, periodic=True, import numpy as np from Corrfunc.utils import translate_isa_string_to_enum,\ return_file_with_rbins, convert_to_native_endian,\ - is_native_endian, sys_pipes, process_weights + sys_pipes, process_weights from future.utils import bytes_to_native_str if not autocorr: diff --git a/Corrfunc/theory/DDrppi.py b/Corrfunc/theory/DDrppi.py index 3f7fcf3c..ceefafb0 100644 --- a/Corrfunc/theory/DDrppi.py +++ b/Corrfunc/theory/DDrppi.py @@ -247,7 +247,7 @@ def DDrppi(autocorr, nthreads, pimax, binfile, X1, Y1, Z1, weights1=None, import numpy as np from Corrfunc.utils import translate_isa_string_to_enum,\ return_file_with_rbins, convert_to_native_endian,\ - is_native_endian, sys_pipes, process_weights + sys_pipes, process_weights from future.utils import bytes_to_native_str if not autocorr: diff --git a/Corrfunc/theory/DDsmu.py b/Corrfunc/theory/DDsmu.py index e40cbef9..92f89f3d 100644 --- a/Corrfunc/theory/DDsmu.py +++ b/Corrfunc/theory/DDsmu.py @@ -254,7 +254,7 @@ def DDsmu(autocorr, nthreads, binfile, mu_max, nmu_bins, import numpy as np from Corrfunc.utils import translate_isa_string_to_enum,\ return_file_with_rbins, convert_to_native_endian,\ - is_native_endian, sys_pipes, process_weights + sys_pipes, process_weights from future.utils import bytes_to_native_str @@ -267,7 +267,7 @@ def DDsmu(autocorr, nthreads, binfile, mu_max, nmu_bins, # Check that mu_max is within (0.0, 1.0] if mu_max <= 0.0 or mu_max > 1.0: - msg = "The parameter `mu_max` = {0}, is the max. of cosine of an " + msg = "The parameter `mu_max` = {0}, is the max. of cosine of an "\ "angle and should be within (0.0, 1.0]".format(mu_max) raise ValueError(msg) diff --git a/Corrfunc/theory/vpf.py b/Corrfunc/theory/vpf.py index d7523427..0d0a0515 100644 --- a/Corrfunc/theory/vpf.py +++ b/Corrfunc/theory/vpf.py @@ -189,7 +189,7 @@ def vpf(rmax, nbins, nspheres, numpN, seed, import numpy as np from future.utils import bytes_to_native_str from Corrfunc.utils import translate_isa_string_to_enum,\ - convert_to_native_endian, is_native_endian, sys_pipes, process_weights + convert_to_native_endian, sys_pipes, process_weights from math import pi if numpN <= 0: diff --git a/Corrfunc/theory/wp.py b/Corrfunc/theory/wp.py index 89d61dd0..2c92de47 100644 --- a/Corrfunc/theory/wp.py +++ b/Corrfunc/theory/wp.py @@ -483,7 +483,7 @@ def wp(boxsize, pimax, nthreads, binfile, X, Y, Z, from future.utils import bytes_to_native_str from Corrfunc.utils import translate_isa_string_to_enum,\ return_file_with_rbins, convert_to_native_endian,\ - is_native_endian, sys_pipes, process_weights + sys_pipes, process_weights weights, _ = process_weights(weights, None, X, None, weight_type, autocorr=True) diff --git a/Corrfunc/theory/xi.py b/Corrfunc/theory/xi.py index 3beddf13..5a33d277 100644 --- a/Corrfunc/theory/xi.py +++ b/Corrfunc/theory/xi.py @@ -194,7 +194,7 @@ def xi(boxsize, nthreads, binfile, X, Y, Z, from future.utils import bytes_to_native_str from Corrfunc.utils import translate_isa_string_to_enum,\ return_file_with_rbins, convert_to_native_endian,\ - is_native_endian, sys_pipes, process_weights + sys_pipes, process_weights weights, _ = process_weights(weights, None, X, None, weight_type, autocorr=True) diff --git a/Corrfunc/utils.py b/Corrfunc/utils.py index e2ceeb8d..881f8c08 100644 --- a/Corrfunc/utils.py +++ b/Corrfunc/utils.py @@ -280,7 +280,7 @@ def convert_rp_pi_counts_to_wp(ND1, ND2, NR1, NR2, 11.208365 """ - + import numpy as np if dpi <= 0.0: msg = 'Binsize along the line of sight (dpi) = {0}'\ @@ -310,7 +310,7 @@ def convert_rp_pi_counts_to_wp(ND1, ND2, NR1, NR2, 'npibins = {1} and dpi = {2}. Check your binning scheme.'\ .format(pimax, npibins, dpi) raise ValueError(msg) - + for i in range(nrpbins): wp[i] = 2.0 * dpi * np.sum(xirppi[i * npibins:(i + 1) * npibins]) @@ -520,7 +520,7 @@ def compute_nbins(max_diff, binsize, """ Helper utility to find the number of bins for that satisfies the constraints of (binsize, refine_factor, and max_nbins). - + Parameters ------------ @@ -529,28 +529,28 @@ def compute_nbins(max_diff, binsize, (i.e., range of allowed domain values) binsize : double - Min. allowed binsize (spatial or angular) + Min. allowed binsize (spatial or angular) refine_factor : integer, default 1 How many times to refine the bins. The refinements occurs after ``nbins`` has already been determined (with ``refine_factor-1``). - Thus, the number of bins will be **exactly** higher by + Thus, the number of bins will be **exactly** higher by ``refine_factor`` compared to the base case of ``refine_factor=1`` max_nbins : integer, default None Max number of allowed cells - Returns + Returns --------- nbins: integer, >= 1 - Number of bins that satisfies the constraints of - bin size >= ``binsize``, the refinement factor + Number of bins that satisfies the constraints of + bin size >= ``binsize``, the refinement factor and nbins <= ``max_nbins``. Example --------- - + >>> from Corrfunc.utils import compute_nbins >>> max_diff = 180 >>> binsize = 10 @@ -558,7 +558,7 @@ def compute_nbins(max_diff, binsize, 18 >>> refine_factor=2 >>> max_nbins = 20 - >>> compute_nbins(max_diff, binsize, refine_factor=refine_factor, + >>> compute_nbins(max_diff, binsize, refine_factor=refine_factor, ... max_nbins=max_nbins) 20 @@ -589,9 +589,9 @@ def compute_nbins(max_diff, binsize, if max_nbins: ngrid = min(int(max_nbins), ngrid) - return ngrid - - + return ngrid + + def gridlink_sphere(thetamax, ra_limits=None, dec_limits=None, @@ -601,24 +601,24 @@ def gridlink_sphere(thetamax, return_num_ra_cells=False, input_in_degrees=True): """ - A method to optimally partition spherical regions such that pairs of + A method to optimally partition spherical regions such that pairs of points within a certain angular separation, ``thetamax``, can be quickly - computed. + computed. - Generates the binning scheme used in :py:mod:`Corrfunc.mocks.DDtheta_mocks` - for a spherical region in Right Ascension (RA), Declination (DEC) - and a maximum angular separation. + Generates the binning scheme used in :py:mod:`Corrfunc.mocks.DDtheta_mocks` + for a spherical region in Right Ascension (RA), Declination (DEC) + and a maximum angular separation. For a given ``thetamax``, regions on the sphere are divided into bands - in DEC bands, with the width in DEC equal to ``thetamax``. If - ``link_in_ra`` is set, then these DEC bands are further sub-divided - into RA cells. + in DEC bands, with the width in DEC equal to ``thetamax``. If + ``link_in_ra`` is set, then these DEC bands are further sub-divided + into RA cells. Parameters ---------- thetamax : double - Max. angular separation of pairs. Expected to be in degrees + Max. angular separation of pairs. Expected to be in degrees unless ``input_in_degrees`` is set to ``False``. ra_limits : array of 2 doubles. Default [0.0, 2*pi] @@ -631,12 +631,12 @@ def gridlink_sphere(thetamax, Whether linking in RA is done (in addition to linking in DEC) ra_refine_factor : integer, >= 1. Default 1 - Controls the sub-division of the RA cells. For a large number of + Controls the sub-division of the RA cells. For a large number of particles, higher `ra_refine_factor` typically results in a faster runtime dec_refine_factor : integer, >= 1. Default 1 - Controls the sub-division of the DEC cells. For a large number of + Controls the sub-division of the DEC cells. For a large number of particles, higher `dec_refine_factor` typically results in a faster runtime @@ -644,29 +644,29 @@ def gridlink_sphere(thetamax, The max. number of RA cells **per DEC band**. max_dec_cells : integer >= 1. Default 200 - The max. number of total DEC bands + The max. number of total DEC bands return_num_ra_cells: bool, default False Flag to return the number of RA cells per DEC band input_in_degrees : Boolean. Default True - Flag to show if the input quantities are in degrees. If set to + Flag to show if the input quantities are in degrees. If set to False, all angle inputs will be taken to be in radians. Returns --------- - sphere_grid : A numpy compound array, shape (ncells, 2) - A numpy compound array with fields ``dec_limit`` and ``ra_limit`` of - size 2 each. These arrays contain the beginning and end of DEC - and RA regions for the cell. + sphere_grid : A numpy compound array, shape (ncells, 2) + A numpy compound array with fields ``dec_limit`` and ``ra_limit`` of + size 2 each. These arrays contain the beginning and end of DEC + and RA regions for the cell. num_ra_cells: numpy array, returned if ``return_num_ra_cells`` is set A numpy array containing the number of RA cells per declination band .. note:: If ``link_in_ra=False``, then there is effectively one RA bin - per DEC band. The 'ra_limit' field will show the range of allowed + per DEC band. The 'ra_limit' field will show the range of allowed RA values. @@ -745,7 +745,7 @@ def gridlink_sphere(thetamax, from math import radians, pi import numpy as np - + if input_in_degrees: thetamax = radians(thetamax) @@ -753,10 +753,10 @@ def gridlink_sphere(thetamax, ra_limits = [radians(x) for x in ra_limits] if dec_limits: dec_limits = [radians(x) for x in dec_limits] - + if not ra_limits: ra_limits = [0.0, 2.0*pi] - + if not dec_limits: dec_limits = [-0.5*pi, 0.5*pi] @@ -777,18 +777,18 @@ def gridlink_sphere(thetamax, 'However, dec_limits = [{0}, {1}] does not fall within that '\ 'range'.format(dec_limits[0], dec_limits[1]) raise ValueError(msg) - + if ra_limits[0] < 0.0 or ra_limits[1] > 2.0*pi: msg = 'Valid range of values for declination are [0.0, 2*pi] deg. '\ 'However, ra_limits = [{0}, {1}] does not fall within that '\ 'range'.format(ra_limits[0], ra_limits[1]) raise ValueError(msg) - + dec_diff = abs(dec_limits[1] - dec_limits[0]) ngrid_dec = compute_nbins(dec_diff, thetamax, refine_factor=dec_refine_factor, max_nbins=max_dec_cells) - + dec_binsize = dec_diff/ngrid_dec # Upper and lower limits of the declination bands @@ -810,8 +810,6 @@ def gridlink_sphere(thetamax, # RA linking is requested ra_diff = ra_limits[1] - ra_limits[0] sin_half_thetamax = np.sin(thetamax) - costhetamax = np.cos(thetamax) - max_nmesh_ra = 1 totncells = 0 num_ra_cells = np.zeros(ngrid_dec, dtype=np.int64) @@ -838,7 +836,7 @@ def gridlink_sphere(thetamax, num_ra_cells[idec] = compute_nbins(ra_diff, ra_binsize, refine_factor=ra_refine_factor, max_nbins=max_ra_cells) - + totncells = num_ra_cells.sum() sphere_grid = np.zeros(totncells, dtype=grid_dtype) ra_binsizes = ra_diff/num_ra_cells @@ -852,7 +850,7 @@ def gridlink_sphere(thetamax, r['dec_limit'][1] = dec_limits[0] + dec_binsize*(idec + 1) r['ra_limit'][0] = ra_limits[0] + ra_binsizes[idec] * ira r['ra_limit'][1] = ra_limits[0] + ra_binsizes[idec] * (ira + 1) - + start += num_ra_cells[idec] if return_num_ra_cells: @@ -905,11 +903,11 @@ def convert_to_native_endian(array, warn=False): if array is None: return array - + import numpy as np array = np.asanyarray(array) - system_is_little_endian = (sys.byteorder == 'little') + system_is_little_endian = (sys.byteorder == 'little') array_is_little_endian = (array.dtype.byteorder == '<') if (array_is_little_endian != system_is_little_endian) and not (array.dtype.byteorder == '='): if warn: @@ -918,12 +916,12 @@ def convert_to_native_endian(array, warn=False): return array.byteswap().newbyteorder() else: return array - + def is_native_endian(array): ''' Checks whether the given array is native-endian. None evaluates to True. - + Parameters ---------- array: np.ndarray @@ -1053,7 +1051,7 @@ def sys_pipes(): yield except: yield - + if __name__ == '__main__': import doctest doctest.testmod() From 49b93ab8377fc53784f01ffdeff01fcdb1964015 Mon Sep 17 00:00:00 2001 From: Manodeep Sinha Date: Sat, 22 Jun 2019 11:39:13 +1000 Subject: [PATCH 03/14] More code quality fixes --- Corrfunc/theory/DD.py | 3 ++- Corrfunc/theory/vpf.py | 2 +- Corrfunc/utils.py | 4 +++- setup.py | 14 ++++++++------ 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/Corrfunc/theory/DD.py b/Corrfunc/theory/DD.py index 61e51600..bdaa63b0 100644 --- a/Corrfunc/theory/DD.py +++ b/Corrfunc/theory/DD.py @@ -171,7 +171,8 @@ def DD(autocorr, nthreads, binfile, X1, Y1, Z1, weights1=None, periodic=True, >>> Y = np.random.uniform(0, boxsize, N) >>> Z = np.random.uniform(0, boxsize, N) >>> weights = np.ones_like(X) - >>> results = DD(autocorr, nthreads, binfile, X, Y, Z, weights1=weights, weight_type='pair_product', output_ravg=True) + >>> results = DD(autocorr, nthreads, binfile, X, Y, Z, weights1=weights, + ... weight_type='pair_product', output_ravg=True) >>> for r in results: print("{0:10.6f} {1:10.6f} {2:10.6f} {3:10d} {4:10.6f}". ... format(r['rmin'], r['rmax'], r['ravg'], ... r['npairs'], r['weightavg'])) # doctest: +NORMALIZE_WHITESPACE diff --git a/Corrfunc/theory/vpf.py b/Corrfunc/theory/vpf.py index 0d0a0515..9c9e24ca 100644 --- a/Corrfunc/theory/vpf.py +++ b/Corrfunc/theory/vpf.py @@ -189,7 +189,7 @@ def vpf(rmax, nbins, nspheres, numpN, seed, import numpy as np from future.utils import bytes_to_native_str from Corrfunc.utils import translate_isa_string_to_enum,\ - convert_to_native_endian, sys_pipes, process_weights + convert_to_native_endian, sys_pipes from math import pi if numpN <= 0: diff --git a/Corrfunc/utils.py b/Corrfunc/utils.py index 881f8c08..a7cf249c 100644 --- a/Corrfunc/utils.py +++ b/Corrfunc/utils.py @@ -1009,7 +1009,9 @@ def prep(w,x): if (weights1 is None) != (weights2 is None): if weight_type != 'pair_product': - raise ValueError("If using a weight_type other than 'pair_product', you must provide both weight arrays.") + raise ValueError("If using a weight_type other than "\ + "'pair_product', you must provide "\ + "both weight arrays.") if weights1 is None and weights2 is not None: weights1 = np.ones((len(weights2),len(X1)), dtype=X1.dtype) diff --git a/setup.py b/setup.py index a6d589a3..0f5ef80c 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ def find_packages(path='.'): ret = [] - for root, dirs, files in os.walk(path): + for root, _, files in os.walk(path): if '__init__.py' in files: ret.append(re.sub('^[^A-z0-9_]+', '', root.replace('/', '.'))) return ret @@ -84,7 +84,7 @@ def get_dict_from_buffer(buf, keys=['DISTNAME', 'MAJOR', Returns: Python dictionary with all the keys (all keys in buffer if None is passed for keys) with the values being a list corresponding to each key. - + Note: Return dict will contain all keys supplied (if not None). If any key was not found in the buffer, then the value for that key will be [] such that dict[key] does not produce @@ -409,8 +409,10 @@ def generate_extensions(python_dirs): # defining the function that works on all reasonable pythons # http://stackoverflow.com/questions/2186525/use-a-glob-to- # find-files-recursively-in-python -def recursive_glob(rootdir='.', patterns=['*']): +def recursive_glob(rootdir='.', patterns=None): import fnmatch + if not patterns: + patterns = ['*'] return [pjoin(looproot, filename) for looproot, _, filenames in os.walk(rootdir) for filename in filenames for p in patterns @@ -463,7 +465,7 @@ def setup_packages(): "However, python expects the extension to be `{0}`"\ .format(get_config_var('SHLIB_EXT')) raise ValueError(msg) - + # global variable compiler is set if passed in # command-line extra_string = '' @@ -472,7 +474,7 @@ def setup_packages(): command = "make libs {0}".format(extra_string) run_command(command) - + else: # not installing. Check if creating source distribution # in that case run distclean to delete auto-generated C @@ -480,7 +482,7 @@ def setup_packages(): if 'sdist' in sys.argv: command = "make distclean" run_command(command) - + # find all the data-files required. # Now the lib + associated header files have been generated From 13f615d2540ce161a87afc95f18b8d3593f8f109 Mon Sep 17 00:00:00 2001 From: Manodeep Sinha Date: Sat, 22 Jun 2019 12:02:22 +1000 Subject: [PATCH 04/14] Added the codacy badge [ci skip] --- README.rst | 97 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 42 deletions(-) diff --git a/README.rst b/README.rst index 092cb007..0bea2b54 100644 --- a/README.rst +++ b/README.rst @@ -1,23 +1,23 @@ -|logo| +|logo| -|Release| |PyPI| |MIT licensed| |ASCL| |Travis Build| |Issues| |RTD| +|Release| |PyPI| |MIT licensed| |ASCL| |Travis Build| |Issues| |RTD| |Codacy| Description =========== -This repo contains a set of codes to calculate correlation functions and +This repo contains a set of codes to calculate correlation functions and other clustering statistics in a cosmological box (co-moving XYZ) -or on a mock (RA, DEC, CZ). Read the documentation on `corrfunc.rtfd.io `_. +or on a mock (RA, DEC, CZ). Read the documentation on `corrfunc.rtfd.io `_. Why Should You Use it ====================== 1. **Fast** Theory pair-counting is **7x** faster than ``SciPy cKDTree``, and at least **2x** faster than all existing public codes. 2. **OpenMP Parallel** All pair-counting codes can be done in parallel (with strong scaling efficiency >~ 95% up to 10 cores) -3. **Python Extensions** Python extensions allow you to do the compute-heavy bits using C while retaining all of the user-friendliness of python. +3. **Python Extensions** Python extensions allow you to do the compute-heavy bits using C while retaining all of the user-friendliness of python. 4. **Weights** All correlation functions now support *arbitrary, user-specified* weights for individual points -5. **Modular** The code is written in a modular fashion and is easily extensible to compute arbitrary clustering statistics. -6. **Future-proof** As I get access to newer instruction-sets, the codes will get updated to use the latest and greatest CPU features. +5. **Modular** The code is written in a modular fashion and is easily extensible to compute arbitrary clustering statistics. +6. **Future-proof** As I get access to newer instruction-sets, the codes will get updated to use the latest and greatest CPU features. *If you use the codes for your analysis, please star this repo -- that helps us keep track of the number of users.* @@ -26,7 +26,7 @@ Benchmark against Existing Codes Please see this `gist `__ for -some benchmarks with current codes. If you have a pair-counter that you would like to compare, please add in a corresponding function and update the timings. +some benchmarks with current codes. If you have a pair-counter that you would like to compare, please add in a corresponding function and update the timings. Installation ============ @@ -40,7 +40,7 @@ Pre-requisites ``theory.options`` and ``mocks.options``. You might need to ask your sys-admin for system-wide installs of the compiler; if you prefer to install your own then ``conda install gcc`` (MAC/linux) or - ``(sudo) port install gcc5`` (on MAC) should work. + ``(sudo) port install gcc5`` (on MAC) should work. 3. ``gsl >= 2.4``. Use either ``conda install -c conda-forge gsl`` (MAC/linux) or ``(sudo) port install gsl`` (MAC) to install ``gsl`` @@ -54,10 +54,10 @@ Preferred Install Method :: $ git clone https://github.com/manodeep/Corrfunc/ - $ make + $ make $ make install $ python setup.py install (--user) - $ make tests + $ make tests Assuming you have ``gcc`` in your ``PATH``, ``make`` and ``make install`` should compile and install the C libraries + python @@ -82,7 +82,7 @@ Compilation Notes Alternate Install Method ------------------------- -The python package is directly installable via ``pip install Corrfunc``. However, in that case you will lose the ability to recompile the code according to your needs. Installing via ``pip`` is **not** recommended, please open an install issue on this repo first; doing so helps improve the code-base and saves future users from running into similar install issues. +The python package is directly installable via ``pip install Corrfunc``. However, in that case you will lose the ability to recompile the code according to your needs. Installing via ``pip`` is **not** recommended, please open an install issue on this repo first; doing so helps improve the code-base and saves future users from running into similar install issues. Installation notes ------------------ @@ -134,9 +134,9 @@ All codes that work on mock catalogs (RA, DEC, CZ) are located in the produce the Landy-Szalay estimator for `wp(rp)`. 2. ``DDsmu_mocks`` -- The standard auto/cross correlation between two data - sets. The outputs, DD, DR and RR can be combined using the python utility + sets. The outputs, DD, DR and RR can be combined using the python utility ``convert_3d_counts_to_cf`` to produce the Landy-Szalay estimator for `xi(s, mu)`. - + 3. ``DDtheta_mocks`` -- Computes angular correlation function between two data sets. The outputs from ``DDtheta_mocks`` need to be combined with ``wtheta`` to get the full `\omega(\theta)` @@ -151,10 +151,10 @@ code runtime options at compile-time. For theory routines, these options are in the file `theory.options `__ while for the mocks, these options are in file `mocks.options `__. -**Note** All options can be specified at +**Note** All options can be specified at runtime if you use the python interface or the static libraries. Each one of the following ``Makefile`` option has a corresponding entry for the runtime -libraries. +libraries. Theory (in `theory.options `__) ------------------------------------------------- @@ -164,11 +164,11 @@ Theory (in `theory.options `__) 2. ``OUTPUT_RPAVG`` -- switches on output of ```` in each ``rp`` bin. Can be a massive performance hit (~ 2.2x in case of wp). - Disabled by default. + Disabled by default. 3. ``DOUBLE_PREC`` -- switches on calculations in double precision. Disabled by default (i.e., calculations are performed in single precision by default). - + Mocks (in `mocks.options `__) ---------------------------------------------- @@ -181,13 +181,13 @@ Mocks (in `mocks.options `__) 3. ``DOUBLE_PREC`` -- switches on calculations in double precision. Disabled by default (i.e., calculations are performed in single precision by default). - + 4. ``LINK_IN_DEC`` -- creates binning in declination for ``DDtheta``. Please - check that for your desired limits ``\theta``, this binning does not + check that for your desired limits ``\theta``, this binning does not produce incorrect results (due to numerical precision). Generally speaking, if your ``\thetamax`` (the max. ``\theta`` to consider pairs within) is too small (probaly less than 1 degree), then you should check with and without - this option. Errors are typically sub-percent level. + this option. Errors are typically sub-percent level. 5. ``LINK_IN_RA`` -- creates binning in RA once binning in DEC has been enabled. Same numerical issues as ``LINK_IN_DEC`` @@ -197,25 +197,25 @@ Mocks (in `mocks.options `__) the divisions with a reciprocal followed by a Newton-Raphson. The code will run ~20% faster at the expense of some numerical precision. Please check that the loss of precision is not important for your - use-case. + use-case. -7. ``FAST_ACOS`` -- Relevant only when ``OUTPUT_THETAAVG`` is enabled. Disabled +7. ``FAST_ACOS`` -- Relevant only when ``OUTPUT_THETAAVG`` is enabled. Disabled by default. An ``arccos`` is required to calculate ``<\theta>``. In absence of vectorized - ``arccos`` (intel compiler, ``icc`` provides one via intel Short Vector Math + ``arccos`` (intel compiler, ``icc`` provides one via intel Short Vector Math Library), this calculation is extremely slow. However, we can approximate ``arccos`` using polynomials (with `Remez Algorithm `_). The approximations are taken from implementations released by `Geometric Tools `_. - Depending on the level of accuracy desired, this implementation of ``fast acos`` + Depending on the level of accuracy desired, this implementation of ``fast acos`` can be tweaked in the file `utils/fast_acos.h `__. An alternate, less - accurate implementation is already present in that file. Please check that the loss of - precision is not important for your use-case. + accurate implementation is already present in that file. Please check that the loss of + precision is not important for your use-case. 8. ``COMOVING_DIST`` -- Currently there is no support in ``Corrfunc`` for different cosmologies. However, for the mocks routines like, ``DDrppi_mocks`` and ``vpf_mocks``, cosmology parameters are required to convert between - redshift and co-moving distance. Both ``DDrppi_mocks`` and ``vpf_mocks`` expects to receive a ``redshift`` array + redshift and co-moving distance. Both ``DDrppi_mocks`` and ``vpf_mocks`` expects to receive a ``redshift`` array as input; however, with this option enabled, the ``redshift`` array will be assumed to contain already converted co-moving distances. So, if you have redshifts and want to use an arbitrary cosmology, then convert the redshifts - into co-moving distances, enable this option, and pass the co-moving distance array into the routines. + into co-moving distances, enable this option, and pass the co-moving distance array into the routines. Running the codes ================= @@ -232,8 +232,8 @@ what you want. If not, edit those two files (and possibly `common.mk `__), and recompile. Then, you can use the command-line executables in each individual subdirectory corresponding to the clustering measure you are interested in. For example, if you want to -compute the full 3-D correlation function, ``\xi(r)``, then run the -executable ``theory/xi/xi``. If you run executables without any arguments, +compute the full 3-D correlation function, ``\xi(r)``, then run the +executable ``theory/xi/xi``. If you run executables without any arguments, the program will output a message with all the required arguments. Calling from C @@ -280,7 +280,7 @@ use the C extensions directly. Here are a few examples: rmin = 0.1 rmax = 20.0 nbins = 20 - + # Create the bins rbins = np.logspace(np.log10(0.1), np.log10(rmax), nbins + 1) @@ -292,7 +292,7 @@ use the C extensions directly. Here are a few examples: print("## rmin rmax rpavg wp npairs") print("#############################################################################") print(wp_results) - + Common Code options for both Mocks and Cosmological Boxes ========================================================= @@ -301,23 +301,32 @@ Common Code options for both Mocks and Cosmological Boxes (close to perfect scaling up to 12 threads in our tests) and okay (runtime becomes constant ~6-8 threads in our tests) for ``DDrppi`` and ``wp``. Enabled by default. The ``Makefile`` will compare the `CC` variable with - known OpenMP enabled compilers and set compile options accordingly. - Set in `common.mk `__ by default. + known OpenMP enabled compilers and set compile options accordingly. + Set in `common.mk `__ by default. + +2. ``ENABLE_MIN_SEP_OPT`` -- uses some further optimisations based on the + minimum separation between pairs of cells. Enabled by default. + +3. ``COPY_PARTICLES`` -- whether or not to create a copy of the particle + positions (and weights, if supplied). Enabled by default (copies of the + particle arrays **are** created) *Optimization for your architecture* 1. The values of ``bin_refine_factor`` and/or ``zbin_refine_factor`` in the ``countpairs\_\*.c`` files control the cache-misses, and consequently, the runtime. In my trial-and-error methods, I have seen - any values larger than 3 are always slower. But some different + any values larger than 3 are generally slower for theory routines but + can be faster for mocks. But some different combination of 1/2 for ``(z)bin_refine_factor`` might be faster on your platform. -2. If you have AVX2/AVX-512/KNC, you will need to add a new kernel within - the ``*_kernels.c`` and edit the runtime dispatch code to call this new - kernel. +2. If you are using the angular correlation function and need ``thetaavg``, + you might benefit from using the INTEL MKL library. The vectorized + trigonometric functions provided by MKL can provide significant speedup. -Author & Maintainers + +Author & Maintainers ===================== Corrfunc was designed by Manodeep Sinha and is currently maintained by @@ -377,12 +386,12 @@ with the code, including using it in commercial application. Project URL =========== -- documentation (http://corrfunc.rtfd.io/) +- documentation (http://corrfunc.rtfd.io/) - version control (https://github.com/manodeep/Corrfunc) .. |logo| image:: https://github.com/manodeep/Corrfunc/blob/master/corrfunc_logo.png :target: https://github.com/manodeep/Corrfunc - :alt: Corrfunc logo + :alt: Corrfunc logo .. |Release| image:: https://img.shields.io/github/release/manodeep/Corrfunc.svg :target: https://github.com/manodeep/Corrfunc/releases/latest :alt: Latest Release @@ -404,3 +413,7 @@ Project URL .. |RTD| image:: https://readthedocs.org/projects/corrfunc/badge/?version=master :target: http://corrfunc.readthedocs.io/en/master/?badge=master :alt: Documentation Status + +.. |Codacy| image:: https://api.codacy.com/project/badge/Grade/95717e4798b04ee5ad42d5cab3c15429 + :target: https://app.codacy.com/project/manodeep/Corrfunc/dashboard + :alt: Code Quality From e1eac217a4544423bf0d4212ca4935f6346a393c Mon Sep 17 00:00:00 2001 From: Manodeep Sinha Date: Sun, 23 Jun 2019 09:36:21 +1000 Subject: [PATCH 05/14] Fixed the fast divide option --- utils/defs.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/utils/defs.h b/utils/defs.h index 5674c601..2f1332b0 100644 --- a/utils/defs.h +++ b/utils/defs.h @@ -60,7 +60,7 @@ struct api_cell_timings }; -#define MAX_FAST_DIVIDE_NR_STEPS 6 +#define MAX_FAST_DIVIDE_NR_STEPS 3 #define OPTIONS_HEADER_SIZE 1024 struct config_options @@ -287,9 +287,13 @@ static inline struct config_options get_config_options(void) #endif /* Options specific to mocks */ - /* Options for DDrppi_mocks */ + /* Options for DDrppi_mocks (FAST_DIVIDE is also applicable for both DDsmu, and DDsmu_mocks) */ #if defined(FAST_DIVIDE) - options.fast_divide_and_NR_steps=FAST_DIVIDE; +#if FAST_DIVIDE > MAX_FAST_DIVIDE_NR_STEPS + options.fast_divide_and_NR_steps = MAX_FAST_DIVIDE_NR_STEPS; +#else + options.fast_divide_and_NR_steps = FAST_DIVIDE; +#endif #endif /* Options for wtheta*/ From 13589af5f97cda458ef3fc7aa15e2203b8b6d785 Mon Sep 17 00:00:00 2001 From: Manodeep Sinha Date: Sun, 23 Jun 2019 09:56:59 +1000 Subject: [PATCH 06/14] More code quality fixes --- Corrfunc/utils.py | 2 +- io/io.c | 16 ++--- mocks/python_bindings/_countpairs_mocks.c | 9 +-- theory/python_bindings/_countpairs.c | 17 ++--- utils/progressbar.c | 14 ++-- utils/utils.c | 81 +++++++++++------------ 6 files changed, 62 insertions(+), 77 deletions(-) diff --git a/Corrfunc/utils.py b/Corrfunc/utils.py index a7cf249c..d6191f59 100644 --- a/Corrfunc/utils.py +++ b/Corrfunc/utils.py @@ -986,7 +986,7 @@ def prep(w,x): return w # not None, so probably float or numpy array - if type(w) is float: + if isinstance(w, float): # Use the particle dtype if a Python float was given w = np.array(w, dtype=x.dtype) diff --git a/io/io.c b/io/io.c index d5b56664..8c4db1cf 100644 --- a/io/io.c +++ b/io/io.c @@ -31,13 +31,13 @@ int64_t read_positions(const char *filename, const char *format, const size_t si XRETURN((sizeof(void *) == sizeof(float *) && sizeof(void *) == sizeof(double *)), -1, "Size of void pointer = %zu must be the same as size of float pointer = %zu and sizeof double pointers = %zu\n", sizeof(void *), sizeof(float *), sizeof(double *)); - + void *columns[num_fields]; int64_t np = read_columns_into_array(filename, format, size, num_fields, columns); - + va_list ap; va_start(ap,num_fields); - + for(int i=0;i= 1, -1, "Number of fields to read-in = %d must be at least 1\n", num_fields); XRETURN((size == 4 || size == 8), -1, "Size of fields = %zu must be either 4 or 8\n", size); - + { //new scope - just to check if file is gzipped. //in that case, use gunzip to unzip the file. @@ -150,7 +150,7 @@ int64_t read_columns_into_array(const char *filename, const char *format, const if(tmp == NULL) { return -1; } - + //read-in the fields for(int i=0;i Unknown format `%s'\n",__FUNCTION__,format); return -1; } - + return np; } diff --git a/mocks/python_bindings/_countpairs_mocks.c b/mocks/python_bindings/_countpairs_mocks.c index 9fa8dcb1..671db27b 100644 --- a/mocks/python_bindings/_countpairs_mocks.c +++ b/mocks/python_bindings/_countpairs_mocks.c @@ -1395,10 +1395,9 @@ static PyObject *countpairs_countpairs_rp_pi_mocks(PyObject *self, PyObject *arg for(int i=1;i simply return Py_RETURN_NONE; } - + int found_weights = weights1_obj == NULL ? 0 : PyArray_SHAPE(weights1_obj)[0]; struct extra_options extra = get_extra_options(weighting_method); if(extra.weights0.num_weights > 0 && extra.weights0.num_weights != found_weights){ @@ -2483,10 +2479,9 @@ static PyObject *countpairs_countpairs_s_mu(PyObject *self, PyObject *args, PyOb const double smax=results.supp[i]; for(int j=0;j 0.0 ) { if(*interrupted == 1) { fprintf(stderr,"\n"); *interrupted = 0; beg_of_string_index = 0; } - + percent = (curr_index+1)/SMALLPRINTSTEP;//division is in double -> C has 0-based indexing -- the +1 accounts for that. - integer_percent = (int) percent; - + int integer_percent = (int) percent; + if (integer_percent >= 0 && integer_percent <= 100) { /* for(int i=beg_of_string_index;itv_sec = 0;//(start * sTimebaseInfo.numer/sTimebaseInfo.denom) * tv_nsec; ts->tv_nsec = start * sTimebaseInfo.numer / sTimebaseInfo.denom; - + #if 0 //Much slower implementation for clock //Slows down the code by up to 4x @@ -380,12 +380,12 @@ void current_utc_time(struct timespec *ts) mach_port_deallocate(mach_task_self(), cclock); ts->tv_sec = mts.tv_sec; ts->tv_nsec = mts.tv_nsec; -#endif - +#endif + #else clock_gettime(CLOCK_REALTIME, ts); #endif -} +} @@ -418,31 +418,29 @@ char * get_time_string(struct timeval t0,struct timeval t1) double timediff = t1.tv_sec - t0.tv_sec; double ratios[] = {24*3600.0, 3600.0, 60.0, 1}; char units[4][10] = {"days", "hrs" , "mins", "secs"}; - int which = 0; - double timeleft = timediff; - double time_to_print; - if(timediff < ratios[2]) { - my_snprintf(time_string, MAXLINESIZE,"%6.3lf secs",1e-6*(t1.tv_usec-t0.tv_usec) + timediff); + my_snprintf(time_string, MAXLINESIZE,"%6.3lf secs",1e-6*(t1.tv_usec-t0.tv_usec) + timediff); } else { - size_t curr_index = 0; - while (which < 4) { - time_to_print = floor(timeleft/ratios[which]); - if (time_to_print > 1) { - timeleft -= (time_to_print*ratios[which]); - char tmp[MAXLINESIZE]; - my_snprintf(tmp, MAXLINESIZE, "%5d %s",(int)time_to_print,units[which]); - const size_t len = strlen(tmp); - const size_t required_len = curr_index + len + 1; - XRETURN(MAXLINESIZE >= required_len, NULL, - "buffer overflow will occur: string has space for %zu bytes while concatenating requires at least %zu bytes\n", - MAXLINESIZE, required_len); - strcpy(time_string + curr_index, tmp); - curr_index += len; + double timeleft = timediff; + size_t curr_index = 0; + int which = 0; + while (which < 4) { + double time_to_print = floor(timeleft/ratios[which]); + if (time_to_print > 1) { + timeleft -= (time_to_print*ratios[which]); + char tmp[MAXLINESIZE]; + my_snprintf(tmp, MAXLINESIZE, "%5d %s",(int)time_to_print,units[which]); + const size_t len = strlen(tmp); + const size_t required_len = curr_index + len + 1; + XRETURN(MAXLINESIZE >= required_len, NULL, + "buffer overflow will occur: string has space for %zu bytes while concatenating requires at least %zu bytes\n", + MAXLINESIZE, required_len); + strcpy(time_string + curr_index, tmp); + curr_index += len; + } + which++; } - which++; - } } return time_string; @@ -453,7 +451,6 @@ void print_time(struct timeval t0,struct timeval t1,const char *s) double timediff = t1.tv_sec - t0.tv_sec; double ratios[] = {24*3600.0, 3600.0, 60.0, 1}; char units[4][10] = {"days", "hrs" , "mins", "secs"}; - int which = 0; double timeleft = timediff; double time_to_print; @@ -462,6 +459,7 @@ void print_time(struct timeval t0,struct timeval t1,const char *s) if(timediff < ratios[2]) { fprintf(stderr,"%6.3lf secs",1e-6*(t1.tv_usec-t0.tv_usec) + timediff); } else { + int which = 0; while (which < 4) { time_to_print = floor(timeleft/ratios[which]); if (time_to_print > 1) { @@ -493,13 +491,12 @@ void* my_realloc(void *x,size_t size,int64_t N,const char *varname) void* my_malloc(size_t size,int64_t N) { - void *x = NULL; - x = malloc(N*size); + void *x = malloc(N*size); if (x==NULL){ fprintf(stderr,"malloc for %"PRId64" elements with %zu bytes failed...\n",N,size); perror(NULL); } - + return x; } @@ -507,8 +504,7 @@ void* my_malloc(size_t size,int64_t N) void* my_calloc(size_t size,int64_t N) { - void *x = NULL; - x = calloc((size_t) N, size); + void *x = calloc((size_t) N, size); if (x==NULL) { fprintf(stderr,"malloc for %"PRId64" elements with %zu size failed...\n",N,size); perror(NULL); @@ -595,7 +591,7 @@ int matrix_realloc(void **matrix, size_t size, int64_t nrow, int64_t ncol){ } matrix[i] = tmp; } - + return EXIT_SUCCESS; } @@ -604,7 +600,7 @@ void matrix_free(void **m,int64_t nrow) { if(m == NULL) return; - + for(int i=0;i Date: Sun, 23 Jun 2019 16:54:10 +1000 Subject: [PATCH 07/14] More clean-ups --- io/ftread.c | 4 ++-- utils/set_cosmo_dist.c | 30 +++++++++++++----------------- utils/utils.c | 9 ++++----- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/io/ftread.c b/io/ftread.c index d0f5a277..9987acec 100644 --- a/io/ftread.c +++ b/io/ftread.c @@ -53,12 +53,12 @@ int ftread(void *ptr,size_t size,size_t nitems,FILE *stream) } if ( nbyte1 != nbyte2 ) { errno = errno - 1 ; - fprintf(stderr,"read warning, byte # do not match, nbyte1 = %d, nbyte2 = %d \n", + fprintf(stderr,"read warning, byte # do not match, nbyte1 = %u, nbyte2 = %u \n", nbyte1,nbyte2) ; } if ( nbyte1 != size*nitems) { errno = errno - 2 ; - fprintf(stderr, "read warning, byte # does not match item #, nbyte1 = %d, nitems = %zu \n", + fprintf(stderr, "read warning, byte # does not match item #, nbyte1 = %u, nitems = %zu \n", nbyte1,nitems) ; } diff --git a/utils/set_cosmo_dist.c b/utils/set_cosmo_dist.c index bea99f3f..51ea3c9b 100644 --- a/utils/set_cosmo_dist.c +++ b/utils/set_cosmo_dist.c @@ -31,35 +31,31 @@ int set_cosmo_dist(const double zmax,const int max_size,double *zc,double *dc,co if(status != EXIT_SUCCESS) { return -1; } - + int i = 0; double Omegak; - double dz,z,Deltaz,z2; - double Eint,E0,E1,E2,Dh,Dc; double smallh = 1.0;//Andreas pointed out that I don't need the real value of LITTLE_H - /* double one_plus_z,one_plus_z_minus_dz,one_plus_z_sqr,one_plus_z_minus_dz_sqr; */ Omegak = 1.0 - OMEGA_M - OMEGA_L; - Dh = SPEED_OF_LIGHT*0.01/smallh ;// c/(100) -> in units of little h^-1 Mpc + const double Dh = SPEED_OF_LIGHT*0.01/smallh ;// c/(100) -> in units of little h^-1 Mpc - Deltaz = 1.0/max_size; - dz = 1e-2*Deltaz; + const double Deltaz = 1.0/max_size; + const double dz = 1e-2*Deltaz; - Eint=0. ; - E2=1. ; - z2=Deltaz ; - for(z=2.*dz;z(z2-epsilon) && z<(z2+epsilon)) { if( i >= max_size ) break ; - Dc = Eint*Dh ; + const double Dc = Eint*Dh ; zc[i] = z ; dc[i] = Dc ; z2 += Deltaz ; @@ -68,8 +64,8 @@ int set_cosmo_dist(const double zmax,const int max_size,double *zc,double *dc,co } #ifdef DEBUG - fprintf(stderr,"cosmodist> (Omega_m, Omega_L, Omega_k, h, zmax) = (%4.2f, %4.2f, %4.2f, %4.2f,%4.2lf)\n",OMEGA_M,OMEGA_L,Omegak,LITTLE_H,zmax) ; - fprintf(stderr,"cosmodist> tabulated redshift: %g to %g (distance: %g to %g Mpc)\n", zc[0], zc[i-1], dc[0], dc[i-1]) ; + fprintf(stderr,"%s> (Omega_m, Omega_L, Omega_k, h, zmax) = (%4.2f, %4.2f, %4.2f, %4.2f,%4.2lf)\n", __FUNCTION__, OMEGA_M,OMEGA_L,Omegak,LITTLE_H,zmax) ; + fprintf(stderr,"%s> tabulated redshift: %g to %g (distance: %g to %g Mpc)\n", __FUNCTION__, zc[0], zc[i-1], dc[0], dc[i-1]) ; #endif return i ; } diff --git a/utils/utils.c b/utils/utils.c index ebf00e74..1680bb2b 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -417,7 +417,6 @@ char * get_time_string(struct timeval t0,struct timeval t1) char *time_string = my_malloc(sizeof(char), MAXLINESIZE); double timediff = t1.tv_sec - t0.tv_sec; double ratios[] = {24*3600.0, 3600.0, 60.0, 1}; - char units[4][10] = {"days", "hrs" , "mins", "secs"}; if(timediff < ratios[2]) { my_snprintf(time_string, MAXLINESIZE,"%6.3lf secs",1e-6*(t1.tv_usec-t0.tv_usec) + timediff); @@ -426,6 +425,7 @@ char * get_time_string(struct timeval t0,struct timeval t1) size_t curr_index = 0; int which = 0; while (which < 4) { + char units[4][10] = {"days", "hrs" , "mins", "secs"}; double time_to_print = floor(timeleft/ratios[which]); if (time_to_print > 1) { timeleft -= (time_to_print*ratios[which]); @@ -450,18 +450,17 @@ void print_time(struct timeval t0,struct timeval t1,const char *s) { double timediff = t1.tv_sec - t0.tv_sec; double ratios[] = {24*3600.0, 3600.0, 60.0, 1}; - char units[4][10] = {"days", "hrs" , "mins", "secs"}; - double timeleft = timediff; - double time_to_print; fprintf(stderr,"Time taken to execute '%s' = ",s); if(timediff < ratios[2]) { fprintf(stderr,"%6.3lf secs",1e-6*(t1.tv_usec-t0.tv_usec) + timediff); } else { + double timeleft = timediff; int which = 0; + char units[4][10] = {"days", "hrs" , "mins", "secs"}; while (which < 4) { - time_to_print = floor(timeleft/ratios[which]); + double time_to_print = floor(timeleft/ratios[which]); if (time_to_print > 1) { timeleft -= (time_to_print*ratios[which]); fprintf(stderr,"%5d %s",(int)time_to_print,units[which]); From ca67e4e38655c1001aa3ef8bf39a33583898a9bf Mon Sep 17 00:00:00 2001 From: Manodeep Sinha Date: Sun, 23 Jun 2019 17:29:14 +1000 Subject: [PATCH 08/14] Cleaning up some scope and other codacy comments --- mocks/examples/run_correlations_mocks.c | 10 +-- mocks/python_bindings/_countpairs_mocks.c | 88 +++++++++++------------ theory/examples/run_correlations.c | 20 +++--- theory/python_bindings/_countpairs.c | 79 ++++++++++---------- utils/utils.c | 9 +-- 5 files changed, 99 insertions(+), 107 deletions(-) diff --git a/mocks/examples/run_correlations_mocks.c b/mocks/examples/run_correlations_mocks.c index 0c9248cf..46c280bc 100644 --- a/mocks/examples/run_correlations_mocks.c +++ b/mocks/examples/run_correlations_mocks.c @@ -63,16 +63,16 @@ int main(int argc, char **argv) struct timeval t0,t1; DOUBLE pimax; int cosmology=1; - int nthreads=1; + int nthreads; int nmu_bins; DOUBLE mu_max; - + struct config_options options = get_config_options(); options.verbose=1; options.periodic=0; options.need_avg_sep=1; options.float_type = sizeof(*ra1); - + #if defined(_OPENMP) nthreads=4;//default to 4 threads const char argnames[][30]={"file","format","binfile","pimax","cosmology","mu_max", "nmu_bins", "Nthreads"}; @@ -98,6 +98,8 @@ int main(int argc, char **argv) nmu_bins=atoi(argv[7]); #if defined(_OPENMP) nthreads = atoi(argv[8]); +#else + nthreads=1; #endif } } else { @@ -233,7 +235,7 @@ int main(int argc, char **argv) //free the result structure free_results_mocks_s_mu(&results); } - + //Do the DD(theta) counts { diff --git a/mocks/python_bindings/_countpairs_mocks.c b/mocks/python_bindings/_countpairs_mocks.c index 671db27b..cbc6cba6 100644 --- a/mocks/python_bindings/_countpairs_mocks.c +++ b/mocks/python_bindings/_countpairs_mocks.c @@ -1281,15 +1281,15 @@ static PyObject *countpairs_countpairs_rp_pi_mocks(PyObject *self, PyObject *arg /* Interpret the input objects as numpy arrays. */ const int requirements = NPY_ARRAY_IN_ARRAY; - PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL, *weights1_array = NULL; - PyObject *x2_array = NULL, *y2_array = NULL, *z2_array = NULL, *weights2_array = NULL; - x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); - y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); - z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); + PyObject *y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); + PyObject *z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *weights1_array = NULL; if(weights1_obj != NULL){ weights1_array = PyArray_FromArray(weights1_obj, NOTYPE_DESCR, requirements); } + PyObject *x2_array = NULL, *y2_array = NULL, *z2_array = NULL, *weights2_array = NULL; if(autocorr == 0) { x2_array = PyArray_FromArray(x2_obj, NOTYPE_DESCR, requirements); y2_array = PyArray_FromArray(y2_obj, NOTYPE_DESCR, requirements); @@ -1318,16 +1318,15 @@ static PyObject *countpairs_countpairs_rp_pi_mocks(PyObject *self, PyObject *arg } /* Get pointers to the data as C-types. */ - void *phiD1=NULL, *thetaD1=NULL, *czD1=NULL, *weights1=NULL; - void *phiD2=NULL, *thetaD2=NULL, *czD2=NULL, *weights2=NULL; - - phiD1 = PyArray_DATA((PyArrayObject *)x1_array); - thetaD1 = PyArray_DATA((PyArrayObject *)y1_array); - czD1 = PyArray_DATA((PyArrayObject *)z1_array); + void *phiD1 = PyArray_DATA((PyArrayObject *)x1_array); + void *thetaD1 = PyArray_DATA((PyArrayObject *)y1_array); + void *czD1 = PyArray_DATA((PyArrayObject *)z1_array); + void *weights1=NULL; if(weights1_array != NULL){ weights1 = PyArray_DATA((PyArrayObject *) weights1_array); } + void *phiD2=NULL, *thetaD2=NULL, *czD2=NULL, *weights2=NULL; if(autocorr == 0) { phiD2 = PyArray_DATA((PyArrayObject *) x2_array); thetaD2 = PyArray_DATA((PyArrayObject *) y2_array); @@ -1604,15 +1603,15 @@ static PyObject *countpairs_countpairs_s_mu_mocks(PyObject *self, PyObject *args /* Interpret the input objects as numpy arrays. */ const int requirements = NPY_ARRAY_IN_ARRAY; - PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL, *weights1_array = NULL; - PyObject *x2_array = NULL, *y2_array = NULL, *z2_array = NULL, *weights2_array = NULL; - x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); - y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); - z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); + PyObject *y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); + PyObject *z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *weights1_array = NULL; if(weights1_obj != NULL){ weights1_array = PyArray_FromArray(weights1_obj, NOTYPE_DESCR, requirements); } + PyObject *x2_array = NULL, *y2_array = NULL, *z2_array = NULL, *weights2_array = NULL; if(autocorr == 0) { x2_array = PyArray_FromArray(x2_obj, NOTYPE_DESCR, requirements); y2_array = PyArray_FromArray(y2_obj, NOTYPE_DESCR, requirements); @@ -1641,16 +1640,15 @@ static PyObject *countpairs_countpairs_s_mu_mocks(PyObject *self, PyObject *args } /* Get pointers to the data as C-types. */ - void *phiD1=NULL, *thetaD1=NULL, *czD1=NULL, *weights1=NULL; - void *phiD2=NULL, *thetaD2=NULL, *czD2=NULL, *weights2=NULL; - - phiD1 = PyArray_DATA((PyArrayObject *)x1_array); - thetaD1 = PyArray_DATA((PyArrayObject *)y1_array); - czD1 = PyArray_DATA((PyArrayObject *)z1_array); + void *phiD1 = PyArray_DATA((PyArrayObject *)x1_array); + void *thetaD1 = PyArray_DATA((PyArrayObject *)y1_array); + void *czD1 = PyArray_DATA((PyArrayObject *)z1_array); + void *weights1=NULL; if(weights1_array != NULL){ weights1 = PyArray_DATA((PyArrayObject *) weights1_array); } + void *phiD2=NULL, *thetaD2=NULL, *czD2=NULL, *weights2=NULL; if(autocorr == 0) { phiD2 = PyArray_DATA((PyArrayObject *) x2_array); thetaD2 = PyArray_DATA((PyArrayObject *) y2_array); @@ -1911,14 +1909,14 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg /* Interpret the input objects as numpy arrays. */ const int requirements = NPY_ARRAY_IN_ARRAY; - PyObject *x1_array = NULL, *y1_array = NULL, *weights1_array = NULL; - PyObject *x2_array = NULL, *y2_array = NULL, *weights2_array = NULL; - x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); - y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); + PyObject *x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); + PyObject *y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); + PyObject *weights1_array = NULL; if(weights1_obj != NULL){ weights1_array = PyArray_FromArray(weights1_obj, NOTYPE_DESCR, requirements); } + PyObject *x2_array = NULL, *y2_array = NULL, *weights2_array = NULL; if(autocorr == 0) { x2_array = PyArray_FromArray(x2_obj, NOTYPE_DESCR, requirements); y2_array = PyArray_FromArray(y2_obj, NOTYPE_DESCR, requirements); @@ -1944,14 +1942,14 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg } /* Get pointers to the data as C-types. */ - void *phiD1 = NULL, *thetaD1 = NULL, *weights1=NULL; - void *phiD2 = NULL, *thetaD2 = NULL, *weights2=NULL; - phiD1 = PyArray_DATA((PyArrayObject *) x1_array); - thetaD1 = PyArray_DATA((PyArrayObject *) y1_array); + void *phiD1 = PyArray_DATA((PyArrayObject *) x1_array); + void *thetaD1 = PyArray_DATA((PyArrayObject *) y1_array); + void *weights1 = NULL; if(weights1_array != NULL){ weights1 = PyArray_DATA((PyArrayObject *) weights1_array); } + void *phiD2=NULL, *thetaD2=NULL, *weights2=NULL; if(autocorr == 0) { phiD2 = PyArray_DATA((PyArrayObject *) x2_array); thetaD2 = PyArray_DATA((PyArrayObject *) y2_array); @@ -2158,15 +2156,12 @@ static PyObject *countpairs_countspheres_vpf_mocks(PyObject *self, PyObject *arg /* Interpret the input objects as numpy arrays. */ const int requirements = NPY_ARRAY_IN_ARRAY; - PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL; - PyObject *x2_array = NULL, *y2_array = NULL, *z2_array = NULL; - x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); - y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); - z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); - x2_array = PyArray_FromArray(x2_obj, NOTYPE_DESCR, requirements); - y2_array = PyArray_FromArray(y2_obj, NOTYPE_DESCR, requirements); - z2_array = PyArray_FromArray(z2_obj, NOTYPE_DESCR, requirements); - + PyObject *x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); + PyObject *y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); + PyObject *z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *x2_array = PyArray_FromArray(x2_obj, NOTYPE_DESCR, requirements); + PyObject *y2_array = PyArray_FromArray(y2_obj, NOTYPE_DESCR, requirements); + PyObject *z2_array = PyArray_FromArray(z2_obj, NOTYPE_DESCR, requirements); if (x1_array == NULL || y1_array == NULL || z1_array == NULL || x2_array == NULL || y2_array == NULL || z2_array == NULL) { @@ -2185,16 +2180,13 @@ static PyObject *countpairs_countspheres_vpf_mocks(PyObject *self, PyObject *arg } /* Get pointers to the data as C-types. */ - void *phiD1=NULL, *thetaD1=NULL,*czD1=NULL; - void *phiD2=NULL, *thetaD2=NULL,*czD2=NULL; - - phiD1 = PyArray_DATA((PyArrayObject *) x1_array); - thetaD1 = PyArray_DATA((PyArrayObject *) y1_array); - czD1 = PyArray_DATA((PyArrayObject *) z1_array); + void *phiD1 = PyArray_DATA((PyArrayObject *) x1_array); + void *thetaD1 = PyArray_DATA((PyArrayObject *) y1_array); + void *czD1 = PyArray_DATA((PyArrayObject *) z1_array); - phiD2 = PyArray_DATA((PyArrayObject *) x2_array); - thetaD2 = PyArray_DATA((PyArrayObject *) y2_array); - czD2 = PyArray_DATA((PyArrayObject *) z2_array); + void *phiD2 = PyArray_DATA((PyArrayObject *) x2_array); + void *thetaD2 = PyArray_DATA((PyArrayObject *) y2_array); + void *czD2 = PyArray_DATA((PyArrayObject *) z2_array); NPY_BEGIN_THREADS_DEF; NPY_BEGIN_THREADS; diff --git a/theory/examples/run_correlations.c b/theory/examples/run_correlations.c index 13a6286e..4fc26de7 100644 --- a/theory/examples/run_correlations.c +++ b/theory/examples/run_correlations.c @@ -74,13 +74,13 @@ int main(int argc, char **argv) DOUBLE pimax; DOUBLE mu_max=1.0; int nmu_bins=10; - int nthreads=1;//default to single thread + int nthreads;//default to single thread struct config_options options = get_config_options(); options.verbose = 1; options.periodic = 1; options.float_type = sizeof(DOUBLE); - + #if defined(_OPENMP) const char argnames[][30]={"file","format","binfile","boxsize","pimax","mu_max","nmu_bins","Nthreads"}; #else @@ -106,6 +106,8 @@ int main(int argc, char **argv) nmu_bins=atoi(argv[7]); #if defined(_OPENMP) nthreads = atoi(argv[8]); +#else + nthreads = 1; #endif } } else { @@ -151,7 +153,7 @@ int main(int argc, char **argv) fprintf(stderr,ANSI_COLOR_MAGENTA "Command-line for running equivalent DD(r) calculation would be:\n `%s %s %s %s %s %s'" ANSI_COLOR_RESET "\n", "../DD/DD",file,fileformat,file,fileformat,binfile); #endif - + results_countpairs results; int status = countpairs(ND1,x1,y1,z1, ND2,x2,y2,z2, @@ -204,7 +206,7 @@ int main(int argc, char **argv) if(status != EXIT_SUCCESS) { return status; } - + gettimeofday(&t1,NULL); double pair_time = ADD_DIFF_TIME(t0,t1); #if 0 @@ -250,7 +252,7 @@ int main(int argc, char **argv) if(status != EXIT_SUCCESS) { return status; } - + gettimeofday(&t1,NULL); double pair_time = ADD_DIFF_TIME(t0,t1); #if 0 @@ -271,8 +273,8 @@ int main(int argc, char **argv) free_results_s_mu(&results); } - - + + //Do the wp counts { gettimeofday(&t0,NULL); @@ -298,7 +300,7 @@ int main(int argc, char **argv) if(status != EXIT_SUCCESS) { return status; } - + double pair_time = ADD_DIFF_TIME(t0,t1); #if 0 double rlow=results.rupp[0]; @@ -374,7 +376,7 @@ int main(int argc, char **argv) if(status != EXIT_SUCCESS) { return status; } - + gettimeofday(&t1,NULL); double sphere_time = ADD_DIFF_TIME(t0,t1); diff --git a/theory/python_bindings/_countpairs.c b/theory/python_bindings/_countpairs.c index 23489428..47d6e6e3 100644 --- a/theory/python_bindings/_countpairs.c +++ b/theory/python_bindings/_countpairs.c @@ -1319,10 +1319,10 @@ static PyObject *countpairs_countpairs(PyObject *self, PyObject *args, PyObject The input objects can be converted into the required DOUBLE array. */ const int requirements = NPY_ARRAY_IN_ARRAY; - PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL, *weights1_array = NULL; - x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); - y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); - z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); + PyObject *y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); + PyObject *z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *weights1_array = NULL; if(weights1_obj != NULL){ weights1_array = PyArray_FromArray(weights1_obj, NOTYPE_DESCR, requirements); } @@ -1358,10 +1358,10 @@ static PyObject *countpairs_countpairs(PyObject *self, PyObject *args, PyObject /* Get pointers to the data */ - void *X1 = NULL, *Y1=NULL, *Z1=NULL, *weights1=NULL; - X1 = PyArray_DATA((PyArrayObject *) x1_array); - Y1 = PyArray_DATA((PyArrayObject *) y1_array); - Z1 = PyArray_DATA((PyArrayObject *) z1_array); + void *X1 = PyArray_DATA((PyArrayObject *) x1_array); + void *Y1 = PyArray_DATA((PyArrayObject *) y1_array); + void *Z1 = PyArray_DATA((PyArrayObject *) z1_array); + void *weights1=NULL; if(weights1_array != NULL){ weights1 = PyArray_DATA((PyArrayObject *) weights1_array); } @@ -1612,15 +1612,15 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO /* Interpret the input objects as numpy arrays. */ const int requirements = NPY_ARRAY_IN_ARRAY; - PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL, *weights1_array = NULL; - PyObject *x2_array = NULL, *y2_array = NULL, *z2_array = NULL, *weights2_array = NULL; - x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); - y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); - z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); + PyObject *y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); + PyObject *z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *weights1_array = NULL; if(weights1_obj != NULL){ weights1_array = PyArray_FromArray(weights1_obj, NOTYPE_DESCR, requirements); } + PyObject *x2_array = NULL, *y2_array = NULL, *z2_array = NULL, *weights2_array = NULL; if(autocorr == 0) { x2_array = PyArray_FromArray(x2_obj, NOTYPE_DESCR, requirements); y2_array = PyArray_FromArray(y2_obj, NOTYPE_DESCR, requirements); @@ -1650,15 +1650,15 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO /* Get pointers to the data as C-types. */ - void *X1 = NULL, *Y1 = NULL, *Z1 = NULL, *weights1=NULL; - void *X2 = NULL, *Y2 = NULL, *Z2 = NULL, *weights2=NULL; - X1 = PyArray_DATA((PyArrayObject *) x1_array); - Y1 = PyArray_DATA((PyArrayObject *) y1_array); - Z1 = PyArray_DATA((PyArrayObject *) z1_array); + void *X1 = PyArray_DATA((PyArrayObject *) x1_array); + void *Y1 = PyArray_DATA((PyArrayObject *) y1_array); + void *Z1 = PyArray_DATA((PyArrayObject *) z1_array); + void *weights1=NULL; if(weights1_array != NULL){ weights1 = PyArray_DATA((PyArrayObject *) weights1_array); } + void *X2 = NULL, *Y2 = NULL, *Z2 = NULL, *weights2=NULL; if(autocorr == 0) { X2 = PyArray_DATA((PyArrayObject *) x2_array); Y2 = PyArray_DATA((PyArrayObject *) y2_array); @@ -1868,10 +1868,10 @@ static PyObject *countpairs_countpairs_wp(PyObject *self, PyObject *args, PyObje /* Interpret the input objects as numpy arrays. */ const int requirements = NPY_ARRAY_IN_ARRAY; - PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL, *weights1_array = NULL; - x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); - y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); - z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); + PyObject *y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); + PyObject *z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *weights1_array = NULL; if(weights1_obj != NULL){ weights1_array = PyArray_FromArray(weights1_obj, NOTYPE_DESCR, requirements); } @@ -2105,10 +2105,10 @@ static PyObject *countpairs_countpairs_xi(PyObject *self, PyObject *args, PyObje /* Interpret the input objects as numpy arrays. */ const int requirements = NPY_ARRAY_IN_ARRAY; - PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL, *weights1_array = NULL; - x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); - y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); - z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); + PyObject *y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); + PyObject *z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *weights1_array = NULL; if(weights1_obj != NULL){ weights1_array = PyArray_FromArray(weights1_obj, NOTYPE_DESCR, requirements); } @@ -2378,15 +2378,15 @@ static PyObject *countpairs_countpairs_s_mu(PyObject *self, PyObject *args, PyOb /* Interpret the input objects as numpy arrays. */ const int requirements = NPY_ARRAY_IN_ARRAY; - PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL, *weights1_array = NULL; - PyObject *x2_array = NULL, *y2_array = NULL, *z2_array = NULL, *weights2_array = NULL; - x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); - y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); - z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); + PyObject *y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); + PyObject *z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *weights1_array = NULL; if(weights1_obj != NULL){ weights1_array = PyArray_FromArray(weights1_obj, NOTYPE_DESCR, requirements); } + PyObject *x2_array = NULL, *y2_array = NULL, *z2_array = NULL, *weights2_array = NULL; if(autocorr == 0) { x2_array = PyArray_FromArray(x2_obj, NOTYPE_DESCR, requirements); y2_array = PyArray_FromArray(y2_obj, NOTYPE_DESCR, requirements); @@ -2416,15 +2416,15 @@ static PyObject *countpairs_countpairs_s_mu(PyObject *self, PyObject *args, PyOb /* Get pointers to the data as C-types. */ - void *X1 = NULL, *Y1 = NULL, *Z1 = NULL, *weights1=NULL; - void *X2 = NULL, *Y2 = NULL, *Z2 = NULL, *weights2=NULL; - X1 = PyArray_DATA((PyArrayObject *) x1_array); - Y1 = PyArray_DATA((PyArrayObject *) y1_array); - Z1 = PyArray_DATA((PyArrayObject *) z1_array); + void *X1 = PyArray_DATA((PyArrayObject *) x1_array); + void *Y1 = PyArray_DATA((PyArrayObject *) y1_array); + void *Z1 = PyArray_DATA((PyArrayObject *) z1_array); + void *weights1=NULL; if(weights1_array != NULL){ weights1 = PyArray_DATA((PyArrayObject *) weights1_array); } + void *X2 = NULL, *Y2 = NULL, *Z2 = NULL, *weights2=NULL; if(autocorr == 0) { X2 = PyArray_DATA((PyArrayObject *) x2_array); Y2 = PyArray_DATA((PyArrayObject *) y2_array); @@ -2601,10 +2601,9 @@ static PyObject *countpairs_countspheres_vpf(PyObject *self, PyObject *args, PyO /* Interpret the input objects as numpy arrays. */ const int requirements = NPY_ARRAY_IN_ARRAY; - PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL; - x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); - y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); - z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + PyObject *x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); + PyObject *y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); + PyObject *z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); if (x1_array == NULL || y1_array == NULL || z1_array == NULL) { Py_XDECREF(x1_array); diff --git a/utils/utils.c b/utils/utils.c index 1680bb2b..3de34e01 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -154,7 +154,6 @@ int setup_bins_float(const char *fname,float *rmin,float *rmax,int *nbin,float * //the form of the data file should be const int MAXBUFSIZE=1000; char buf[MAXBUFSIZE]; - FILE *fp=NULL; float low,hi; const char comment='#'; const int nitems=2; @@ -162,7 +161,7 @@ int setup_bins_float(const char *fname,float *rmin,float *rmax,int *nbin,float * *nbin = ((int) getnumlines(fname,comment))+1; *rupp = my_calloc(sizeof(float),*nbin+1); - fp = my_fopen(fname,"r"); + FILE *fp = my_fopen(fname,"r"); if(fp == NULL) { free(*rupp); return EXIT_FAILURE; @@ -425,10 +424,10 @@ char * get_time_string(struct timeval t0,struct timeval t1) size_t curr_index = 0; int which = 0; while (which < 4) { - char units[4][10] = {"days", "hrs" , "mins", "secs"}; double time_to_print = floor(timeleft/ratios[which]); if (time_to_print > 1) { timeleft -= (time_to_print*ratios[which]); + char units[4][10] = {"days", "hrs" , "mins", "secs"}; char tmp[MAXLINESIZE]; my_snprintf(tmp, MAXLINESIZE, "%5d %s",(int)time_to_print,units[which]); const size_t len = strlen(tmp); @@ -450,18 +449,16 @@ void print_time(struct timeval t0,struct timeval t1,const char *s) { double timediff = t1.tv_sec - t0.tv_sec; double ratios[] = {24*3600.0, 3600.0, 60.0, 1}; - fprintf(stderr,"Time taken to execute '%s' = ",s); - if(timediff < ratios[2]) { fprintf(stderr,"%6.3lf secs",1e-6*(t1.tv_usec-t0.tv_usec) + timediff); } else { double timeleft = timediff; int which = 0; - char units[4][10] = {"days", "hrs" , "mins", "secs"}; while (which < 4) { double time_to_print = floor(timeleft/ratios[which]); if (time_to_print > 1) { + char units[4][10] = {"days", "hrs" , "mins", "secs"}; timeleft -= (time_to_print*ratios[which]); fprintf(stderr,"%5d %s",(int)time_to_print,units[which]); } From f6cb96680a46c1e8b3195f2bc522cefa58b75957 Mon Sep 17 00:00:00 2001 From: Manodeep Sinha Date: Sun, 23 Jun 2019 17:35:11 +1000 Subject: [PATCH 09/14] Fixed an accidental edit and another codacy issue --- theory/examples/run_correlations.c | 2 +- utils/utils.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/theory/examples/run_correlations.c b/theory/examples/run_correlations.c index 4fc26de7..94296cac 100644 --- a/theory/examples/run_correlations.c +++ b/theory/examples/run_correlations.c @@ -82,10 +82,10 @@ int main(int argc, char **argv) options.float_type = sizeof(DOUBLE); #if defined(_OPENMP) + nthreads = 4; const char argnames[][30]={"file","format","binfile","boxsize","pimax","mu_max","nmu_bins","Nthreads"}; #else const char argnames[][30]={"file","format","binfile","boxsize","pimax","mu_max","nmu_bins"}; - nthreads = 4; #endif int nargs=sizeof(argnames)/(sizeof(char)*30); diff --git a/utils/utils.c b/utils/utils.c index 3de34e01..5221fbd1 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -109,7 +109,6 @@ int setup_bins_double(const char *fname,double *rmin,double *rmax,int *nbin,doub //the form of the data file should be const int MAXBUFSIZE=1000; char buf[MAXBUFSIZE]; - FILE *fp=NULL; double low,hi; const char comment='#'; const int nitems=2; @@ -117,7 +116,7 @@ int setup_bins_double(const char *fname,double *rmin,double *rmax,int *nbin,doub *nbin = ((int) getnumlines(fname,comment))+1; *rupp = my_calloc(sizeof(double),*nbin+1); - fp = my_fopen(fname,"r"); + FILE *fp = my_fopen(fname,"r"); if(fp == NULL) { free(*rupp); return EXIT_FAILURE; From 0ecd2912cf86a842881eb122a156c56fd61aec5b Mon Sep 17 00:00:00 2001 From: Manodeep Sinha Date: Tue, 25 Jun 2019 10:56:50 +1000 Subject: [PATCH 10/14] Modernised the readme and added the OpenMP on OSX to the FAQ --- FAQ | 131 ++++++++++++++++++++++++++++++++--------------------- README.rst | 113 +++++++++++++++++++++++---------------------- 2 files changed, 140 insertions(+), 104 deletions(-) diff --git a/FAQ b/FAQ index 71a6e83e..f4ef1674 100644 --- a/FAQ +++ b/FAQ @@ -3,92 +3,92 @@ A. Problems with the python interface on MAC: 1. You are getting the error message "Fatal Python error: PyThreadState_Get: no current thread" when you do "python call_correlation_functions.py" -This is usually caused by a python library mis-match between linking time and run-time, i.e., +This is usually caused by a python library mis-match between linking time and run-time, i.e., the python shared extension (_countpairs.so in the python_bindings/ subdir) was built with a -python lib version that is not the first one in your search path. The way I resolved it on -my MAC was by adding the path: "python-config --prefix"/lib to the dynamic library -path environment variable. +python lib version that is not the first one in your search path. The way I resolved it on +my MAC was by adding the path: "python-config --prefix"/lib to the dynamic library +path environment variable. -Typically, this happens when the output of "otool -L _countpairs.so" does not show -a full path to libpythonX.X.dylib. +Typically, this happens when the output of "otool -L _countpairs.so" does not show +a full path to libpythonX.X.dylib. -In my case, I use the anaconda distribution for python at link-time but I pick +In my case, I use the anaconda distribution for python at link-time but I pick up the macports install at run-time. The exact command to fix the situation is: Option 1. ---------- -Change the rpath in the shared C extension library: +Change the rpath in the shared C extension library: "install_name_tool -change libpythonX.X.dylib `python-config --prefix`/lib/libpythonX.X.dylib _countpairs.so" -"otool -L _countpairs.so" should show the full path to the libpythonX.X.dylib file. +"otool -L _countpairs.so" should show the full path to the libpythonX.X.dylib file. -Option 2. +Option 2. ---------- -Add to the environment variable - but this is not fool-proof. +Add to the environment variable - but this is not fool-proof. "export DYLD_FALLBACK_LIBRARY_PATH=`python-config --prefix`/lib:$DYLD_FALLBACK_LIBRARY_PATH" Option 3. ---------- If that does not work, try creating a symbolic link in the python_bindings directory: -"ln -s `python-config --prefix`/lib/libpythonX.X.dylib" +"ln -s `python-config --prefix`/lib/libpythonX.X.dylib" -(where I have omitted the sym-link name, defaults to the actual filename.) +(where I have omitted the sym-link name, defaults to the actual filename.) However, if you go this sym-link route, then the sym-link *has* to be in the directory -where the python code is situated - I do not know of any work-arounds (short of -creating the sym-link in a directory that's in the dynamic library path). +where the python code is situated - I do not know of any work-arounds (short of +creating the sym-link in a directory that's in the dynamic library path). B. Problems compiling with gcc on a MAC: If you see errors like : "no such instruction: `vmovsd 48(%rsp), %xmm0'" -this is because the gcc assembler does not support AVX yet. clang does -- -one way of getting around this problem is to use the clang assembler -instead of the gcc assembler. +this is because the gcc assembler does not support AVX yet. clang does -- +one way of getting around this problem is to use the clang assembler +instead of the gcc assembler. -Make a backup of the gcc assembler (/opt/local/bin/as) and then create -a new file with this content (taken from here: (http://stackoverflow.com/questions/9840207/how-to-use-avx-pclmulqdq-on-mac-os-x-lion) +Make a backup of the gcc assembler (/opt/local/bin/as) and then create +a new file with this content (taken from here: (http://stackoverflow.com/questions/9840207/how-to-use-avx-pclmulqdq-on-mac-os-x-lion) and here (https://gist.github.com/xianyi/2957847 as modified here: https://gist.github.com/ancapdev/8059572): ---------------------- -#!/bin/sh -HAS_INPUT_FILE=0 -ARGS=$@ -while [ $# -ne 0 ]; do - ARG=$1 - # Skip options - if [ $ARG == "-arch" ] || [ $ARG == "-o" ]; then - # Skip next token - shift - shift - continue - fi - - if [ `echo $ARG | head -c1` == "-" ]; then - shift - continue - fi - - HAS_INPUT_FILE=1 - break -done - -if [ $HAS_INPUT_FILE -eq 1 ]; then - clang -Qunused-arguments -c -x assembler $ARGS -else - clang -Qunused-arguments -c -x assembler $ARGS - -fi +#!/bin/sh +HAS_INPUT_FILE=0 +ARGS=$@ +while [ $# -ne 0 ]; do + ARG=$1 + # Skip options + if [ $ARG == "-arch" ] || [ $ARG == "-o" ]; then + # Skip next token + shift + shift + continue + fi + + if [ `echo $ARG | head -c1` == "-" ]; then + shift + continue + fi + + HAS_INPUT_FILE=1 + break +done + +if [ $HAS_INPUT_FILE -eq 1 ]; then + clang -Qunused-arguments -c -x assembler $ARGS +else + clang -Qunused-arguments -c -x assembler $ARGS - +fi ---------------------- -Using the clang assembler is now default when compiling with gcc on a MAC. -This is achieved in [common.mk](common.mk) by adding the compiler flag -Wa,-q. +Using the clang assembler is now default when compiling with gcc on a MAC. +This is achieved in [common.mk](common.mk) by adding the compiler flag -Wa,-q. However, if this causes errors and you are confident your gcc version supports -AVX instructions, then you can remove that -Wa,-q flag from common.mk. +AVX instructions, then you can remove that -Wa,-q flag from common.mk. Note, that a normal install of XCode command line tools will have clang masquerading -as gcc. You can tell if gcc --version mentions clang. +as gcc. You can tell if gcc --version mentions clang. C. Problems running tests with conda gcc on a MAC: @@ -103,3 +103,32 @@ then, the fix is, as above, to use `install_name_tool -change @rpath/./libgomp.1 and then rerun "make tests". +D. OpenMP on MAC OSX +---------------------- + +There are a wide variety of possible compiler options on OSX. The default `clang` compiler supplied by +Apple (residing in `/usr/bin/clang`) is quite old and does not support OpenMP. Somewhat frustratingly, +Apple also has aliased `gcc` to point to `clang` by default; which means we need to access a more recent +compiler to build the `Corrfunc` libraries with OpenMP support. There are a multitude of options for the compiler (`clang/gcc/icc`), +an associated OpenMP runtime library (`libomp/libgomp/libiomp`), and installation sources (`homebrew/macports/conda/OS supplied/binary distro`). +Typically, the choice of the compiler sets the choice of the OpenMP library; however, all possible combinations between the source and +the compiler can be expected. Such a vast array of options makes reliable (automatic) detection and installation of OpenMP +enabled `Corrfunc`. Here we will outline some of the steps to get an OpenMP install of `Corrfunc`, depending on the preferred +package management scheme: + + * Homebrew -- Discussion was here: https://github.com/manodeep/Corrfunc/issues/172 + - `brew install clang` + - `brew install libomp` + - Add `CFLAGS ?= -Xpreprocessor -fopenmp` in `common.mk` + - Add `CLINK ?= -lomp` in `common.mk` + - If kernel crashes, add `os.environ['KMP_DUPLICATE_LIB_OK']='True'` + + * Macports -- All you need is a compatible version of `clang` (any version of `clang >= 3.8` will suffice) + - `sudo port install clang` + + * OS supplied -- If you are unable to upgrade the compiler and are stuck with the OSX supplied `clang`, then chances + might be slim to get an OpenMP build of `Corrfunc`. If you do solve the issue, then please update this line and + send in a pull request on GitHub. Such a pull request will be genuinely appreciated. + + * Binary distro -- You will need to add appropriate flags to `CFLAGS` and `CLINK` at the top of `common.mk`. You might + find it helpful to follow the steps for homebrew diff --git a/README.rst b/README.rst index 0bea2b54..1ac0eeb0 100644 --- a/README.rst +++ b/README.rst @@ -84,6 +84,14 @@ Alternate Install Method The python package is directly installable via ``pip install Corrfunc``. However, in that case you will lose the ability to recompile the code according to your needs. Installing via ``pip`` is **not** recommended, please open an install issue on this repo first; doing so helps improve the code-base and saves future users from running into similar install issues. +OpenMP on OSX +-------------- + +Automatically detecting OpenMP support from the compiler and the runtime is a +bit tricky. If you run into any issues compiling (or running) with OpenMP, +please refer to the `FAQ `__ for potential solutions. + + Installation notes ------------------ @@ -166,9 +174,6 @@ Theory (in `theory.options `__) bin. Can be a massive performance hit (~ 2.2x in case of wp). Disabled by default. -3. ``DOUBLE_PREC`` -- switches on calculations in double precision. Disabled - by default (i.e., calculations are performed in single precision by default). - Mocks (in `mocks.options `__) ---------------------------------------------- @@ -179,30 +184,20 @@ Mocks (in `mocks.options `__) be extremely slow (~5x) depending on compiler, and CPU capabilities. Disabled by default. -3. ``DOUBLE_PREC`` -- switches on calculations in double precision. Disabled - by default (i.e., calculations are performed in single precision by default). - -4. ``LINK_IN_DEC`` -- creates binning in declination for ``DDtheta``. Please +3. ``LINK_IN_DEC`` -- creates binning in declination for ``DDtheta_mocks``. Please check that for your desired limits ``\theta``, this binning does not produce incorrect results (due to numerical precision). Generally speaking, if your ``\thetamax`` (the max. ``\theta`` to consider pairs within) is too small (probaly less than 1 degree), then you should check with and without this option. Errors are typically sub-percent level. -5. ``LINK_IN_RA`` -- creates binning in RA once binning in DEC has been - enabled. Same numerical issues as ``LINK_IN_DEC`` +4. ``LINK_IN_RA`` -- creates binning in RA once binning in DEC has been + enabled for ``DDtheta_mocks``. Same numerical issues as ``LINK_IN_DEC`` -6. ``FAST_DIVIDE`` -- Disabled by default. Divisions are slow but required - ``DD(r_p,\pi)``. Enabling this option, replaces - the divisions with a reciprocal followed by a Newton-Raphson. The code - will run ~20% faster at the expense of some numerical precision. - Please check that the loss of precision is not important for your - use-case. - -7. ``FAST_ACOS`` -- Relevant only when ``OUTPUT_THETAAVG`` is enabled. Disabled - by default. An ``arccos`` is required to calculate ``<\theta>``. In absence of vectorized - ``arccos`` (intel compiler, ``icc`` provides one via intel Short Vector Math - Library), this calculation is extremely slow. However, we can approximate +5. ``FAST_ACOS`` -- Relevant only when ``OUTPUT_THETAAVG`` is enabled for + ``DDtheta_mocks``. Disabled by default. An ``arccos`` is required to + calculate ``<\theta>``. In absence of vectorized ``arccos`` (intel compiler, + ``icc`` provides one via intel Short Vector Math Library), this calculation is extremely slow. However, we can approximate ``arccos`` using polynomials (with `Remez Algorithm `_). The approximations are taken from implementations released by `Geometric Tools `_. Depending on the level of accuracy desired, this implementation of ``fast acos`` @@ -210,13 +205,57 @@ Mocks (in `mocks.options `__) accurate implementation is already present in that file. Please check that the loss of precision is not important for your use-case. -8. ``COMOVING_DIST`` -- Currently there is no support in ``Corrfunc`` for different cosmologies. However, for the +6. ``COMOVING_DIST`` -- Currently there is no support in ``Corrfunc`` for different cosmologies. However, for the mocks routines like, ``DDrppi_mocks`` and ``vpf_mocks``, cosmology parameters are required to convert between redshift and co-moving distance. Both ``DDrppi_mocks`` and ``vpf_mocks`` expects to receive a ``redshift`` array as input; however, with this option enabled, the ``redshift`` array will be assumed to contain already converted co-moving distances. So, if you have redshifts and want to use an arbitrary cosmology, then convert the redshifts into co-moving distances, enable this option, and pass the co-moving distance array into the routines. +Common Code options for both Mocks and Theory +============================================== + +1. ``DOUBLE_PREC`` -- switches on calculations in double + precision. Calculations are performed in double precision when enabled. This + option is disabled by default in theory and enabled by default in the mocks + routines. + +2. ``USE_OMP`` -- uses OpenMP parallelization. Scaling is great for DD + (close to perfect scaling up to 12 threads in our tests) and okay (runtime + becomes constant ~6-8 threads in our tests) for ``DDrppi`` and ``wp``. + Enabled by default. The ``Makefile`` will compare the `CC` variable with + known OpenMP enabled compilers and set compile options accordingly. + Set in `common.mk `__ by default. + +3. ``ENABLE_MIN_SEP_OPT`` -- uses some further optimisations based on the + minimum separation between pairs of cells. Enabled by default. + +4. ``COPY_PARTICLES`` -- whether or not to create a copy of the particle + positions (and weights, if supplied). Enabled by default (copies of the + particle arrays **are** created) + +5. ``FAST_DIVIDE`` -- Disabled by default. Divisions are slow but required + ``DDrppi_mocks(r_p,\pi)``, ``DDsmu_mocks(s, \mu)`` and ``DD(s, \mu)``. + Enabling this option, replaces the divisions with a reciprocal + followed by a Newton-Raphson. The code will run ~20% faster at the expense + of some numerical precision. Please check that the loss of precision is not + important for your use-case. + +*Optimization for your architecture* + +1. The values of ``bin_refine_factor`` and/or ``zbin_refine_factor`` in + the ``countpairs\_\*.c`` files control the cache-misses, and + consequently, the runtime. In my trial-and-error methods, I have seen + any values larger than 3 are generally slower for theory routines but + can be faster for mocks. But some different + combination of 1/2 for ``(z)bin_refine_factor`` might be faster on + your platform. + +2. If you are using the angular correlation function and need ``thetaavg``, + you might benefit from using the INTEL MKL library. The vectorized + trigonometric functions provided by MKL can provide significant speedup. + + Running the codes ================= @@ -294,38 +333,6 @@ use the C extensions directly. Here are a few examples: print(wp_results) -Common Code options for both Mocks and Cosmological Boxes -========================================================= - -1. ``USE_OMP`` -- uses OpenMP parallelization. Scaling is great for DD - (close to perfect scaling up to 12 threads in our tests) and okay (runtime - becomes constant ~6-8 threads in our tests) for ``DDrppi`` and ``wp``. - Enabled by default. The ``Makefile`` will compare the `CC` variable with - known OpenMP enabled compilers and set compile options accordingly. - Set in `common.mk `__ by default. - -2. ``ENABLE_MIN_SEP_OPT`` -- uses some further optimisations based on the - minimum separation between pairs of cells. Enabled by default. - -3. ``COPY_PARTICLES`` -- whether or not to create a copy of the particle - positions (and weights, if supplied). Enabled by default (copies of the - particle arrays **are** created) - -*Optimization for your architecture* - -1. The values of ``bin_refine_factor`` and/or ``zbin_refine_factor`` in - the ``countpairs\_\*.c`` files control the cache-misses, and - consequently, the runtime. In my trial-and-error methods, I have seen - any values larger than 3 are generally slower for theory routines but - can be faster for mocks. But some different - combination of 1/2 for ``(z)bin_refine_factor`` might be faster on - your platform. - -2. If you are using the angular correlation function and need ``thetaavg``, - you might benefit from using the INTEL MKL library. The vectorized - trigonometric functions provided by MKL can provide significant speedup. - - Author & Maintainers ===================== From 9446bef31069019cf54f6082ceae89cc275b5d34 Mon Sep 17 00:00:00 2001 From: Manodeep Sinha Date: Wed, 26 Jun 2019 11:37:44 +1000 Subject: [PATCH 11/14] Fixed build failure from improper initialisation --- mocks/examples/run_correlations_mocks.c | 3 +-- theory/examples/run_correlations.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/mocks/examples/run_correlations_mocks.c b/mocks/examples/run_correlations_mocks.c index 46c280bc..069e953a 100644 --- a/mocks/examples/run_correlations_mocks.c +++ b/mocks/examples/run_correlations_mocks.c @@ -98,8 +98,6 @@ int main(int argc, char **argv) nmu_bins=atoi(argv[7]); #if defined(_OPENMP) nthreads = atoi(argv[8]); -#else - nthreads=1; #endif } } else { @@ -110,6 +108,7 @@ int main(int argc, char **argv) cosmology=1; mu_max=1.0; nmu_bins=10; + nthreads=1; } fprintf(stderr,ANSI_COLOR_BLUE "Running `%s' with the parameters \n",argv[0]); diff --git a/theory/examples/run_correlations.c b/theory/examples/run_correlations.c index 94296cac..14b00086 100644 --- a/theory/examples/run_correlations.c +++ b/theory/examples/run_correlations.c @@ -106,8 +106,6 @@ int main(int argc, char **argv) nmu_bins=atoi(argv[7]); #if defined(_OPENMP) nthreads = atoi(argv[8]); -#else - nthreads = 1; #endif } } else { @@ -118,6 +116,7 @@ int main(int argc, char **argv) pimax=40.0; mu_max=1.0; nmu_bins=10; + nthreads=1; } fprintf(stderr,ANSI_COLOR_BLUE "Running `%s' with the parameters \n",argv[0]); From 206c2f850ce0ce0056a4933002543ac723efe246 Mon Sep 17 00:00:00 2001 From: Manodeep Sinha Date: Fri, 12 Jul 2019 07:05:47 +1000 Subject: [PATCH 12/14] Attempting to fix build failure on OSX --- mocks/examples/run_correlations_mocks.c | 6 ++---- theory/examples/run_correlations.c | 4 +--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/mocks/examples/run_correlations_mocks.c b/mocks/examples/run_correlations_mocks.c index 069e953a..842569c2 100644 --- a/mocks/examples/run_correlations_mocks.c +++ b/mocks/examples/run_correlations_mocks.c @@ -39,7 +39,7 @@ void Printhelp(void); void Printhelp(void) { fprintf(stderr,ANSI_COLOR_RED "=========================================================================\n") ; - fprintf(stderr," --- run_correlations_mocks file format binfile boxsize [Nthreads] \n") ; + fprintf(stderr," --- run_correlations_mocks file format binfile boxsize Nthreads \n") ; fprintf(stderr," --- Measure the auto-correlation functions DD(r), DD(rp, pi) and wp(rp) for a single file\n"); fprintf(stderr," * file = name of data file\n") ; fprintf(stderr," * format = format of data file (a=ascii, f=fast-food)\n") ; @@ -63,7 +63,7 @@ int main(int argc, char **argv) struct timeval t0,t1; DOUBLE pimax; int cosmology=1; - int nthreads; + int nthreads=4; int nmu_bins; DOUBLE mu_max; @@ -74,7 +74,6 @@ int main(int argc, char **argv) options.float_type = sizeof(*ra1); #if defined(_OPENMP) - nthreads=4;//default to 4 threads const char argnames[][30]={"file","format","binfile","pimax","cosmology","mu_max", "nmu_bins", "Nthreads"}; #else const char argnames[][30]={"file","format","binfile","pimax","cosmology", "mu_max", "nmu_bins"}; @@ -108,7 +107,6 @@ int main(int argc, char **argv) cosmology=1; mu_max=1.0; nmu_bins=10; - nthreads=1; } fprintf(stderr,ANSI_COLOR_BLUE "Running `%s' with the parameters \n",argv[0]); diff --git a/theory/examples/run_correlations.c b/theory/examples/run_correlations.c index 14b00086..6692caa7 100644 --- a/theory/examples/run_correlations.c +++ b/theory/examples/run_correlations.c @@ -74,7 +74,7 @@ int main(int argc, char **argv) DOUBLE pimax; DOUBLE mu_max=1.0; int nmu_bins=10; - int nthreads;//default to single thread + int nthreads=4;//default to 4 threads struct config_options options = get_config_options(); options.verbose = 1; @@ -82,7 +82,6 @@ int main(int argc, char **argv) options.float_type = sizeof(DOUBLE); #if defined(_OPENMP) - nthreads = 4; const char argnames[][30]={"file","format","binfile","boxsize","pimax","mu_max","nmu_bins","Nthreads"}; #else const char argnames[][30]={"file","format","binfile","boxsize","pimax","mu_max","nmu_bins"}; @@ -116,7 +115,6 @@ int main(int argc, char **argv) pimax=40.0; mu_max=1.0; nmu_bins=10; - nthreads=1; } fprintf(stderr,ANSI_COLOR_BLUE "Running `%s' with the parameters \n",argv[0]); From f0fa2b2de6138c7d66198789aa3df86a82f03808 Mon Sep 17 00:00:00 2001 From: Manodeep Sinha Date: Fri, 12 Jul 2019 08:24:19 +1000 Subject: [PATCH 13/14] Added a changelog entry --- CHANGES.rst | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 46414be7..4a4f455b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,4 +1,4 @@ -3.0.0 (upcoming) +3.0.0 (future) ================= **Breaking Changes** @@ -10,6 +10,16 @@ New features - conda installable package - GPU version + +2.3.2 (upcoming) +================ + +Enhancements +------------ +- Code quality fixes for errors reported by codacy [#189] + + + 2.3.1 (2019-06-21) ================ @@ -83,13 +93,13 @@ New features - Both the API and ABI should be future proof - Extensive docs (first version with docs) - Arbitrary cosmology can be accounted for in the mocks routines `#71 `_ - + **Breaking Changes** --------------------- - API has changed from previous version. Two additional inputs are now required for every statistic (`#73 `_) - + Enhancements ------------ @@ -107,7 +117,7 @@ Bug fixes - Fixed bug in ``DDrppi_mocks`` where the minimum number of grid cells had to be 1 `#70 `_ - + Outstanding issues @@ -129,10 +139,10 @@ Outstanding issues 1.0.0 (2016-04-14) ================== -- Improved installation process +- Improved installation process - Detecting ``AVX`` capable CPU at compile time - Double-counting bug fixes in ``wp`` and ``xi`` - + 0.2.3 (2016-03-30) ================== @@ -157,11 +167,10 @@ Outstanding issues ================== - Python 2/3 compatible - + 0.0.1 (2015-11-11) ================== - Initial release - From e53bce3050139337dae5723a3601e707f1f37920 Mon Sep 17 00:00:00 2001 From: Manodeep Sinha Date: Sat, 24 Aug 2019 08:24:40 +1000 Subject: [PATCH 14/14] Fixing #192 --- utils/gridlink_impl.c.src | 22 ++++++++++++++++++++++ utils/gridlink_mocks_impl.c.src | 26 +------------------------- utils/gridlink_utils.c.src | 8 -------- utils/gridlink_utils.h.src | 27 +++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 33 deletions(-) diff --git a/utils/gridlink_impl.c.src b/utils/gridlink_impl.c.src index f30dbf40..fd15b000 100644 --- a/utils/gridlink_impl.c.src +++ b/utils/gridlink_impl.c.src @@ -446,6 +446,20 @@ struct cell_pair_DOUBLE * generate_cell_pairs_DOUBLE(struct cellarray_DOUBLE *la "Reducing bin refine factors might help. Requested for %"PRId64" elements " "with each element of size %zu bytes\n", max_num_cell_pairs, sizeof(*all_cell_pairs)); + + /* Under periodic boundary conditions + small nmesh_x/y/z, the periodic wrapping would cause + the same cell to be included as an neighbour cell from both the left and the right side (i.e., + when including cells with -bin_refine_factor, and when including cells up to +bin_refine_factor) + + Previously this would throw an error, but we can simply not add the duplicate cells. + + Raised in issue# 192 (https://github.com/manodeep/Corrfunc/issues/192) + + MS: 23/8/2019 + */ + const int check_for_duplicate_ngb_cells = (periodic == 1 && (nmesh_x < (2*xbin_refine_factor + 1) || + nmesh_y < (2*ybin_refine_factor + 1) || + nmesh_z < (2*zbin_refine_factor + 1))) ? 1:0; for(int64_t icell=0;icellnelements == 0) continue; @@ -456,6 +470,7 @@ struct cell_pair_DOUBLE * generate_cell_pairs_DOUBLE(struct cellarray_DOUBLE *la ANSI_COLOR_RED"BUG: Index reconstruction is wrong. icell = %"PRId64" reconstructed index = %"PRId64 ANSI_COLOR_RESET"\n", icell, (ix * nmesh_y * nmesh_z + iy * nmesh_z + (int64_t) iz)); + int64_t num_ngb_this_cell = 0; for(int iix=-xbin_refine_factor;iix<=xbin_refine_factor;iix++){ const int periodic_ix = (ix + iix + nmesh_x) % nmesh_x; const int non_periodic_ix = ix + iix; @@ -486,6 +501,12 @@ struct cell_pair_DOUBLE * generate_cell_pairs_DOUBLE(struct cellarray_DOUBLE *la continue; } + //Check if the ngb-cell has already been added - can happen under periodic boundary + //conditions, with small value of nmesh_x/y/z (ie large Rmax relative to BoxSize) + if(check_for_duplicate_ngb_cells) { + CHECK_AND_CONTINUE_FOR_DUPLICATE_NGB_CELLS_DOUBLE(icell, icell2, num_cell_pairs, num_ngb_this_cell, all_cell_pairs); + } + struct cellarray_DOUBLE *second = &(lattice2[icell2]); DOUBLE closest_x1 = ZERO, closest_y1 = ZERO, closest_z1 = ZERO; DOUBLE min_dx = ZERO, min_dy = ZERO, min_dz = ZERO; @@ -568,6 +589,7 @@ struct cell_pair_DOUBLE * generate_cell_pairs_DOUBLE(struct cellarray_DOUBLE *la this_cell_pair->same_cell = (autocorr == 1 && icell2 == icell) ? 1:0; num_cell_pairs++; + num_ngb_this_cell++; } //looping over neighbours in Z } //looping over neighbours along Y }//looping over neighbours along X diff --git a/utils/gridlink_mocks_impl.c.src b/utils/gridlink_mocks_impl.c.src index 00511bd4..56047608 100644 --- a/utils/gridlink_mocks_impl.c.src +++ b/utils/gridlink_mocks_impl.c.src @@ -1584,31 +1584,7 @@ struct cell_pair_DOUBLE * generate_cell_pairs_mocks_theta_ra_dec_DOUBLE(cellarra continue; } - int duplicate_flag = 0; - XRETURN(num_cell_pairs - num_ngb_this_cell >= 0, NULL, - "Error: While working on detecting (potential) duplicate cell-pairs on primary cell = %"PRId64"\n" - "The total number of cell-pairs (across all primary cells) = %"PRId64" should be >= the number of cell-pairs for " - "this primary cell = %"PRId64"\n", icell, num_cell_pairs, num_ngb_this_cell); - - for(int jj=0;jjcellindex1 == icell, NULL, - "Error: While working on detecting (potential) duplicate cell-pairs on primary cell = %"PRId64"\n" - "For cell-pair # %"PRId64", the primary cellindex (within cell-pair) = %"PRId64" should be *exactly* " - "equal to current primary cellindex = %"PRId64". Num_cell_pairs = %"PRId64" num_ngb_this_cell = %"PRId64"\n", - icell, num_cell_pairs - jj, this_cell_pair->cellindex1, icell, num_cell_pairs, num_ngb_this_cell); - //Check if the pointer to X within the cell-pair - //is pointing to this secondary cell's X-array. If so, we - //have already assigned - if (this_cell_pair->cellindex2 == icell2) { - duplicate_flag = 1; - break; - } - } - - if(duplicate_flag == 1) { - continue; - } + CHECK_AND_CONTINUE_FOR_DUPLICATE_NGB_CELLS_DOUBLE(icell, icell2, num_cell_pairs, num_ngb_this_cell, all_cell_pairs); XRETURN(num_cell_pairs < max_num_cell_pairs, NULL, "Error: Assigning this existing cell-pair would require accessing invalid memory.\n" diff --git a/utils/gridlink_utils.c.src b/utils/gridlink_utils.c.src index 7f5c09e7..7b72146c 100644 --- a/utils/gridlink_utils.c.src +++ b/utils/gridlink_utils.c.src @@ -34,14 +34,6 @@ int get_binsize_DOUBLE(const DOUBLE xmin,const DOUBLE xmax, const DOUBLE rmax, c const DOUBLE xdiff = (options->periodic && options->boxsize > 0) ? options->boxsize:(xmax-xmin); int nmesh=(int)(refine_factor*xdiff/rmax) ; nmesh = nmesh < 1 ? 1:nmesh; - if(options->periodic == 1) { - if (nmesh<(2*refine_factor+1)) { - fprintf(stderr,"%s> ERROR: nlattice = %d is so small that with periodic wrapping the same cells will be counted twice ....exiting\n",__FILE__,nmesh) ; - fprintf(stderr,"%s> Please reduce Rmax = %"REAL_FORMAT" to be a smaller fraction of the particle distribution region = %"REAL_FORMAT"\n", - __FILE__,rmax, xdiff); - return EXIT_FAILURE; - } - } if (nmesh>max_ncells) nmesh=max_ncells; *xbinsize = xdiff/nmesh; diff --git a/utils/gridlink_utils.h.src b/utils/gridlink_utils.h.src index ee2ee4c7..7509281a 100644 --- a/utils/gridlink_utils.h.src +++ b/utils/gridlink_utils.h.src @@ -41,6 +41,33 @@ extern "C" { const DOUBLE second_xbounds[2], const DOUBLE second_ybounds[2], const DOUBLE second_zbounds[2], DOUBLE *sqr_sep_min, DOUBLE *sqr_sep_max); + +#define CHECK_AND_CONTINUE_FOR_DUPLICATE_NGB_CELLS_DOUBLE(icell, icell2, num_cell_pairs, num_ngb_this_cell, all_cell_pairs) { \ + int duplicate_flag = 0; \ + XRETURN(num_cell_pairs - num_ngb_this_cell >= 0, NULL, \ + "Error: While working on detecting (potential) duplicate cell-pairs on primary cell = %"PRId64"\n" \ + "The total number of cell-pairs (across all primary cells) = %"PRId64" should be >= the number of cell-pairs for " \ + "this primary cell = %"PRId64"\n", icell, num_cell_pairs, num_ngb_this_cell); \ + \ + for(int jj=0;jjcellindex1 == icell, NULL, \ + "Error: While working on detecting (potential) duplicate cell-pairs on primary cell = %"PRId64"\n" \ + "For cell-pair # %"PRId64", the primary cellindex (within cell-pair) = %"PRId64" should be *exactly* " \ + "equal to current primary cellindex = %"PRId64". Num_cell_pairs = %"PRId64" num_ngb_this_cell = %"PRId64"\n", \ + icell, num_cell_pairs - jj, this_cell_pair->cellindex1, icell, num_cell_pairs, num_ngb_this_cell); \ + if (this_cell_pair->cellindex2 == icell2) { \ + duplicate_flag = 1; \ + break; \ + } \ + } \ + if(duplicate_flag == 1) continue; \ + } + + + + + #ifdef __cplusplus } #endif