From b3167eeefea4e70806ceb91c0369357aa2c0f734 Mon Sep 17 00:00:00 2001
From: Kris Stern <krisastern@gobuddy.asia>
Date: Sun, 1 Mar 2020 23:32:17 +0800
Subject: [PATCH 01/16] Add find_fastest_DDtheta_mocks_bin_refs() function

Make modifications as per reviewer suggestion

Correct typo in L15 of DDtheta_mocks.py

Resolve PEP 8 issues

Resolve unused variable error

Get rid of unused lines

Make changes according to reviewer's suggestions

Update in-code example

Make changes to conform to PEP 8 standards

Add new author name to author list in file

Make changes to how three different (link_in_dec, link_in_ra) cases are handled

Remove unnecessary continue as it was not properly in loop

Modify code to show print statement in L555 where applicable
---
 Corrfunc/mocks/DDtheta_mocks.py | 349 +++++++++++++++++++++++++++++---
 Corrfunc/theory/wp.py           |   2 +-
 2 files changed, 323 insertions(+), 28 deletions(-)

diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py
index 9e23dac7..db7ec9e7 100644
--- a/Corrfunc/mocks/DDtheta_mocks.py
+++ b/Corrfunc/mocks/DDtheta_mocks.py
@@ -11,8 +11,8 @@
 from __future__ import (division, print_function, absolute_import,
                         unicode_literals)
 
-__author__ = ('Manodeep Sinha')
-__all__ = ('DDtheta_mocks',)
+__author__ = ('Manodeep Sinha', 'Kris Akira Stern')
+__all__ = ('DDtheta_mocks', 'find_fastest_DDtheta_mocks_bin_refs')
 
 
 def DDtheta_mocks(autocorr, nthreads, binfile,
@@ -211,7 +211,7 @@ def DDtheta_mocks(autocorr, nthreads, binfile,
         the time spent within the C library and ignores all python overhead.
 
     Example
-    --------
+    -------
 
     >>> from __future__ import print_function
     >>> import numpy as np
@@ -239,7 +239,8 @@ def DDtheta_mocks(autocorr, nthreads, binfile,
     ...                         weights1=weights, weight_type='pair_product',
     ...                         link_in_dec=link_in_dec, link_in_ra=link_in_ra,
     ...                         isa=isa, verbose=True)
-    >>> for r in results: print("{0:10.6f} {1:10.6f} {2:10.6f} {3:10d} {4:10.6f}".
+    >>> for r in results:
+    ...    print("{0:10.6f} {1:10.6f} {2:10.6f} {3:10d} {4:10.6f}".
     ...                         format(r['thetamin'], r['thetamax'],
     ...                         r['thetaavg'], r['npairs'], r['weightavg']))
     ...                         # doctest: +NORMALIZE_WHITESPACE
@@ -267,34 +268,35 @@ def DDtheta_mocks(autocorr, nthreads, binfile,
     """
 
     try:
-        from Corrfunc._countpairs_mocks import countpairs_theta_mocks as\
+        from Corrfunc._countpairs_mocks import countpairs_theta_mocks as \
             DDtheta_mocks_extn
     except ImportError:
-        msg = "Could not import the C extension for the angular "\
+        msg = "Could not import the C extension for the angular " \
               "correlation function for mocks."
         raise ImportError(msg)
 
     import numpy as np
-    from Corrfunc.utils import translate_isa_string_to_enum, fix_ra_dec,\
-        return_file_with_rbins, convert_to_native_endian,\
+    from Corrfunc.utils import translate_isa_string_to_enum, fix_ra_dec, \
+        return_file_with_rbins, convert_to_native_endian, \
         sys_pipes, process_weights
     from future.utils import bytes_to_native_str
 
     if autocorr == 0:
         if RA2 is None or DEC2 is None:
-            msg = "Must pass valid arrays for RA2/DEC2 for "\
+            msg = "Must pass valid arrays for RA2/DEC2 for " \
                   "computing cross-correlation"
             raise ValueError(msg)
     else:
         RA2 = np.empty(1)
         DEC2 = np.empty(1)
 
-    weights1, weights2 = process_weights(weights1, weights2, RA1, RA2, weight_type, autocorr)
+    weights1, weights2 = \
+        process_weights(weights1, weights2, RA1, RA2, weight_type, autocorr)
 
     # Ensure all input arrays are native endian
     RA1, DEC1, weights1, RA2, DEC2, weights2 = [
-            convert_to_native_endian(arr, warn=True) for arr in
-            [RA1, DEC1, weights1, RA2, DEC2, weights2]]
+        convert_to_native_endian(arr, warn=True) for arr in
+        [RA1, DEC1, weights1, RA2, DEC2, weights2]]
 
     fix_ra_dec(RA1, DEC1)
     if autocorr == 0:
@@ -303,7 +305,6 @@ def DDtheta_mocks(autocorr, nthreads, binfile,
     if link_in_ra is True:
         link_in_dec = True
 
-
     # Passing None parameters breaks the parsing code, so avoid this
     kwargs = {}
     for k in ['weights1', 'weights2', 'weight_type', 'RA2', 'DEC2']:
@@ -314,20 +315,21 @@ def DDtheta_mocks(autocorr, nthreads, binfile,
     integer_isa = translate_isa_string_to_enum(isa)
     rbinfile, delete_after_use = return_file_with_rbins(binfile)
     with sys_pipes():
-      extn_results = DDtheta_mocks_extn(autocorr, nthreads, rbinfile,
-                                        RA1, DEC1,
-                                        verbose=verbose,
-                                        link_in_dec=link_in_dec,
-                                        link_in_ra=link_in_ra,
-                                        output_thetaavg=output_thetaavg,
-                                        fast_acos=fast_acos,
-                                        ra_refine_factor=ra_refine_factor,
-                                        dec_refine_factor=dec_refine_factor,
-                                        max_cells_per_dim=max_cells_per_dim,
-                                        copy_particles=copy_particles,
-                                        enable_min_sep_opt=enable_min_sep_opt,
-                                        c_api_timer=c_api_timer,
-                                        isa=integer_isa, **kwargs)
+        extn_results = DDtheta_mocks_extn(
+            autocorr, nthreads, rbinfile,
+            RA1, DEC1,
+            verbose=verbose,
+            link_in_dec=link_in_dec,
+            link_in_ra=link_in_ra,
+            output_thetaavg=output_thetaavg,
+            fast_acos=fast_acos,
+            ra_refine_factor=ra_refine_factor,
+            dec_refine_factor=dec_refine_factor,
+            max_cells_per_dim=max_cells_per_dim,
+            copy_particles=copy_particles,
+            enable_min_sep_opt=enable_min_sep_opt,
+            c_api_timer=c_api_timer,
+            isa=integer_isa, **kwargs)
     if extn_results is None:
         msg = "RuntimeError occurred"
         raise RuntimeError(msg)
@@ -351,6 +353,299 @@ def DDtheta_mocks(autocorr, nthreads, binfile,
         return results, api_time
 
 
+def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
+                                        RA1, DEC1, RA2=None, DEC2=None,
+                                        link_in_dec=True, link_in_ra=True,
+                                        verbose=False, output_thetaavg=False,
+                                        max_cells_per_dim=100,
+                                        isa=r'fastest',
+                                        maxbinref=3, nrepeats=3,
+                                        return_runtimes=False):
+    """
+
+    Parameters
+    ----------
+    autocorr : boolean, required
+        Boolean flag for auto/cross-correlation. If autocorr is set to 1,
+        then the second set of particle positions are not required.
+
+    nthreads : integer
+        Number of threads to use.
+
+    binfile: string or an list/array of floats. Units: degrees.
+        For string input: filename specifying the ``theta`` bins for
+        ``DDtheta_mocks``. The file should contain white-space separated values
+        of (thetamin, thetamax)  for each ``theta`` wanted. The bins need to be
+        contiguous and sorted in increasing order (smallest bins come first).
+
+        For array-like input: A sequence of ``theta`` values that provides the
+        bin-edges. For example,
+        ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid
+        input specifying **14** (logarithmic) bins between 0.1 and 10.0
+        degrees. This array does not need to be sorted.
+
+    RA1 : array-like, real (float/double)
+        The array of Right Ascensions for the first set of points. RA1's
+        are expected to be in [0.0, 360.0], but the code will try to fix cases
+        where the RA1's are in [-180, 180.0]. For peace of mind, always supply
+        RA1's in [0.0, 360.0].
+
+        Calculations are done in the precision of the supplied arrays.
+
+    DEC1 : array-like, real (float/double)
+        Array of Declinations for the first set of points. DEC1's are expected
+        to be in the [-90.0, 90.0], but the code will try to fix cases where
+        the DEC1's are in [0.0, 180.0]. Again, for peace of mind, always supply
+        DEC1's in [-90.0, 90.0].
+        Must be of same precision type as RA1.
+
+    RA2 : array-like, real (float/double)
+        The array of Right Ascensions for the second set of points. RA2's
+        are expected to be in [0.0, 360.0], but the code will try to fix cases
+        where the RA2's are in [-180, 180.0]. For peace of mind, always supply
+        RA2's in [0.0, 360.0].
+        Must be of same precision type as RA1/DEC1.
+
+    DEC2 : array-like, real (float/double)
+        Array of Declinations for the second set of points. DEC2's are expected
+        to be in the [-90.0, 90.0], but the code will try to fix cases where
+        the DEC2's are in [0.0, 180.0]. Again, for peace of mind, always supply
+        DEC2's in [-90.0, 90.0].
+        Must be of same precision type as RA1/DEC1.
+
+    verbose : boolean (default false)
+        Boolean flag to control output of informational messages
+
+    output_thetaavg : boolean (default false)
+        Boolean flag to output the average ``\theta`` for each bin. Code will
+        run slower if you set this flag.
+
+        If you are calculating in single-precision, ``thetaavg`` will
+        suffer from numerical loss of precision and can not be trusted. If you
+        need accurate ``thetaavg`` values, then pass in double precision arrays
+        for ``RA/DEC``.
+
+    isa: string, case-insensitive (default ``fastest``)
+        Controls the runtime dispatch for the instruction set to use. Options
+        are: [``fastest``, ``avx512f``, ``avx``, ``sse42``, ``fallback``]
+
+        Setting isa to ``fastest`` will pick the fastest available instruction
+        set on the current computer. However, if you set ``isa`` to, say,
+        ``avx`` and ``avx`` is not available on the computer, then the code
+        will revert to using ``fallback`` (even though ``sse42`` might be
+        available).
+
+        Unless you are benchmarking the different instruction sets, you should
+        always leave ``isa`` to the default value. And if you *are*
+        benchmarking, then the string supplied here gets translated into an
+        ``enum`` for the instruction set defined in ``utils/defs.h``.
+
+    max_cells_per_dim: integer, default is 100, typical values in [50-300]
+        Controls the maximum number of cells per dimension. Total number of
+        cells can be up to (max_cells_per_dim)^3. Only increase if ``rpmax`` is
+        too small relative to the boxsize (and increasing helps the runtime).
+
+    maxbinref: integer (default 3)
+        The maximum ``bin refine factor`` to use along each dimension. From
+        experience, values larger than 3 do not improve ``wp`` runtime.
+
+        Runtime of module scales as ``maxbinref^3``, so change the value of
+        ``maxbinref`` with caution.
+
+    nrepeats: integer (default 3)
+        Number of times to repeat the timing for an individual run. Accounts
+        for the dispersion in runtimes on computers with multiple user
+        processes.
+
+    return_runtimes: boolean (default ``false``)
+        If set, also returns the array of runtimes.
+
+    Returns
+    -------
+    (nRA, nDEC) : tuple of integers
+        The combination of ``bin refine factors`` along each dimension that
+        produces the fastest code.
+
+    runtimes : numpy structured array
+
+        Only returned if ``return_runtimes`` is set, then the return value
+        is a tuple containing ((nRA, nDEC), runtimes). ``runtimes`` is a
+        ``numpy`` structured array containing the fields, [``nRA``, ``nDEC``,
+        ``avg_runtime``, ``sigma_time``]. Here, ``avg_runtime`` is the
+        average time, measured over ``nrepeats`` invocations, spent in
+        the python extension. ``sigma_time`` is the dispersion of the
+        run times across those ``nrepeats`` invocations.
+
+    Example
+    -------
+    >>> from __future__ import print_function
+    >>> import numpy as np
+    >>> import time
+    >>> from math import pi
+    >>> from os.path import dirname, abspath, join as pjoin
+    >>> import Corrfunc
+    >>> from Corrfunc.mocks.DDtheta_mocks \
+        import find_fastest_DDtheta_mocks_bin_refs
+    >>> binfile = pjoin(dirname(abspath(Corrfunc.__file__)),
+    ...                 "../mocks/tests/", "angular_bins")
+    >>> N = 100000
+    >>> nthreads = 4
+    >>> seed = 42
+    >>> np.random.seed(seed)
+    >>> RA1 = np.random.uniform(0.0, 2.0*pi, N)*180.0/pi
+    >>> cos_theta = np.random.uniform(-1.0, 1.0, N)
+    >>> DEC1 = 90.0 - np.arccos(cos_theta)*180.0/pi
+    >>> autocorr = 1
+    >>> best, _ = find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, \
+    ...                                               binfile, RA1, DEC1, \
+    ...                                               return_runtimes=True)
+    >>> print(best) # doctest:+SKIP
+    (2, 1)
+
+    .. note:: Since the result might change depending on the computer,
+    doctest is skipped for this function.
+    """
+
+    import logging
+
+    weights1 = None
+    weights2 = None
+    weight_type = None
+
+    try:
+        from Corrfunc._countpairs_mocks import countpairs_theta_mocks as \
+            DDtheta_mocks_extn
+    except ImportError:
+        msg0 = "Could not import the C extension for the angular " \
+              "correlation function for mocks."
+        raise ImportError(msg0)
+
+    import numpy as np
+    from Corrfunc.utils import translate_isa_string_to_enum, fix_ra_dec, \
+        return_file_with_rbins, convert_to_native_endian, process_weights
+    from future.utils import bytes_to_native_str
+    import itertools
+    import time
+
+    if autocorr == 0:
+        if RA2 is None or DEC2 is None:
+            msg1 = "Must pass valid arrays for RA2/DEC2 for " \
+                  "computing cross-correlation."
+            raise ValueError(msg1)
+    else:
+        RA2 = np.empty(1)
+        DEC2 = np.empty(1)
+
+    if link_in_ra is True:
+        link_in_dec = True
+
+    if link_in_dec is False & link_in_ra is False:
+        msg2 = "Warning: Brute-force calculation without any gridding " \
+               "is forced,as link_in_dec and link_in_ra are both set " \
+               "to False. Please be sure to turn on at least link_in_dec," \
+               "or both link_in_dec and link_in_ra on."
+        raise ValueError(msg2)
+
+    if link_in_dec is True:
+        if link_in_ra is False:
+            msg3 = "Info: Gridding in the declination only, as link_in_dec " \
+                   "is set to True while link_in_ra is set to False." \
+                   "Thus looping is only needed over the range of " \
+                   "(min, max) bin, with refinements in the declination."
+            print(msg3)
+
+    weights1, weights2 = \
+        process_weights(weights1, weights2, RA1, RA2, weight_type, autocorr)
+
+    # Ensure all input arrays are native endian
+    RA1, DEC1, weights1, RA2, DEC2, weights2 = [
+        convert_to_native_endian(arr, warn=True) for arr in
+        [RA1, DEC1, weights1, RA2, DEC2, weights2]]
+
+    fix_ra_dec(RA1, DEC1)
+    if autocorr == 0:
+        fix_ra_dec(RA2, DEC2)
+
+    # Passing None parameters breaks the parsing code, so avoid this
+    kwargs = {}
+    for k in ['weights1', 'weights2', 'weight_type', 'RA2', 'DEC2']:
+        v = locals()[k]
+        if v is not None:
+            kwargs[k] = v
+
+    integer_isa = translate_isa_string_to_enum(isa)
+    rbinfile, delete_after_use = return_file_with_rbins(binfile)
+    bin_refs = np.arange(1, maxbinref + 1)
+    bin_ref_perms = itertools.product(bin_refs, bin_refs)
+    dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
+                      (bytes_to_native_str(b'nDEC'), np.int),
+                      (bytes_to_native_str(b'avg_time'), np.float),
+                      (bytes_to_native_str(b'sigma_time'), np.float)])
+    all_runtimes = np.zeros(maxbinref ** 3, dtype=dtype)
+    all_runtimes[:] = np.inf
+
+    for ii, (nRA, nDEC) in enumerate(bin_ref_perms):
+        total_runtime = 0.0
+        total_sqr_runtime = 0.0
+
+        for _ in range(nrepeats):
+            t0 = time.time()
+            extn_results = DDtheta_mocks_extn(
+                autocorr, nthreads, rbinfile,
+                RA1, DEC1, RA2, DEC2,
+                link_in_dec=link_in_dec,
+                link_in_ra=link_in_ra,
+                verbose=verbose,
+                output_thetaavg=output_thetaavg,
+                ra_refine_factor=nRA,
+                dec_refine_factor=nDEC,
+                max_cells_per_dim=max_cells_per_dim,
+                isa=integer_isa)
+            t1 = time.time()
+
+            if extn_results is None:
+                msg4 = "RuntimeError occurred with perms = ({0}, {1})". \
+                    format(nRA, nDEC)
+                print(msg4)
+                print("Continuing...")
+                continue
+
+            dt = (t1 - t0)
+            total_runtime += dt
+            total_sqr_runtime += dt * dt
+
+        avg_runtime = total_runtime / nrepeats
+
+        # variance = E(X^2) - E^2(X)
+        # disp = sqrt(variance)
+        runtime_disp = np.sqrt(total_sqr_runtime / nrepeats -
+                               avg_runtime * avg_runtime)
+
+        all_runtimes[ii]['nRA'] = nRA
+        all_runtimes[ii]['nDEC'] = nDEC
+        all_runtimes[ii]['avg_time'] = avg_runtime
+        all_runtimes[ii]['sigma_time'] = runtime_disp
+
+    if delete_after_use:
+        import os
+        os.remove(rbinfile)
+
+    all_runtimes.sort(order=('avg_time', 'sigma_time'))
+    results = (all_runtimes[0]['nRA'],
+               all_runtimes[0]['nDEC'])
+
+    optional_returns = return_runtimes
+    if not optional_returns:
+        ret = results
+    else:
+        ret = (results,)
+        if return_runtimes:
+            ret += (all_runtimes,)
+
+    return ret
+
+
 if __name__ == '__main__':
     import doctest
+
     doctest.testmod()
diff --git a/Corrfunc/theory/wp.py b/Corrfunc/theory/wp.py
index 31bf9269..64d3c71e 100644
--- a/Corrfunc/theory/wp.py
+++ b/Corrfunc/theory/wp.py
@@ -116,7 +116,7 @@ def find_fastest_wp_bin_refs(boxsize, pimax, nthreads, binfile, X, Y, Z,
 
     runtimes : numpy structured array
 
-        if ``return_runtimes`` is set, then the return value is a tuple
+        Only returned if ``return_runtimes`` is set, then the return value is a tuple
         containing ((nx, ny, nz), runtimes). ``runtimes`` is a ``numpy``
         structured array containing the fields, [``nx``, ``ny``, ``nz``,
         ``avg_runtime``, ``sigma_time``]. Here, ``avg_runtime`` is the

From c845ba6097c1055ae5c4e48e0f55b8c2baa7c493 Mon Sep 17 00:00:00 2001
From: Kris Stern <krisastern@gobuddy.asia>
Date: Wed, 25 Mar 2020 20:58:26 +0800
Subject: [PATCH 02/16] Change & into and for proper Python logical operator

---
 Corrfunc/mocks/DDtheta_mocks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py
index db7ec9e7..77a34bb1 100644
--- a/Corrfunc/mocks/DDtheta_mocks.py
+++ b/Corrfunc/mocks/DDtheta_mocks.py
@@ -539,7 +539,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
     if link_in_ra is True:
         link_in_dec = True
 
-    if link_in_dec is False & link_in_ra is False:
+    if link_in_dec is False and link_in_ra is False:
         msg2 = "Warning: Brute-force calculation without any gridding " \
                "is forced,as link_in_dec and link_in_ra are both set " \
                "to False. Please be sure to turn on at least link_in_dec," \

From 1154c4b29caadfbb64975f52df8125cbf5aae29e Mon Sep 17 00:00:00 2001
From: Kris Stern <krisastern@gobuddy.asia>
Date: Wed, 25 Mar 2020 21:00:52 +0800
Subject: [PATCH 03/16] Remove unused import logging statement

---
 Corrfunc/mocks/DDtheta_mocks.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py
index 77a34bb1..1f84987f 100644
--- a/Corrfunc/mocks/DDtheta_mocks.py
+++ b/Corrfunc/mocks/DDtheta_mocks.py
@@ -506,8 +506,6 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
     doctest is skipped for this function.
     """
 
-    import logging
-
     weights1 = None
     weights2 = None
     weight_type = None

From 240de6c923bb0f0ac9beaf7a04d919dda49af428 Mon Sep 17 00:00:00 2001
From: Kris Stern <krisastern@gobuddy.asia>
Date: Thu, 26 Mar 2020 19:42:35 +0800
Subject: [PATCH 04/16] Use ra_refine_factor=1 when only link_in_dec is True

---
 Corrfunc/mocks/DDtheta_mocks.py | 278 +++++++++++++++++++++-----------
 1 file changed, 186 insertions(+), 92 deletions(-)

diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py
index 1f84987f..9bcfb47b 100644
--- a/Corrfunc/mocks/DDtheta_mocks.py
+++ b/Corrfunc/mocks/DDtheta_mocks.py
@@ -539,108 +539,202 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
 
     if link_in_dec is False and link_in_ra is False:
         msg2 = "Warning: Brute-force calculation without any gridding " \
-               "is forced,as link_in_dec and link_in_ra are both set " \
-               "to False. Please be sure to turn on at least link_in_dec," \
+               "is forced, as link_in_dec and link_in_ra are both set " \
+               "to False. Please be sure to turn on at least link_in_dec, " \
                "or both link_in_dec and link_in_ra on."
         raise ValueError(msg2)
 
+    if link_in_dec is True:
+        if link_in_ra is True:
+
+            weights1, weights2 = \
+                process_weights(weights1, weights2, RA1, RA2, weight_type, autocorr)
+
+            # Ensure all input arrays are native endian
+            RA1, DEC1, weights1, RA2, DEC2, weights2 = [
+                convert_to_native_endian(arr, warn=True) for arr in
+                [RA1, DEC1, weights1, RA2, DEC2, weights2]]
+
+            fix_ra_dec(RA1, DEC1)
+            if autocorr == 0:
+                fix_ra_dec(RA2, DEC2)
+
+            # Passing None parameters breaks the parsing code, so avoid this
+            kwargs = {}
+            for k in ['weights1', 'weights2', 'weight_type', 'RA2', 'DEC2']:
+                v = locals()[k]
+                if v is not None:
+                    kwargs[k] = v
+
+            integer_isa = translate_isa_string_to_enum(isa)
+            rbinfile, delete_after_use = return_file_with_rbins(binfile)
+            bin_refs = np.arange(1, maxbinref + 1)
+            bin_ref_perms = itertools.product(bin_refs, bin_refs)
+            dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
+                              (bytes_to_native_str(b'nDEC'), np.int),
+                              (bytes_to_native_str(b'avg_time'), np.float),
+                              (bytes_to_native_str(b'sigma_time'), np.float)])
+            all_runtimes = np.zeros(maxbinref ** 3, dtype=dtype)
+            all_runtimes[:] = np.inf
+
+            for ii, (nRA, nDEC) in enumerate(bin_ref_perms):
+                total_runtime = 0.0
+                total_sqr_runtime = 0.0
+
+                for _ in range(nrepeats):
+                    t0 = time.time()
+                    extn_results = DDtheta_mocks_extn(
+                        autocorr, nthreads, rbinfile,
+                        RA1, DEC1, RA2, DEC2,
+                        link_in_dec=link_in_dec,
+                        link_in_ra=link_in_ra,
+                        verbose=verbose,
+                        output_thetaavg=output_thetaavg,
+                        ra_refine_factor=nRA,
+                        dec_refine_factor=nDEC,
+                        max_cells_per_dim=max_cells_per_dim,
+                        isa=integer_isa)
+                    t1 = time.time()
+
+                    if extn_results is None:
+                        msg4 = "RuntimeError occurred with perms = ({0}, {1})". \
+                            format(nRA, nDEC)
+                        print(msg4)
+                        print("Continuing...")
+                        continue
+
+                    dt = (t1 - t0)
+                    total_runtime += dt
+                    total_sqr_runtime += dt * dt
+
+                avg_runtime = total_runtime / nrepeats
+
+                # variance = E(X^2) - E^2(X)
+                # disp = sqrt(variance)
+                runtime_disp = np.sqrt(total_sqr_runtime / nrepeats -
+                                       avg_runtime * avg_runtime)
+
+                all_runtimes[ii]['nRA'] = nRA
+                all_runtimes[ii]['nDEC'] = nDEC
+                all_runtimes[ii]['avg_time'] = avg_runtime
+                all_runtimes[ii]['sigma_time'] = runtime_disp
+
+            if delete_after_use:
+                import os
+                os.remove(rbinfile)
+
+            all_runtimes.sort(order=('avg_time', 'sigma_time'))
+            results = (all_runtimes[0]['nRA'],
+                       all_runtimes[0]['nDEC'])
+
+            optional_returns = return_runtimes
+            if not optional_returns:
+                ret = results
+            else:
+                ret = (results,)
+                if return_runtimes:
+                    ret += (all_runtimes,)
+
+            return ret
+
     if link_in_dec is True:
         if link_in_ra is False:
+
             msg3 = "Info: Gridding in the declination only, as link_in_dec " \
-                   "is set to True while link_in_ra is set to False." \
+                   "is set to True while link_in_ra is set to False. " \
                    "Thus looping is only needed over the range of " \
                    "(min, max) bin, with refinements in the declination."
             print(msg3)
 
-    weights1, weights2 = \
-        process_weights(weights1, weights2, RA1, RA2, weight_type, autocorr)
-
-    # Ensure all input arrays are native endian
-    RA1, DEC1, weights1, RA2, DEC2, weights2 = [
-        convert_to_native_endian(arr, warn=True) for arr in
-        [RA1, DEC1, weights1, RA2, DEC2, weights2]]
-
-    fix_ra_dec(RA1, DEC1)
-    if autocorr == 0:
-        fix_ra_dec(RA2, DEC2)
-
-    # Passing None parameters breaks the parsing code, so avoid this
-    kwargs = {}
-    for k in ['weights1', 'weights2', 'weight_type', 'RA2', 'DEC2']:
-        v = locals()[k]
-        if v is not None:
-            kwargs[k] = v
-
-    integer_isa = translate_isa_string_to_enum(isa)
-    rbinfile, delete_after_use = return_file_with_rbins(binfile)
-    bin_refs = np.arange(1, maxbinref + 1)
-    bin_ref_perms = itertools.product(bin_refs, bin_refs)
-    dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
-                      (bytes_to_native_str(b'nDEC'), np.int),
-                      (bytes_to_native_str(b'avg_time'), np.float),
-                      (bytes_to_native_str(b'sigma_time'), np.float)])
-    all_runtimes = np.zeros(maxbinref ** 3, dtype=dtype)
-    all_runtimes[:] = np.inf
-
-    for ii, (nRA, nDEC) in enumerate(bin_ref_perms):
-        total_runtime = 0.0
-        total_sqr_runtime = 0.0
-
-        for _ in range(nrepeats):
-            t0 = time.time()
-            extn_results = DDtheta_mocks_extn(
-                autocorr, nthreads, rbinfile,
-                RA1, DEC1, RA2, DEC2,
-                link_in_dec=link_in_dec,
-                link_in_ra=link_in_ra,
-                verbose=verbose,
-                output_thetaavg=output_thetaavg,
-                ra_refine_factor=nRA,
-                dec_refine_factor=nDEC,
-                max_cells_per_dim=max_cells_per_dim,
-                isa=integer_isa)
-            t1 = time.time()
-
-            if extn_results is None:
-                msg4 = "RuntimeError occurred with perms = ({0}, {1})". \
-                    format(nRA, nDEC)
-                print(msg4)
-                print("Continuing...")
-                continue
-
-            dt = (t1 - t0)
-            total_runtime += dt
-            total_sqr_runtime += dt * dt
-
-        avg_runtime = total_runtime / nrepeats
-
-        # variance = E(X^2) - E^2(X)
-        # disp = sqrt(variance)
-        runtime_disp = np.sqrt(total_sqr_runtime / nrepeats -
-                               avg_runtime * avg_runtime)
-
-        all_runtimes[ii]['nRA'] = nRA
-        all_runtimes[ii]['nDEC'] = nDEC
-        all_runtimes[ii]['avg_time'] = avg_runtime
-        all_runtimes[ii]['sigma_time'] = runtime_disp
-
-    if delete_after_use:
-        import os
-        os.remove(rbinfile)
-
-    all_runtimes.sort(order=('avg_time', 'sigma_time'))
-    results = (all_runtimes[0]['nRA'],
-               all_runtimes[0]['nDEC'])
-
-    optional_returns = return_runtimes
-    if not optional_returns:
-        ret = results
-    else:
-        ret = (results,)
-        if return_runtimes:
-            ret += (all_runtimes,)
-
-    return ret
+            weights1, weights2 = \
+                process_weights(weights1, weights2, RA1, RA2, weight_type, autocorr)
+
+            # Ensure all input arrays are native endian
+            RA1, DEC1, weights1, RA2, DEC2, weights2 = [
+                convert_to_native_endian(arr, warn=True) for arr in
+                [RA1, DEC1, weights1, RA2, DEC2, weights2]]
+
+            fix_ra_dec(RA1, DEC1)
+            if autocorr == 0:
+                fix_ra_dec(RA2, DEC2)
+
+            # Passing None parameters breaks the parsing code, so avoid this
+            kwargs = {}
+            for k in ['weights1', 'weights2', 'weight_type', 'RA2', 'DEC2']:
+                v = locals()[k]
+                if v is not None:
+                    kwargs[k] = v
+
+            integer_isa = translate_isa_string_to_enum(isa)
+            rbinfile, delete_after_use = return_file_with_rbins(binfile)
+            bin_refs = np.arange(1, maxbinref + 1)
+            bin_ref_perms = itertools.product(bin_refs, bin_refs)
+            dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
+                              (bytes_to_native_str(b'nDEC'), np.int),
+                              (bytes_to_native_str(b'avg_time'), np.float),
+                              (bytes_to_native_str(b'sigma_time'), np.float)])
+            all_runtimes = np.zeros(maxbinref ** 3, dtype=dtype)
+            all_runtimes[:] = np.inf
+
+            for ii, (nRA, nDEC) in enumerate(bin_ref_perms):
+                total_runtime = 0.0
+                total_sqr_runtime = 0.0
+
+                for _ in range(nrepeats):
+                    t0 = time.time()
+                    extn_results = DDtheta_mocks_extn(
+                        autocorr, nthreads, rbinfile,
+                        RA1, DEC1, RA2, DEC2,
+                        link_in_dec=link_in_dec,
+                        link_in_ra=link_in_ra,
+                        verbose=verbose,
+                        output_thetaavg=output_thetaavg,
+                        ra_refine_factor=1,
+                        dec_refine_factor=nDEC,
+                        max_cells_per_dim=max_cells_per_dim,
+                        isa=integer_isa)
+                    t1 = time.time()
+
+                    if extn_results is None:
+                        msg4 = "RuntimeError occurred with perms = ({0}, {1})". \
+                            format(nRA, nDEC)
+                        print(msg4)
+                        print("Continuing...")
+                        continue
+
+                    dt = (t1 - t0)
+                    total_runtime += dt
+                    total_sqr_runtime += dt * dt
+
+                avg_runtime = total_runtime / nrepeats
+
+                # variance = E(X^2) - E^2(X)
+                # disp = sqrt(variance)
+                runtime_disp = np.sqrt(total_sqr_runtime / nrepeats -
+                                       avg_runtime * avg_runtime)
+
+                all_runtimes[ii]['nRA'] = nRA
+                all_runtimes[ii]['nDEC'] = nDEC
+                all_runtimes[ii]['avg_time'] = avg_runtime
+                all_runtimes[ii]['sigma_time'] = runtime_disp
+
+            if delete_after_use:
+                import os
+                os.remove(rbinfile)
+
+            all_runtimes.sort(order=('avg_time', 'sigma_time'))
+            results = (all_runtimes[0]['nRA'],
+                       all_runtimes[0]['nDEC'])
+
+            optional_returns = return_runtimes
+            if not optional_returns:
+                ret = results
+            else:
+                ret = (results,)
+                if return_runtimes:
+                    ret += (all_runtimes,)
+
+            return ret
 
 
 if __name__ == '__main__':

From a658c5569d0b0c304fb481440664406f59e83473 Mon Sep 17 00:00:00 2001
From: Kris Stern <krisastern@gobuddy.asia>
Date: Sun, 29 Mar 2020 18:33:17 +0800
Subject: [PATCH 05/16] Remove unwanted trailing white spaces

---
 Corrfunc/mocks/DDtheta_mocks.py | 53 ++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py
index 9bcfb47b..4374dab6 100644
--- a/Corrfunc/mocks/DDtheta_mocks.py
+++ b/Corrfunc/mocks/DDtheta_mocks.py
@@ -266,32 +266,31 @@ def DDtheta_mocks(autocorr, nthreads, binfile,
       7.079458  10.000000   8.622400   37842502   1.000000
 
     """
-
     try:
-        from Corrfunc._countpairs_mocks import countpairs_theta_mocks as \
+        from Corrfunc._countpairs_mocks import countpairs_theta_mocks as\
             DDtheta_mocks_extn
     except ImportError:
-        msg = "Could not import the C extension for the angular " \
+        msg = "Could not import the C extension for the angular "\
               "correlation function for mocks."
         raise ImportError(msg)
 
     import numpy as np
-    from Corrfunc.utils import translate_isa_string_to_enum, fix_ra_dec, \
-        return_file_with_rbins, convert_to_native_endian, \
+    from Corrfunc.utils import translate_isa_string_to_enum, fix_ra_dec,\
+        return_file_with_rbins, convert_to_native_endian,\
         sys_pipes, process_weights
     from future.utils import bytes_to_native_str
 
     if autocorr == 0:
         if RA2 is None or DEC2 is None:
-            msg = "Must pass valid arrays for RA2/DEC2 for " \
+            msg = "Must pass valid arrays for RA2/DEC2 for "\
                   "computing cross-correlation"
             raise ValueError(msg)
     else:
         RA2 = np.empty(1)
         DEC2 = np.empty(1)
 
-    weights1, weights2 = \
-        process_weights(weights1, weights2, RA1, RA2, weight_type, autocorr)
+    weights1, weights2 = process_weights(weights1, weights2,
+                                         RA1, RA2, weight_type, autocorr)
 
     # Ensure all input arrays are native endian
     RA1, DEC1, weights1, RA2, DEC2, weights2 = [
@@ -484,7 +483,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
     >>> from math import pi
     >>> from os.path import dirname, abspath, join as pjoin
     >>> import Corrfunc
-    >>> from Corrfunc.mocks.DDtheta_mocks \
+    >>> from Corrfunc.mocks.DDtheta_mocks\
         import find_fastest_DDtheta_mocks_bin_refs
     >>> binfile = pjoin(dirname(abspath(Corrfunc.__file__)),
     ...                 "../mocks/tests/", "angular_bins")
@@ -496,8 +495,8 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
     >>> cos_theta = np.random.uniform(-1.0, 1.0, N)
     >>> DEC1 = 90.0 - np.arccos(cos_theta)*180.0/pi
     >>> autocorr = 1
-    >>> best, _ = find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, \
-    ...                                               binfile, RA1, DEC1, \
+    >>> best, _ = find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads,\
+    ...                                               binfile, RA1, DEC1,\
     ...                                               return_runtimes=True)
     >>> print(best) # doctest:+SKIP
     (2, 1)
@@ -511,15 +510,15 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
     weight_type = None
 
     try:
-        from Corrfunc._countpairs_mocks import countpairs_theta_mocks as \
+        from Corrfunc._countpairs_mocks import countpairs_theta_mocks as\
             DDtheta_mocks_extn
     except ImportError:
-        msg0 = "Could not import the C extension for the angular " \
+        msg0 = "Could not import the C extension for the angular "\
               "correlation function for mocks."
         raise ImportError(msg0)
 
     import numpy as np
-    from Corrfunc.utils import translate_isa_string_to_enum, fix_ra_dec, \
+    from Corrfunc.utils import translate_isa_string_to_enum, fix_ra_dec,\
         return_file_with_rbins, convert_to_native_endian, process_weights
     from future.utils import bytes_to_native_str
     import itertools
@@ -527,7 +526,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
 
     if autocorr == 0:
         if RA2 is None or DEC2 is None:
-            msg1 = "Must pass valid arrays for RA2/DEC2 for " \
+            msg1 = "Must pass valid arrays for RA2/DEC2 for "\
                   "computing cross-correlation."
             raise ValueError(msg1)
     else:
@@ -538,17 +537,17 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
         link_in_dec = True
 
     if link_in_dec is False and link_in_ra is False:
-        msg2 = "Warning: Brute-force calculation without any gridding " \
-               "is forced, as link_in_dec and link_in_ra are both set " \
-               "to False. Please be sure to turn on at least link_in_dec, " \
+        msg2 = "Warning: Brute-force calculation without any gridding "\
+               "is forced, as link_in_dec and link_in_ra are both set "\
+               "to False. Please be sure to turn on at least link_in_dec, "\
                "or both link_in_dec and link_in_ra on."
         raise ValueError(msg2)
 
     if link_in_dec is True:
         if link_in_ra is True:
 
-            weights1, weights2 = \
-                process_weights(weights1, weights2, RA1, RA2, weight_type, autocorr)
+            weights1, weights2 = process_weights(weights1, weights2, RA1, RA2,
+                                                 weight_type, autocorr)
 
             # Ensure all input arrays are native endian
             RA1, DEC1, weights1, RA2, DEC2, weights2 = [
@@ -597,7 +596,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
                     t1 = time.time()
 
                     if extn_results is None:
-                        msg4 = "RuntimeError occurred with perms = ({0}, {1})". \
+                        msg4 = "RuntimeError occurred with perms = ({0}, {1})".\
                             format(nRA, nDEC)
                         print(msg4)
                         print("Continuing...")
@@ -640,14 +639,14 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
     if link_in_dec is True:
         if link_in_ra is False:
 
-            msg3 = "Info: Gridding in the declination only, as link_in_dec " \
-                   "is set to True while link_in_ra is set to False. " \
-                   "Thus looping is only needed over the range of " \
+            msg3 = "Info: Gridding in the declination only, as link_in_dec "\
+                   "is set to True while link_in_ra is set to False. "\
+                   "Thus looping is only needed over the range of "\
                    "(min, max) bin, with refinements in the declination."
             print(msg3)
 
-            weights1, weights2 = \
-                process_weights(weights1, weights2, RA1, RA2, weight_type, autocorr)
+            weights1, weights2 = process_weights(weights1, weights2, RA1, RA2,
+                                                 weight_type, autocorr)
 
             # Ensure all input arrays are native endian
             RA1, DEC1, weights1, RA2, DEC2, weights2 = [
@@ -696,7 +695,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
                     t1 = time.time()
 
                     if extn_results is None:
-                        msg4 = "RuntimeError occurred with perms = ({0}, {1})". \
+                        msg4 = "RuntimeError occurred with perms = ({0}, {1})".\
                             format(nRA, nDEC)
                         print(msg4)
                         print("Continuing...")

From d7dfdb82b66fab373d504746764982f61ca5d246 Mon Sep 17 00:00:00 2001
From: Kris Stern <krisastern@gobuddy.asia>
Date: Sun, 29 Mar 2020 18:46:30 +0800
Subject: [PATCH 06/16] Make first batch of changes according to reviewer
 suggestions

---
 Corrfunc/mocks/DDtheta_mocks.py | 222 ++++++++++++++------------------
 1 file changed, 95 insertions(+), 127 deletions(-)

diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py
index 4374dab6..f94a8dd3 100644
--- a/Corrfunc/mocks/DDtheta_mocks.py
+++ b/Corrfunc/mocks/DDtheta_mocks.py
@@ -158,7 +158,7 @@ def DDtheta_mocks(autocorr, nthreads, binfile,
 
     max_cells_per_dim : integer, default is 100, typical values in [50-300]
         Controls the maximum number of cells per dimension. Total number of
-        cells can be up to (max_cells_per_dim)^3. Only increase if ``thetamax``
+        cells can be up to (thetamax)^3. Only increase if ``thetamax``
         is too small relative to the boxsize (and increasing helps the
         runtime).
 
@@ -441,16 +441,18 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
 
     max_cells_per_dim: integer, default is 100, typical values in [50-300]
         Controls the maximum number of cells per dimension. Total number of
-        cells can be up to (max_cells_per_dim)^3. Only increase if ``rpmax`` is
+        cells can be up to (thetamax)^3. Only increase if ``rpmax`` is
         too small relative to the boxsize (and increasing helps the runtime).
 
     maxbinref: integer (default 3)
-        The maximum ``bin refine factor`` to use along each dimension. From
-        experience, values larger than 3 do not improve ``wp`` runtime.
+        The maximum ``bin refine factor`` to use along each dimension.
 
         Runtime of module scales as ``maxbinref^3``, so change the value of
         ``maxbinref`` with caution.
 
+        Note that ``max_cells_per_dim`` might need to be increased
+        to accommodate really large ``maxbinref``.
+
     nrepeats: integer (default 3)
         Number of times to repeat the timing for an individual run. Accounts
         for the dispersion in runtimes on computers with multiple user
@@ -524,6 +526,36 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
     import itertools
     import time
 
+    weights1, weights2 = process_weights(weights1, weights2, RA1, RA2,
+                                         weight_type, autocorr)
+
+    # Ensure all input arrays are native endian
+    RA1, DEC1, weights1, RA2, DEC2, weights2 = [
+        convert_to_native_endian(arr, warn=True) for arr in
+        [RA1, DEC1, weights1, RA2, DEC2, weights2]]
+
+    fix_ra_dec(RA1, DEC1)
+    if autocorr == 0:
+        fix_ra_dec(RA2, DEC2)
+
+    # Passing None parameters breaks the parsing code, so avoid this
+    kwargs = {}
+    for k in ['weights1', 'weights2', 'weight_type', 'RA2', 'DEC2']:
+        v = locals()[k]
+        if v is not None:
+            kwargs[k] = v
+
+    integer_isa = translate_isa_string_to_enum(isa)
+    rbinfile, delete_after_use = return_file_with_rbins(binfile)
+    bin_refs = np.arange(1, maxbinref + 1)
+    bin_ref_perms = itertools.product(bin_refs, bin_refs)
+    dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
+                      (bytes_to_native_str(b'nDEC'), np.int),
+                      (bytes_to_native_str(b'avg_time'), np.float),
+                      (bytes_to_native_str(b'sigma_time'), np.float)])
+    all_runtimes = np.zeros(maxbinref ** 3, dtype=dtype)
+    all_runtimes[:] = np.inf
+
     if autocorr == 0:
         if RA2 is None or DEC2 is None:
             msg1 = "Must pass valid arrays for RA2/DEC2 for "\
@@ -543,138 +575,74 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
                "or both link_in_dec and link_in_ra on."
         raise ValueError(msg2)
 
-    if link_in_dec is True:
-        if link_in_ra is True:
-
-            weights1, weights2 = process_weights(weights1, weights2, RA1, RA2,
-                                                 weight_type, autocorr)
-
-            # Ensure all input arrays are native endian
-            RA1, DEC1, weights1, RA2, DEC2, weights2 = [
-                convert_to_native_endian(arr, warn=True) for arr in
-                [RA1, DEC1, weights1, RA2, DEC2, weights2]]
-
-            fix_ra_dec(RA1, DEC1)
-            if autocorr == 0:
-                fix_ra_dec(RA2, DEC2)
-
-            # Passing None parameters breaks the parsing code, so avoid this
-            kwargs = {}
-            for k in ['weights1', 'weights2', 'weight_type', 'RA2', 'DEC2']:
-                v = locals()[k]
-                if v is not None:
-                    kwargs[k] = v
-
-            integer_isa = translate_isa_string_to_enum(isa)
-            rbinfile, delete_after_use = return_file_with_rbins(binfile)
-            bin_refs = np.arange(1, maxbinref + 1)
-            bin_ref_perms = itertools.product(bin_refs, bin_refs)
-            dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
-                              (bytes_to_native_str(b'nDEC'), np.int),
-                              (bytes_to_native_str(b'avg_time'), np.float),
-                              (bytes_to_native_str(b'sigma_time'), np.float)])
-            all_runtimes = np.zeros(maxbinref ** 3, dtype=dtype)
-            all_runtimes[:] = np.inf
-
-            for ii, (nRA, nDEC) in enumerate(bin_ref_perms):
-                total_runtime = 0.0
-                total_sqr_runtime = 0.0
-
-                for _ in range(nrepeats):
-                    t0 = time.time()
-                    extn_results = DDtheta_mocks_extn(
-                        autocorr, nthreads, rbinfile,
-                        RA1, DEC1, RA2, DEC2,
-                        link_in_dec=link_in_dec,
-                        link_in_ra=link_in_ra,
-                        verbose=verbose,
-                        output_thetaavg=output_thetaavg,
-                        ra_refine_factor=nRA,
-                        dec_refine_factor=nDEC,
-                        max_cells_per_dim=max_cells_per_dim,
-                        isa=integer_isa)
-                    t1 = time.time()
-
-                    if extn_results is None:
-                        msg4 = "RuntimeError occurred with perms = ({0}, {1})".\
-                            format(nRA, nDEC)
-                        print(msg4)
-                        print("Continuing...")
-                        continue
-
-                    dt = (t1 - t0)
-                    total_runtime += dt
-                    total_sqr_runtime += dt * dt
-
-                avg_runtime = total_runtime / nrepeats
-
-                # variance = E(X^2) - E^2(X)
-                # disp = sqrt(variance)
-                runtime_disp = np.sqrt(total_sqr_runtime / nrepeats -
-                                       avg_runtime * avg_runtime)
-
-                all_runtimes[ii]['nRA'] = nRA
-                all_runtimes[ii]['nDEC'] = nDEC
-                all_runtimes[ii]['avg_time'] = avg_runtime
-                all_runtimes[ii]['sigma_time'] = runtime_disp
-
-            if delete_after_use:
-                import os
-                os.remove(rbinfile)
-
-            all_runtimes.sort(order=('avg_time', 'sigma_time'))
-            results = (all_runtimes[0]['nRA'],
-                       all_runtimes[0]['nDEC'])
-
-            optional_returns = return_runtimes
-            if not optional_returns:
-                ret = results
-            else:
-                ret = (results,)
-                if return_runtimes:
-                    ret += (all_runtimes,)
-
-            return ret
+    if link_in_ra is True:
+        for ii, (nRA, nDEC) in enumerate(bin_ref_perms):
+            total_runtime = 0.0
+            total_sqr_runtime = 0.0
+            for _ in range(nrepeats):
+                t0 = time.time()
+                extn_results = DDtheta_mocks_extn(
+                    autocorr, nthreads, rbinfile,
+                    RA1, DEC1, RA2, DEC2,
+                    link_in_dec=link_in_dec,
+                    link_in_ra=link_in_ra,
+                    verbose=verbose,
+                    output_thetaavg=output_thetaavg,
+                    ra_refine_factor=nRA,
+                    dec_refine_factor=nDEC,
+                    max_cells_per_dim=max_cells_per_dim,
+                    isa=integer_isa)
+                t1 = time.time()
+
+                if extn_results is None:
+                    msg4 = "RuntimeError occurred with perms = ({0}, {1})".\
+                        format(nRA, nDEC)
+                    print(msg4)
+                    print("Continuing...")
+                    continue
+
+                dt = (t1 - t0)
+                total_runtime += dt
+                total_sqr_runtime += dt * dt
+
+            avg_runtime = total_runtime / nrepeats
+
+            # variance = E(X^2) - E^2(X)
+            # disp = sqrt(variance)
+            runtime_disp = np.sqrt(total_sqr_runtime / nrepeats -
+                                   avg_runtime * avg_runtime)
+
+            all_runtimes[ii]['nRA'] = nRA
+            all_runtimes[ii]['nDEC'] = nDEC
+            all_runtimes[ii]['avg_time'] = avg_runtime
+            all_runtimes[ii]['sigma_time'] = runtime_disp
+
+        if delete_after_use:
+            import os
+            os.remove(rbinfile)
+
+        all_runtimes.sort(order=('avg_time', 'sigma_time'))
+        results = (all_runtimes[0]['nRA'],
+                   all_runtimes[0]['nDEC'])
+
+        optional_returns = return_runtimes
+        if not optional_returns:
+            ret = results
+        else:
+            ret = (results,)
+            if return_runtimes:
+                ret += (all_runtimes,)
+
+        return ret
 
     if link_in_dec is True:
         if link_in_ra is False:
-
             msg3 = "Info: Gridding in the declination only, as link_in_dec "\
                    "is set to True while link_in_ra is set to False. "\
                    "Thus looping is only needed over the range of "\
                    "(min, max) bin, with refinements in the declination."
             print(msg3)
 
-            weights1, weights2 = process_weights(weights1, weights2, RA1, RA2,
-                                                 weight_type, autocorr)
-
-            # Ensure all input arrays are native endian
-            RA1, DEC1, weights1, RA2, DEC2, weights2 = [
-                convert_to_native_endian(arr, warn=True) for arr in
-                [RA1, DEC1, weights1, RA2, DEC2, weights2]]
-
-            fix_ra_dec(RA1, DEC1)
-            if autocorr == 0:
-                fix_ra_dec(RA2, DEC2)
-
-            # Passing None parameters breaks the parsing code, so avoid this
-            kwargs = {}
-            for k in ['weights1', 'weights2', 'weight_type', 'RA2', 'DEC2']:
-                v = locals()[k]
-                if v is not None:
-                    kwargs[k] = v
-
-            integer_isa = translate_isa_string_to_enum(isa)
-            rbinfile, delete_after_use = return_file_with_rbins(binfile)
-            bin_refs = np.arange(1, maxbinref + 1)
-            bin_ref_perms = itertools.product(bin_refs, bin_refs)
-            dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
-                              (bytes_to_native_str(b'nDEC'), np.int),
-                              (bytes_to_native_str(b'avg_time'), np.float),
-                              (bytes_to_native_str(b'sigma_time'), np.float)])
-            all_runtimes = np.zeros(maxbinref ** 3, dtype=dtype)
-            all_runtimes[:] = np.inf
-
             for ii, (nRA, nDEC) in enumerate(bin_ref_perms):
                 total_runtime = 0.0
                 total_sqr_runtime = 0.0

From 8d303ab290dca57be652a9bb6ebde1bbbf87f75f Mon Sep 17 00:00:00 2001
From: Kris Stern <krisastern@gobuddy.asia>
Date: Sun, 29 Mar 2020 18:59:08 +0800
Subject: [PATCH 07/16] Make second batch of formatting changes

---
 Corrfunc/mocks/DDtheta_mocks.py | 54 ++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 28 deletions(-)

diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py
index f94a8dd3..60997572 100644
--- a/Corrfunc/mocks/DDtheta_mocks.py
+++ b/Corrfunc/mocks/DDtheta_mocks.py
@@ -294,8 +294,8 @@ def DDtheta_mocks(autocorr, nthreads, binfile,
 
     # Ensure all input arrays are native endian
     RA1, DEC1, weights1, RA2, DEC2, weights2 = [
-        convert_to_native_endian(arr, warn=True) for arr in
-        [RA1, DEC1, weights1, RA2, DEC2, weights2]]
+            convert_to_native_endian(arr, warn=True) for arr in
+            [RA1, DEC1, weights1, RA2, DEC2, weights2]]
 
     fix_ra_dec(RA1, DEC1)
     if autocorr == 0:
@@ -314,21 +314,20 @@ def DDtheta_mocks(autocorr, nthreads, binfile,
     integer_isa = translate_isa_string_to_enum(isa)
     rbinfile, delete_after_use = return_file_with_rbins(binfile)
     with sys_pipes():
-        extn_results = DDtheta_mocks_extn(
-            autocorr, nthreads, rbinfile,
-            RA1, DEC1,
-            verbose=verbose,
-            link_in_dec=link_in_dec,
-            link_in_ra=link_in_ra,
-            output_thetaavg=output_thetaavg,
-            fast_acos=fast_acos,
-            ra_refine_factor=ra_refine_factor,
-            dec_refine_factor=dec_refine_factor,
-            max_cells_per_dim=max_cells_per_dim,
-            copy_particles=copy_particles,
-            enable_min_sep_opt=enable_min_sep_opt,
-            c_api_timer=c_api_timer,
-            isa=integer_isa, **kwargs)
+        extn_results = DDtheta_mocks_extn(autocorr, nthreads, rbinfile,
+                                          RA1, DEC1,
+                                          verbose=verbose,
+                                          link_in_dec=link_in_dec,
+                                          link_in_ra=link_in_ra,
+                                          output_thetaavg=output_thetaavg,
+                                          fast_acos=fast_acos,
+                                          ra_refine_factor=ra_refine_factor,
+                                          dec_refine_factor=dec_refine_factor,
+                                          max_cells_per_dim=max_cells_per_dim,
+                                          copy_particles=copy_particles,
+                                          enable_min_sep_opt=enable_min_sep_opt,
+                                          c_api_timer=c_api_timer,
+                                          isa=integer_isa, **kwargs)
     if extn_results is None:
         msg = "RuntimeError occurred"
         raise RuntimeError(msg)
@@ -581,17 +580,16 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
             total_sqr_runtime = 0.0
             for _ in range(nrepeats):
                 t0 = time.time()
-                extn_results = DDtheta_mocks_extn(
-                    autocorr, nthreads, rbinfile,
-                    RA1, DEC1, RA2, DEC2,
-                    link_in_dec=link_in_dec,
-                    link_in_ra=link_in_ra,
-                    verbose=verbose,
-                    output_thetaavg=output_thetaavg,
-                    ra_refine_factor=nRA,
-                    dec_refine_factor=nDEC,
-                    max_cells_per_dim=max_cells_per_dim,
-                    isa=integer_isa)
+                extn_results = DDtheta_mocks_extn(autocorr, nthreads, rbinfile,
+                                                  RA1, DEC1, RA2, DEC2,
+                                                  link_in_dec=link_in_dec,
+                                                  link_in_ra=link_in_ra,
+                                                  verbose=verbose,
+                                                  output_thetaavg=output_thetaavg,
+                                                  ra_refine_factor=nRA,
+                                                  dec_refine_factor=nDEC,
+                                                  max_cells_per_dim=max_cells_per_dim,
+                                                  isa=integer_isa)
                 t1 = time.time()
 
                 if extn_results is None:

From 227d7672cac1ed2661c5a1006cdab900f073bd2b Mon Sep 17 00:00:00 2001
From: Kris Stern <krisastern@gobuddy.asia>
Date: Sun, 29 Mar 2020 19:27:47 +0800
Subject: [PATCH 08/16] Make third batch of changes according to reviewer's
 suggestions

---
 Corrfunc/mocks/DDtheta_mocks.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py
index 60997572..1ce02b69 100644
--- a/Corrfunc/mocks/DDtheta_mocks.py
+++ b/Corrfunc/mocks/DDtheta_mocks.py
@@ -484,7 +484,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
     >>> from math import pi
     >>> from os.path import dirname, abspath, join as pjoin
     >>> import Corrfunc
-    >>> from Corrfunc.mocks.DDtheta_mocks\
+    >>> from Corrfunc.mocks.DDtheta_mocks \
         import find_fastest_DDtheta_mocks_bin_refs
     >>> binfile = pjoin(dirname(abspath(Corrfunc.__file__)),
     ...                 "../mocks/tests/", "angular_bins")
@@ -574,7 +574,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
                "or both link_in_dec and link_in_ra on."
         raise ValueError(msg2)
 
-    if link_in_ra is True:
+    if link_in_ra:
         for ii, (nRA, nDEC) in enumerate(bin_ref_perms):
             total_runtime = 0.0
             total_sqr_runtime = 0.0
@@ -641,6 +641,8 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
                    "(min, max) bin, with refinements in the declination."
             print(msg3)
 
+            # bin_ref_perms = [(None, x) for x in bin_ref_perms]
+
             for ii, (nRA, nDEC) in enumerate(bin_ref_perms):
                 total_runtime = 0.0
                 total_sqr_runtime = 0.0

From f6fac6c9491ffb0dabe2c656a6fdca15ec1ca9e2 Mon Sep 17 00:00:00 2001
From: Kris Stern <krisastern@gobuddy.asia>
Date: Fri, 3 Apr 2020 00:16:53 +0800
Subject: [PATCH 09/16] Simplify code structure

---
 Corrfunc/mocks/DDtheta_mocks.py | 182 +++++++++++---------------------
 1 file changed, 63 insertions(+), 119 deletions(-)

diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py
index 1ce02b69..7d087e73 100644
--- a/Corrfunc/mocks/DDtheta_mocks.py
+++ b/Corrfunc/mocks/DDtheta_mocks.py
@@ -547,7 +547,10 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
     integer_isa = translate_isa_string_to_enum(isa)
     rbinfile, delete_after_use = return_file_with_rbins(binfile)
     bin_refs = np.arange(1, maxbinref + 1)
-    bin_ref_perms = itertools.product(bin_refs, bin_refs)
+    if link_in_ra:
+        bin_ref_perms = itertools.product(bin_refs, bin_refs)
+    else:
+        bin_ref_perms = [(1, binref) for binref in bin_refs]
     dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
                       (bytes_to_native_str(b'nDEC'), np.int),
                       (bytes_to_native_str(b'avg_time'), np.float),
@@ -575,63 +578,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
         raise ValueError(msg2)
 
     if link_in_ra:
-        for ii, (nRA, nDEC) in enumerate(bin_ref_perms):
-            total_runtime = 0.0
-            total_sqr_runtime = 0.0
-            for _ in range(nrepeats):
-                t0 = time.time()
-                extn_results = DDtheta_mocks_extn(autocorr, nthreads, rbinfile,
-                                                  RA1, DEC1, RA2, DEC2,
-                                                  link_in_dec=link_in_dec,
-                                                  link_in_ra=link_in_ra,
-                                                  verbose=verbose,
-                                                  output_thetaavg=output_thetaavg,
-                                                  ra_refine_factor=nRA,
-                                                  dec_refine_factor=nDEC,
-                                                  max_cells_per_dim=max_cells_per_dim,
-                                                  isa=integer_isa)
-                t1 = time.time()
-
-                if extn_results is None:
-                    msg4 = "RuntimeError occurred with perms = ({0}, {1})".\
-                        format(nRA, nDEC)
-                    print(msg4)
-                    print("Continuing...")
-                    continue
-
-                dt = (t1 - t0)
-                total_runtime += dt
-                total_sqr_runtime += dt * dt
-
-            avg_runtime = total_runtime / nrepeats
-
-            # variance = E(X^2) - E^2(X)
-            # disp = sqrt(variance)
-            runtime_disp = np.sqrt(total_sqr_runtime / nrepeats -
-                                   avg_runtime * avg_runtime)
-
-            all_runtimes[ii]['nRA'] = nRA
-            all_runtimes[ii]['nDEC'] = nDEC
-            all_runtimes[ii]['avg_time'] = avg_runtime
-            all_runtimes[ii]['sigma_time'] = runtime_disp
-
-        if delete_after_use:
-            import os
-            os.remove(rbinfile)
-
-        all_runtimes.sort(order=('avg_time', 'sigma_time'))
-        results = (all_runtimes[0]['nRA'],
-                   all_runtimes[0]['nDEC'])
-
-        optional_returns = return_runtimes
-        if not optional_returns:
-            ret = results
-        else:
-            ret = (results,)
-            if return_runtimes:
-                ret += (all_runtimes,)
-
-        return ret
+        pass
 
     if link_in_dec is True:
         if link_in_ra is False:
@@ -641,67 +588,64 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
                    "(min, max) bin, with refinements in the declination."
             print(msg3)
 
-            # bin_ref_perms = [(None, x) for x in bin_ref_perms]
-
-            for ii, (nRA, nDEC) in enumerate(bin_ref_perms):
-                total_runtime = 0.0
-                total_sqr_runtime = 0.0
-
-                for _ in range(nrepeats):
-                    t0 = time.time()
-                    extn_results = DDtheta_mocks_extn(
-                        autocorr, nthreads, rbinfile,
-                        RA1, DEC1, RA2, DEC2,
-                        link_in_dec=link_in_dec,
-                        link_in_ra=link_in_ra,
-                        verbose=verbose,
-                        output_thetaavg=output_thetaavg,
-                        ra_refine_factor=1,
-                        dec_refine_factor=nDEC,
-                        max_cells_per_dim=max_cells_per_dim,
-                        isa=integer_isa)
-                    t1 = time.time()
-
-                    if extn_results is None:
-                        msg4 = "RuntimeError occurred with perms = ({0}, {1})".\
-                            format(nRA, nDEC)
-                        print(msg4)
-                        print("Continuing...")
-                        continue
-
-                    dt = (t1 - t0)
-                    total_runtime += dt
-                    total_sqr_runtime += dt * dt
-
-                avg_runtime = total_runtime / nrepeats
-
-                # variance = E(X^2) - E^2(X)
-                # disp = sqrt(variance)
-                runtime_disp = np.sqrt(total_sqr_runtime / nrepeats -
-                                       avg_runtime * avg_runtime)
-
-                all_runtimes[ii]['nRA'] = nRA
-                all_runtimes[ii]['nDEC'] = nDEC
-                all_runtimes[ii]['avg_time'] = avg_runtime
-                all_runtimes[ii]['sigma_time'] = runtime_disp
-
-            if delete_after_use:
-                import os
-                os.remove(rbinfile)
-
-            all_runtimes.sort(order=('avg_time', 'sigma_time'))
-            results = (all_runtimes[0]['nRA'],
-                       all_runtimes[0]['nDEC'])
-
-            optional_returns = return_runtimes
-            if not optional_returns:
-                ret = results
-            else:
-                ret = (results,)
-                if return_runtimes:
-                    ret += (all_runtimes,)
-
-            return ret
+    for ii, (nRA, nDEC) in enumerate(bin_ref_perms):
+        total_runtime = 0.0
+        total_sqr_runtime = 0.0
+
+        for _ in range(nrepeats):
+            t0 = time.time()
+            extn_results = DDtheta_mocks_extn(autocorr, nthreads, rbinfile,
+                                              RA1, DEC1, RA2, DEC2,
+                                              link_in_dec=link_in_dec,
+                                              link_in_ra=link_in_ra,
+                                              verbose=verbose,
+                                              output_thetaavg=output_thetaavg,
+                                              ra_refine_factor=nRA,
+                                              dec_refine_factor=nDEC,
+                                              max_cells_per_dim=max_cells_per_dim,
+                                              isa=integer_isa)
+            t1 = time.time()
+
+            if extn_results is None:
+                msg4 = "RuntimeError occurred with perms = ({0}, {1})".\
+                    format(nRA, nDEC)
+                print(msg4)
+                print("Continuing...")
+                continue
+
+            dt = (t1 - t0)
+            total_runtime += dt
+            total_sqr_runtime += dt * dt
+
+        avg_runtime = total_runtime / nrepeats
+
+        # variance = E(X^2) - E^2(X)
+        # disp = sqrt(variance)
+        runtime_disp = np.sqrt(total_sqr_runtime / nrepeats -
+                               avg_runtime * avg_runtime)
+
+        all_runtimes[ii]['nRA'] = nRA
+        all_runtimes[ii]['nDEC'] = nDEC
+        all_runtimes[ii]['avg_time'] = avg_runtime
+        all_runtimes[ii]['sigma_time'] = runtime_disp
+
+    if delete_after_use:
+        import os
+        os.remove(rbinfile)
+
+    all_runtimes.sort(order=('avg_time', 'sigma_time'))
+    results = (all_runtimes[0]['nRA'],
+               all_runtimes[0]['nDEC'])
+
+    optional_returns = return_runtimes
+    if not optional_returns:
+        ret = results
+    else:
+        ret = (results,)
+        if return_runtimes:
+            ret += (all_runtimes,)
+
+    return ret
 
 
 if __name__ == '__main__':

From bc0fcff8a01af6e535d4c3bd168b4939123f4108 Mon Sep 17 00:00:00 2001
From: Kris Stern <krisastern@gobuddy.asia>
Date: Fri, 3 Apr 2020 20:33:03 +0800
Subject: [PATCH 10/16] Update Corrfunc/mocks/DDtheta_mocks.py for corrections
 and improvement

---
 Corrfunc/mocks/DDtheta_mocks.py | 47 +++++++++++++++------------------
 1 file changed, 21 insertions(+), 26 deletions(-)

diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py
index 7d087e73..b362ef0d 100644
--- a/Corrfunc/mocks/DDtheta_mocks.py
+++ b/Corrfunc/mocks/DDtheta_mocks.py
@@ -158,7 +158,7 @@ def DDtheta_mocks(autocorr, nthreads, binfile,
 
     max_cells_per_dim : integer, default is 100, typical values in [50-300]
         Controls the maximum number of cells per dimension. Total number of
-        cells can be up to (thetamax)^3. Only increase if ``thetamax``
+        cells can be up to (max_cells_per_dim)^2. Only increase if ``thetamax``
         is too small relative to the boxsize (and increasing helps the
         runtime).
 
@@ -440,7 +440,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
 
     max_cells_per_dim: integer, default is 100, typical values in [50-300]
         Controls the maximum number of cells per dimension. Total number of
-        cells can be up to (thetamax)^3. Only increase if ``rpmax`` is
+        cells can be up to (max_cells_per_dim)^2. Only increase if ``thetamax`` is
         too small relative to the boxsize (and increasing helps the runtime).
 
     maxbinref: integer (default 3)
@@ -514,9 +514,9 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
         from Corrfunc._countpairs_mocks import countpairs_theta_mocks as\
             DDtheta_mocks_extn
     except ImportError:
-        msg0 = "Could not import the C extension for the angular "\
+        msg = "Could not import the C extension for the angular "\
               "correlation function for mocks."
-        raise ImportError(msg0)
+        raise ImportError(msg)
 
     import numpy as np
     from Corrfunc.utils import translate_isa_string_to_enum, fix_ra_dec,\
@@ -551,6 +551,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
         bin_ref_perms = itertools.product(bin_refs, bin_refs)
     else:
         bin_ref_perms = [(1, binref) for binref in bin_refs]
+
     dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
                       (bytes_to_native_str(b'nDEC'), np.int),
                       (bytes_to_native_str(b'avg_time'), np.float),
@@ -560,9 +561,9 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
 
     if autocorr == 0:
         if RA2 is None or DEC2 is None:
-            msg1 = "Must pass valid arrays for RA2/DEC2 for "\
+            msg = "Must pass valid arrays for RA2/DEC2 for "\
                   "computing cross-correlation."
-            raise ValueError(msg1)
+            raise ValueError(msg)
     else:
         RA2 = np.empty(1)
         DEC2 = np.empty(1)
@@ -571,22 +572,18 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
         link_in_dec = True
 
     if link_in_dec is False and link_in_ra is False:
-        msg2 = "Warning: Brute-force calculation without any gridding "\
-               "is forced, as link_in_dec and link_in_ra are both set "\
-               "to False. Please be sure to turn on at least link_in_dec, "\
-               "or both link_in_dec and link_in_ra on."
-        raise ValueError(msg2)
-
-    if link_in_ra:
-        pass
-
-    if link_in_dec is True:
-        if link_in_ra is False:
-            msg3 = "Info: Gridding in the declination only, as link_in_dec "\
-                   "is set to True while link_in_ra is set to False. "\
-                   "Thus looping is only needed over the range of "\
-                   "(min, max) bin, with refinements in the declination."
-            print(msg3)
+        msg = "Error: Brute-force calculation without any gridding " \
+              "is forced, as link_in_dec and link_in_ra are both set " \
+              "to False. Please set at least one of link_in_dec, link_in_ra=True " \
+              "to enable gridding along DEC or along both RA and DEC."
+        raise ValueError(msg)
+
+    if not link_in_ra:
+        if verbose:
+            msg = "INFO: Since ``link_in_ra`` is not set, only gridding in declination " \
+                  "Checking with refinements in declination ranging from [1, {}] and a " \
+                  "maximum of {} bins".format(maxbinref, max_cells_per_dim)
+            print(msg)
 
     for ii, (nRA, nDEC) in enumerate(bin_ref_perms):
         total_runtime = 0.0
@@ -607,11 +604,9 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
             t1 = time.time()
 
             if extn_results is None:
-                msg4 = "RuntimeError occurred with perms = ({0}, {1})".\
+                msg = "RuntimeError occurred with perms = ({0}, {1})".\
                     format(nRA, nDEC)
-                print(msg4)
-                print("Continuing...")
-                continue
+                raise ValueError(msg)
 
             dt = (t1 - t0)
             total_runtime += dt

From ebd8e3cbb1c44d8fcea06bbbc32e6e1fc591e02f Mon Sep 17 00:00:00 2001
From: Kris Stern <krisastern@gobuddy.asia>
Date: Sat, 4 Apr 2020 11:25:52 +0800
Subject: [PATCH 11/16] Add changelog entry for PR

---
 CHANGES.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 72abcd5a..678073c3 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -17,7 +17,7 @@ This is a bug-fix release and contains general code quality improvements.
 
 Enhancements
 ------------
-
+- Add find_fastest_DDtheta_mocks_bin_refs() function to Corrfunc/mocks/DDtheta_mocks [#216]
 
 
 Bug fixes

From adff80d2fffaa18b2b7203d94b82c4ecb5e2c48b Mon Sep 17 00:00:00 2001
From: Kris Stern <krisastern@gobuddy.asia>
Date: Mon, 6 Apr 2020 14:50:51 +0800
Subject: [PATCH 12/16] Update L574's if statement to simplify logic

---
 Corrfunc/mocks/DDtheta_mocks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py
index b362ef0d..06f91df5 100644
--- a/Corrfunc/mocks/DDtheta_mocks.py
+++ b/Corrfunc/mocks/DDtheta_mocks.py
@@ -571,7 +571,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
     if link_in_ra is True:
         link_in_dec = True
 
-    if link_in_dec is False and link_in_ra is False:
+    if not link_in_dec:
         msg = "Error: Brute-force calculation without any gridding " \
               "is forced, as link_in_dec and link_in_ra are both set " \
               "to False. Please set at least one of link_in_dec, link_in_ra=True " \

From a03a332a66d7fd1ae0c32420e9e3ed8035fa6439 Mon Sep 17 00:00:00 2001
From: Kris Stern <krisastern@gobuddy.asia>
Date: Wed, 8 Apr 2020 01:47:50 +0800
Subject: [PATCH 13/16] Modify all_runtimes formula in
 find_fastest_DDtheta_mocks_bin_refs

---
 Corrfunc/mocks/DDtheta_mocks.py               | 22 ++++++++++++-------
 .../countpairs_theta_mocks_impl.c.src         | 21 +++++++++++-------
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py
index 06f91df5..7611e183 100644
--- a/Corrfunc/mocks/DDtheta_mocks.py
+++ b/Corrfunc/mocks/DDtheta_mocks.py
@@ -446,7 +446,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
     maxbinref: integer (default 3)
         The maximum ``bin refine factor`` to use along each dimension.
 
-        Runtime of module scales as ``maxbinref^3``, so change the value of
+        Runtime of module scales as ``maxbinref^2``, so change the value of
         ``maxbinref`` with caution.
 
         Note that ``max_cells_per_dim`` might need to be increased
@@ -549,14 +549,20 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
     bin_refs = np.arange(1, maxbinref + 1)
     if link_in_ra:
         bin_ref_perms = itertools.product(bin_refs, bin_refs)
+
+        dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
+                          (bytes_to_native_str(b'nDEC'), np.int),
+                          (bytes_to_native_str(b'avg_time'), np.float),
+                          (bytes_to_native_str(b'sigma_time'), np.float)])
+        all_runtimes = np.zeros(maxbinref ** 2, dtype=dtype)
     else:
         bin_ref_perms = [(1, binref) for binref in bin_refs]
 
-    dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
-                      (bytes_to_native_str(b'nDEC'), np.int),
-                      (bytes_to_native_str(b'avg_time'), np.float),
-                      (bytes_to_native_str(b'sigma_time'), np.float)])
-    all_runtimes = np.zeros(maxbinref ** 3, dtype=dtype)
+        dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
+                          (bytes_to_native_str(b'nDEC'), np.int),
+                          (bytes_to_native_str(b'avg_time'), np.float),
+                          (bytes_to_native_str(b'sigma_time'), np.float)])
+        all_runtimes = np.zeros(maxbinref ** 1, dtype=dtype)
     all_runtimes[:] = np.inf
 
     if autocorr == 0:
@@ -590,7 +596,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
         total_sqr_runtime = 0.0
 
         for _ in range(nrepeats):
-            t0 = time.time()
+            t0 = time.perf_counter()
             extn_results = DDtheta_mocks_extn(autocorr, nthreads, rbinfile,
                                               RA1, DEC1, RA2, DEC2,
                                               link_in_dec=link_in_dec,
@@ -601,7 +607,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
                                               dec_refine_factor=nDEC,
                                               max_cells_per_dim=max_cells_per_dim,
                                               isa=integer_isa)
-            t1 = time.time()
+            t1 = time.perf_counter()
 
             if extn_results is None:
                 msg = "RuntimeError occurred with perms = ({0}, {1})".\
diff --git a/mocks/DDtheta_mocks/countpairs_theta_mocks_impl.c.src b/mocks/DDtheta_mocks/countpairs_theta_mocks_impl.c.src
index 1fd759b2..345b659e 100644
--- a/mocks/DDtheta_mocks/countpairs_theta_mocks_impl.c.src
+++ b/mocks/DDtheta_mocks/countpairs_theta_mocks_impl.c.src
@@ -545,14 +545,19 @@ int countpairs_theta_mocks_DOUBLE(const int64_t ND1, DOUBLE *ra1, DOUBLE *dec1,
         options->bin_refine_factors[1]=numthreads;
     }
 #endif
-    /* Only check the ra and dec bin refine factors (not all 3 bin refs)*/
-    for(int i=0;i<2;i++) {
-        if(options->bin_refine_factors[i] < 1) {
-            fprintf(stderr,"Warning: bin refine factor along axis = %d *must* be >=1. Instead found bin refine factor =%d\n",
-                    i, options->bin_refine_factors[i]);
-            reset_bin_refine_factors(options);
-            break;/* all factors have been reset -> no point continuing with the loop */
-        }
+
+    /* Only check the ra and dec bin refine factors (not all 3 bin refs) */
+    /* As evidenced by the PR #216, the error-message and resetting is not quite right! */
+    if(options->link_in_ra && options->bin_refine_factors[0] < 1) {
+        fprintf(stderr,"Warning: Linking in RA is requested, so the RA-bin refine factor *must* be >=1. Instead found bin refine factor =%d...resetting\n",
+                options->bin_refine_factors[0]);
+        reset_bin_refine_factors(options);
+    }
+
+    if(options->link_in_dec && options->bin_refine_factors[1] < 1) {
+        fprintf(stderr,"Warning: Linking in DEC is requested, so the DEC-bin refine factor *must* be >=1. Instead found bin refine factor =%d...resetting\n",
+                options->bin_refine_factors[1]);
+        reset_bin_refine_factors(options);
     }
 
     if(options->max_cells_per_dim == 0) {

From 2f5212ad516f77f2df606bfd3862fc3db4d4d531 Mon Sep 17 00:00:00 2001
From: Kris Stern <krisastern@gobuddy.asia>
Date: Sat, 11 Apr 2020 23:52:59 +0800
Subject: [PATCH 14/16] Make changes to if/else statement to simplify code

---
 Corrfunc/mocks/DDtheta_mocks.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py
index 7611e183..c46ec327 100644
--- a/Corrfunc/mocks/DDtheta_mocks.py
+++ b/Corrfunc/mocks/DDtheta_mocks.py
@@ -549,20 +549,17 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
     bin_refs = np.arange(1, maxbinref + 1)
     if link_in_ra:
         bin_ref_perms = itertools.product(bin_refs, bin_refs)
+        nperms = maxbinref ** 2
 
-        dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
-                          (bytes_to_native_str(b'nDEC'), np.int),
-                          (bytes_to_native_str(b'avg_time'), np.float),
-                          (bytes_to_native_str(b'sigma_time'), np.float)])
-        all_runtimes = np.zeros(maxbinref ** 2, dtype=dtype)
     else:
         bin_ref_perms = [(1, binref) for binref in bin_refs]
+        nperms = maxbinref ** 1
 
-        dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
-                          (bytes_to_native_str(b'nDEC'), np.int),
-                          (bytes_to_native_str(b'avg_time'), np.float),
-                          (bytes_to_native_str(b'sigma_time'), np.float)])
-        all_runtimes = np.zeros(maxbinref ** 1, dtype=dtype)
+    dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
+                      (bytes_to_native_str(b'nDEC'), np.int),
+                      (bytes_to_native_str(b'avg_time'), np.float),
+                      (bytes_to_native_str(b'sigma_time'), np.float)])
+    all_runtimes = np.zeros(nperms, dtype=dtype)
     all_runtimes[:] = np.inf
 
     if autocorr == 0:

From 81a4e6e75406c1003fb4af141ceed041908959e4 Mon Sep 17 00:00:00 2001
From: Manodeep Sinha <manodeep@gmail.com>
Date: Mon, 13 Apr 2020 11:43:02 +1000
Subject: [PATCH 15/16] Tweaked to better adhere to conventions

---
 Corrfunc/mocks/DDtheta_mocks.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py
index c46ec327..52d6ea8f 100644
--- a/Corrfunc/mocks/DDtheta_mocks.py
+++ b/Corrfunc/mocks/DDtheta_mocks.py
@@ -301,7 +301,7 @@ def DDtheta_mocks(autocorr, nthreads, binfile,
     if autocorr == 0:
         fix_ra_dec(RA2, DEC2)
 
-    if link_in_ra is True:
+    if link_in_ra:
         link_in_dec = True
 
     # Passing None parameters breaks the parsing code, so avoid this
@@ -550,10 +550,9 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
     if link_in_ra:
         bin_ref_perms = itertools.product(bin_refs, bin_refs)
         nperms = maxbinref ** 2
-
     else:
         bin_ref_perms = [(1, binref) for binref in bin_refs]
-        nperms = maxbinref ** 1
+        nperms = maxbinref
 
     dtype = np.dtype([(bytes_to_native_str(b'nRA'), np.int),
                       (bytes_to_native_str(b'nDEC'), np.int),
@@ -571,7 +570,7 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
         RA2 = np.empty(1)
         DEC2 = np.empty(1)
 
-    if link_in_ra is True:
+    if link_in_ra:
         link_in_dec = True
 
     if not link_in_dec:
@@ -581,12 +580,11 @@ def find_fastest_DDtheta_mocks_bin_refs(autocorr, nthreads, binfile,
               "to enable gridding along DEC or along both RA and DEC."
         raise ValueError(msg)
 
-    if not link_in_ra:
-        if verbose:
-            msg = "INFO: Since ``link_in_ra`` is not set, only gridding in declination " \
-                  "Checking with refinements in declination ranging from [1, {}] and a " \
-                  "maximum of {} bins".format(maxbinref, max_cells_per_dim)
-            print(msg)
+    if verbose and not link_in_ra:
+        msg = "INFO: Since ``link_in_ra`` is not set, only gridding in declination " \
+              "Checking with refinements in declination ranging from [1, {}] and a " \
+              "maximum of {} bins".format(maxbinref, max_cells_per_dim)
+        print(msg)
 
     for ii, (nRA, nDEC) in enumerate(bin_ref_perms):
         total_runtime = 0.0

From 05e350670f56db538638c14168108eee5b876d39 Mon Sep 17 00:00:00 2001
From: Manodeep Sinha <manodeep@gmail.com>
Date: Mon, 13 Apr 2020 11:43:22 +1000
Subject: [PATCH 16/16] Tweaked the Changelog wording

---
 CHANGES.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 678073c3..aafc3637 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -17,7 +17,7 @@ This is a bug-fix release and contains general code quality improvements.
 
 Enhancements
 ------------
-- Add find_fastest_DDtheta_mocks_bin_refs() function to Corrfunc/mocks/DDtheta_mocks [#216]
+- A new helper routine to find the combination of (RA, DEC) refinements that produces the fastest runtime in ``DDtheta_mocks`` [#216]
 
 
 Bug fixes