Merge pull request #350 from threeML/fix-install
rm pagmo req for pip
grburgess committed Apr 12, 2020
2 parents 3ecdbc0 + 9242fb1 commit 0168577
Showing 3 changed files with 103 additions and 56 deletions.
3 changes: 2 additions & 1 deletion docs/requirements.txt
@@ -1 +1,2 @@
-numpy>=1.6,<1.16
+cython
+numpy>=1.6,<1.16
2 changes: 1 addition & 1 deletion setup.cfg
@@ -46,7 +46,7 @@ install_requires =
     pandas
     requests
     speclite
-    pygmo>=2.4,<=2.11.
+    # pygmo>=2.4,<=2.11.
     ipython
     ipyparallel
     numexpr
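With pygmo commented out of install_requires, a pip install no longer pulls it in, so any code path that wants the pagmo/pygmo minimizers has to tolerate its absence. A minimal sketch of the usual guard (an illustration, not threeML's actual code; pygmo remains installable separately, e.g. from conda-forge):

```python
# a minimal sketch, not code from this repository: guard the now-optional import
try:
    import pygmo  # no longer installed by pip alongside threeML

    HAS_PYGMO = True
except ImportError:
    HAS_PYGMO = False
```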
154 changes: 100 additions & 54 deletions threeML/utils/binner.py
@@ -6,6 +6,7 @@
 from threeML.utils.time_interval import TimeIntervalSet
 from threeML.exceptions.custom_exceptions import custom_warnings
 
+
 class NotEnoughData(RuntimeError):
     pass
 
@@ -24,15 +25,20 @@ def __init__(self, vector_to_rebin_on, min_value_per_bin, mask=None):
         total = np.sum(vector_to_rebin_on)
 
         if total < min_value_per_bin:
-            raise NotEnoughData("Vector total is %s, cannot rebin at %s per bin" % (total, min_value_per_bin))
+            raise NotEnoughData(
+                "Vector total is %s, cannot rebin at %s per bin"
+                % (total, min_value_per_bin)
+            )
 
         # Check if we have a mask; if not, prepare an empty one
         if mask is not None:
 
             mask = np.array(mask, bool)
 
-            assert mask.shape[0] == len(vector_to_rebin_on), "The provided mask must have the same number of " \
-                                                             "elements as the vector to rebin on"
+            assert mask.shape[0] == len(vector_to_rebin_on), (
+                "The provided mask must have the same number of "
+                "elements as the vector to rebin on"
+            )
 
         else:
 
@@ -74,7 +80,7 @@ def __init__(self, vector_to_rebin_on, min_value_per_bin, mask=None):
             if n_grouped_bins > 1:
 
                 # group all these bins
-                self._grouping[index - n_grouped_bins + 1: index] = -1
+                self._grouping[index - n_grouped_bins + 1 : index] = -1
                 self._grouping[index] = 1
 
                 # reset the number of bins in this group
@@ -112,7 +118,7 @@ def __init__(self, vector_to_rebin_on, min_value_per_bin, mask=None):
             if n_grouped_bins > 1:
 
                 # group all these bins
-                self._grouping[index - n_grouped_bins + 1: index] = -1
+                self._grouping[index - n_grouped_bins + 1 : index] = -1
                 self._grouping[index] = 1
 
                 # reset the number of bins in this group
@@ -124,8 +130,9 @@ def __init__(self, vector_to_rebin_on, min_value_per_bin, mask=None):
         if bin_open:
             self._stops.append(len(vector_to_rebin_on))
 
-        assert len(self._starts) == len(self._stops), "This is a bug: the starts and stops of the bins are not in " \
-                                                      "equal number"
+        assert len(self._starts) == len(self._stops), (
+            "This is a bug: the starts and stops of the bins are not in " "equal number"
+        )
 
         self._min_value_per_bin = min_value_per_bin

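For context, the class being reformatted above (Rebinner in threeML) walks the input vector and groups consecutive bins until each group reaches min_value_per_bin. A usage sketch with hypothetical data, using only the constructor and rebin signatures visible in this diff:

```python
import numpy as np

from threeML.utils.binner import Rebinner

counts = np.random.poisson(2.0, size=128)  # hypothetical sparse counts

rebinner = Rebinner(counts, min_value_per_bin=10)

# rebin() accepts any number of vectors and rebins them all on the same
# grouping; each group (except possibly the last one, left open at the end
# of the data) contains at least min_value_per_bin counts
(rebinned_counts,) = rebinner.rebin(counts)
```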
@@ -150,8 +157,10 @@ def rebin(self, *vectors):
 
         for vector in vectors:
 
-            assert len(vector) == len(self._mask), "The vector to rebin must have the same number of elements of the" \
-                                                   "original (not-rebinned) vector"
+            assert len(vector) == len(self._mask), (
+                "The vector to rebin must have the same number of elements as the "
+                "original (not-rebinned) vector"
+            )
 
             # Transform in array because we need to use the mask
             vector_a = np.array(vector)
@@ -167,7 +176,14 @@ def rebin(self, *vectors):
             # NOTE: we add 1e-100 because if both rebinned_vector and vector_a contain only 0, the check would
             # fail when it shouldn't
 
-            assert abs((np.sum(rebinned_vector) + 1e-100) / (np.sum(vector_a[self._mask]) + 1e-100) - 1) < 1e-4
+            assert (
+                abs(
+                    (np.sum(rebinned_vector) + 1e-100)
+                    / (np.sum(vector_a[self._mask]) + 1e-100)
+                    - 1
+                )
+                < 1e-4
+            )
 
             rebinned_vectors.append(np.array(rebinned_vector))

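The assertion reformatted above is a relative-error check on the rebinned sum, and the 1e-100 offset is there (per the NOTE) so that two all-zero sums compare as equal instead of dividing zero by zero. In isolation:

```python
# why the 1e-100 offset matters: with a = b = 0 the bare ratio would be 0/0
a, b = 0.0, 0.0
assert abs((a + 1e-100) / (b + 1e-100) - 1) < 1e-4  # passes: 0 vs 0 agree
```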
@@ -189,8 +205,10 @@ def rebin_errors(self, *vectors):
 
         for vector in vectors:  # type: np.ndarray[np.ndarray]
 
-            assert len(vector) == len(self._mask), "The vector to rebin must have the same number of elements of the" \
-                                                   "original (not-rebinned) vector"
+            assert len(vector) == len(self._mask), (
+                "The vector to rebin must have the same number of elements as the "
+                "original (not-rebinned) vector"
+            )
 
             rebinned_vector = []
@@ -250,15 +268,23 @@ def get_new_start_and_stop(self, old_start, old_stop):
         # return np.array(self._edges[:-1]) + 1, np.array(self._edges[1:])
 
 
-
 class TemporalBinner(TimeIntervalSet):
     """
     An extension of the TimeIntervalSet that includes binning capabilities
     """
 
     @classmethod
-    def bin_by_significance(cls, arrival_times, background_getter, background_error_getter=None, sigma_level=10, min_counts=1, tstart=None, tstop=None):
+    def bin_by_significance(
+        cls,
+        arrival_times,
+        background_getter,
+        background_error_getter=None,
+        sigma_level=10,
+        min_counts=1,
+        tstart=None,
+        tstop=None,
+    ):
         """
         Bin the data to a given significance level for a given background method and sigma
@@ -273,8 +299,6 @@ def bin_by_significance(cls, arrival_times, background_getter, background_error_getter=None, sigma_level=10, min_counts=1, tstart=None, tstop=None):
         :return:
         """
 
-
-
         if tstart is None:
 
             tstart = arrival_times.min()
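A hypothetical call against the signature reformatted above; the event data and the constant-rate background_getter are invented for illustration:

```python
import numpy as np

from threeML.utils.binner import TemporalBinner

# fake data: a flat 5 counts/s background plus a burst near t = 50 s
background_events = np.random.uniform(0.0, 100.0, 500)
burst_events = np.random.normal(50.0, 1.0, 2000)
arrival_times = np.sort(np.concatenate([background_events, burst_events]))

bins = TemporalBinner.bin_by_significance(
    arrival_times,
    background_getter=lambda t1, t2: 5.0 * (t2 - t1),  # integrated bkg counts
    sigma_level=10,
)
```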
@@ -314,15 +338,19 @@ def bin_by_significance(cls, arrival_times, background_getter, background_error_getter=None, sigma_level=10, min_counts=1, tstart=None, tstop=None):
 
         # first we need to see if the interval provided has enough counts
 
-        _, counts = TemporalBinner._select_events(arrival_times,current_start, arrival_times[-1])
+        _, counts = TemporalBinner._select_events(
+            arrival_times, current_start, arrival_times[-1]
+        )
 
         # if it does not, the flag for the big loop never gets set
-        end_all_search = not TemporalBinner._check_exceeds_sigma_interval(current_start,
-                                                                          arrival_times[-1],
-                                                                          counts,
-                                                                          sigma_level,
-                                                                          background_getter,
-                                                                          background_error_getter)
+        end_all_search = not TemporalBinner._check_exceeds_sigma_interval(
+            current_start,
+            arrival_times[-1],
+            counts,
+            sigma_level,
+            background_getter,
+            background_error_getter,
+        )
 
         # We will start the search at the mid point of the whole interval
@@ -341,24 +369,28 @@ def bin_by_significance(cls, arrival_times, background_getter, background_error_getter=None, sigma_level=10, min_counts=1, tstart=None, tstop=None):
         # as long as we have not reached the end of the interval
         # the loop will run
         with progress_bar(arrival_times.shape[0]) as pbar:
-            while (not end_all_search):
+            while not end_all_search:
 
                 # start of the fast search
                 # we reset the flag for the interval
                 # having been decreased in the last pass
                 decreased_interval = False
 
-                while (not end_fast_search):
+                while not end_fast_search:
 
                     # we calculate the sigma of the current region
-                    _, counts = TemporalBinner._select_events(arrival_times,current_start, current_stop)
-
-                    sigma_exceeded = TemporalBinner._check_exceeds_sigma_interval(current_start,
-                                                                                  current_stop,
-                                                                                  counts,
-                                                                                  sigma_level,
-                                                                                  background_getter,
-                                                                                  background_error_getter)
+                    _, counts = TemporalBinner._select_events(
+                        arrival_times, current_start, current_stop
+                    )
+
+                    sigma_exceeded = TemporalBinner._check_exceeds_sigma_interval(
+                        current_start,
+                        current_stop,
+                        counts,
+                        sigma_level,
+                        background_getter,
+                        background_error_getter,
+                    )
 
                     time_step = abs(current_stop - current_start)
@@ -381,7 +413,9 @@ def bin_by_significance(cls, arrival_times, background_getter, background_error_getter=None, sigma_level=10, min_counts=1, tstart=None, tstop=None):
                     else:
 
                         # unless we would increase it too far
-                        if (current_stop + time_step * increase_factor) >= arrival_times[-1]:
+                        if (
+                            current_stop + time_step * increase_factor
+                        ) >= arrival_times[-1]:
 
                             # mark where we are in the interval
                             start_idx = searchsorted(arrival_times, current_stop)
@@ -432,8 +466,9 @@ def bin_by_significance(cls, arrival_times, background_getter, background_error_getter=None, sigma_level=10, min_counts=1, tstart=None, tstop=None):
 
                             bkg_error = background_error_getter(current_start, time)
 
-                            sigma = sig.li_and_ma_equivalent_for_gaussian_background(bkg_error)[0]
-
+                            sigma = sig.li_and_ma_equivalent_for_gaussian_background(
+                                bkg_error
+                            )[0]
 
                         else:
@@ -466,12 +501,11 @@ def bin_by_significance(cls, arrival_times, background_getter, background_error_getter=None, sigma_level=10, min_counts=1, tstart=None, tstop=None):
                         # so let's kill the main search
                         end_all_search = True
 
-
-
-
         if not starts:
 
-            print("The requested sigma level could not be achieved in the interval. Try decreasing it.")
+            print(
+                "The requested sigma level could not be achieved in the interval. Try decreasing it."
+            )
 
         else:
@@ -521,23 +555,31 @@ def bin_by_bayesian_blocks(cls, arrival_times, p0, bkg_integral_distribution=None):
         """
 
         try:
 
-            final_edges = bayesian_blocks(arrival_times, arrival_times[0], arrival_times[-1], p0, bkg_integral_distribution)
+            final_edges = bayesian_blocks(
+                arrival_times,
+                arrival_times[0],
+                arrival_times[-1],
+                p0,
+                bkg_integral_distribution,
+            )
 
         except Exception as e:
 
-            if 'duplicate' in str(e):
+            if "duplicate" in str(e):
 
-                custom_warnings.warn('There where possible duplicate time tags in the data. We will try to run a different algorithm')
-
-                final_edges = bayesian_blocks_not_unique(arrival_times, arrival_times[0], arrival_times[-1], p0)
+                custom_warnings.warn(
+                    "There were possible duplicate time tags in the data. We will try to run a different algorithm"
+                )
+
+                final_edges = bayesian_blocks_not_unique(
+                    arrival_times, arrival_times[0], arrival_times[-1], p0
+                )
 
-
         starts = np.asarray(final_edges)[:-1]
         stops = np.asarray(final_edges)[1:]
 
         return cls.from_starts_and_stops(starts, stops)
 
     @classmethod
     def bin_by_custom(cls, starts, stops):
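For reference, a hypothetical call to the classmethod reformatted above, reusing arrival_times from the earlier sketch; p0 is the false-positive probability fed to the Bayesian Blocks prior (Scargle et al.), and bkg_integral_distribution stays at its default here:

```python
# unbinned arrival times in, an optimally segmented TemporalBinner out
bins = TemporalBinner.bin_by_bayesian_blocks(arrival_times, p0=0.01)
```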
@@ -550,12 +592,17 @@ def bin_by_custom(cls, starts, stops):
         :return:
         """
 
-
         return cls.from_starts_and_stops(starts, stops)
 
     @staticmethod
-    def _check_exceeds_sigma_interval(start, stop, counts, sigma_level, background_getter,
-                                      background_error_getter=None):
+    def _check_exceeds_sigma_interval(
+        start,
+        stop,
+        counts,
+        sigma_level,
+        background_getter,
+        background_error_getter=None,
+    ):
 
         """
@@ -581,7 +628,6 @@ def _check_exceeds_sigma_interval(start, stop, counts, sigma_level, background_getter, background_error_getter=None):
 
             sigma = sig.li_and_ma_equivalent_for_gaussian_background(bkg_error)[0]
 
-
         else:
 
             sigma = sig.li_and_ma()[0]
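The sig.li_and_ma() call above abstracts the Li & Ma (1983) significance for on/off counting. As a reference sketch of equation 17 of that paper (not threeML's implementation):

```python
import numpy as np

def li_and_ma_significance(n_on, n_off, alpha=1.0):
    # alpha is the on/off exposure ratio; valid for n_on, n_off > 0
    term_on = n_on * np.log((1.0 + alpha) / alpha * (n_on / (n_on + n_off)))
    term_off = n_off * np.log((1.0 + alpha) * (n_off / (n_on + n_off)))
    return np.sqrt(2.0) * np.sqrt(term_on + term_off)
```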
@@ -597,7 +643,7 @@ def _check_exceeds_sigma_interval(start, stop, counts, sigma_level, background_getter, background_error_getter=None):
             return False
 
     @staticmethod
-    def _select_events(arrival_times, start, stop ):
+    def _select_events(arrival_times, start, stop):
         """
         get the events and total counts over an interval
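The body of _select_events is collapsed here. Given sorted arrival times, a plausible implementation matching its docstring is a pair of binary searches (a guess at the collapsed body, not a copy of it):

```python
import numpy as np

def select_events(arrival_times, start, stop):
    # assumes arrival_times is sorted ascending
    idx1 = np.searchsorted(arrival_times, start)
    idx2 = np.searchsorted(arrival_times, stop)
    return arrival_times[idx1:idx2], idx2 - idx1
```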
