From a1f162474ddbb3ea520a92e6fa13e6fd68af7c49 Mon Sep 17 00:00:00 2001 From: code29563 <89779096+code29563@users.noreply.github.com> Date: Thu, 21 Mar 2024 21:18:29 +0000 Subject: [PATCH] re-worked cutoff_error parameter addressing issue #268 building on PR #451 --- radis/lbl/base.py | 56 +++++++++++++++++------------------------------ 1 file changed, 20 insertions(+), 36 deletions(-) diff --git a/radis/lbl/base.py b/radis/lbl/base.py index 4048bf2e2..9d5582151 100644 --- a/radis/lbl/base.py +++ b/radis/lbl/base.py @@ -3168,8 +3168,8 @@ def _cutoff_linestrength(self, cutoff=None, cutoff_error=None): times. If 0, no cutoff. Default 0 cutoff_error: float - user-inputted value of cutoff error, to keep the estimated error - below this value. If None, no error to consider. Default None. + percentage below which to keep the estimated error, + adjusting the cutoff if necessary. If None, no error to consider. Default None. Notes ----- @@ -3191,9 +3191,6 @@ def _cutoff_linestrength(self, cutoff=None, cutoff_error=None): cutoff_error = self.params.cutoff_error verbose = self.verbose df = self.df1 - df1 = self.df1 - lines_by_intensity = df1.S.tolist() - lines_by_intensity.sort() if len(df) == 0: # no lines self._Nlines_cutoff = None @@ -3224,35 +3221,23 @@ def _cutoff_linestrength(self, cutoff=None, cutoff_error=None): if cutoff_error is not None: if cutoff_error < error: - # Remove additional lines such that cutoff error is less than user-inputted value - intensity_sum = 0 - for i in lines_by_intensity: - intensity_sum += i - # Last value of the list is the cumulative intensity - lines_by_intensity.append(intensity_sum) - desired_intensity = (cutoff_error / 100) * lines_by_intensity[-1] - next_index = 0 - sum = 0 - # Find the index till which sum <= desired intensity and remove the rest of the elements - for i in lines_by_intensity[:-1]: - if sum <= desired_intensity: - if sum + lines_by_intensity[next_index] > desired_intensity: - break - else: - sum += i - next_index += 1 - else: - break - # Change a copy of the list containing lines by intensity - correct_lines = lines_by_intensity.copy() - for i in range(next_index + 1, len(lines_by_intensity) + 1): - correct_lines.pop() - # Update the database - df1["S"] = correct_lines + lines_by_intensity = df.S.sort_values() + cumsummed = lines_by_intensity.cumsum() + cond = cumsummed <= (cutoff_error / 100) * df.S.sum() + post_cutoff = lines_by_intensity[cond] + # to account for the unlikely case where the cutoff in cond lies between duplicate values: + max_value = post_cutoff.max() + if post_cutoff.value_counts()[max_value] < lines_by_intensity.value_counts()[max_value]: + b = df.S < max_value # exclude max_value as including it pushes us over cutoff_error + in_or_ex = 'exclusive' + else: + b = df.S <= max_value + in_or_ex = 'inclusive' # Print current error - current_error = sum / lines_by_intensity[-1] * 100 + error = df.S[b].sum() / df.S.sum() * 100 print( - "Current percentage error: {0:.2f}% ".format(current_error) + "Cutoff for discarded lines adjusted to {0} ({1}). ".format(max_value,in_or_ex) + + "Current percentage error: {0:.2f}% ".format(error) + "Inputted error: {0:.2f}%".format(cutoff_error) ) @@ -3264,15 +3249,14 @@ def _cutoff_linestrength(self, cutoff=None, cutoff_error=None): ) + " Estimated error: {0:.2f}%".format(error) ) - b = df.S <= cutoff Nlines_cutoff = b.sum() else: print("Cutoff error not inputted.") if verbose >= 2: print( - "Discarded {0:.2f}% of lines (linestrength<{1}cm-1/(#.cm-2))".format( - Nlines_cutoff / len(df.S) * 100, cutoff + "Discarded {0:.2f}% of lines".format( + Nlines_cutoff / len(df.S) * 100 ) + " Estimated error: {0:.2f}%".format(error) ) @@ -3281,7 +3265,7 @@ def _cutoff_linestrength(self, cutoff=None, cutoff_error=None): "Estimated error after discarding lines is large: {0:.2f}%".format( error ) - + ". Consider reducing cutoff", + + ". Consider reducing cutoff (or cutoff_error if adjusted)", "LinestrengthCutoffWarning", )