Refactor DNB local histogram equalization
djhoese committed Nov 24, 2021
1 parent c2fe02c commit bd7b10a
Showing 1 changed file with 165 additions and 162 deletions.
327 changes: 165 additions & 162 deletions satpy/composites/viirs.py
@@ -480,8 +480,7 @@ def local_histogram_equalization(data, mask_to_equalize, valid_data_mask=None, n
Returns: The equalized data
"""
out = out if out is not None else np.zeros_like(data)
# if we don't have a valid mask, use the mask of what we should be
# equalizing
# if we don't have a valid mask, use the mask of what we should be equalizing
if valid_data_mask is None:
valid_data_mask = mask_to_equalize

@@ -495,77 +494,20 @@ def local_histogram_equalization(data, mask_to_equalize, valid_data_mask=None, n
(total_cols % tile_size) == 0) else int(total_cols / tile_size) + 1
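For reference, the row/column tile counts computed above are a ceil division written out longhand; a minimal equivalent sketch (helper name hypothetical, not part of the diff):

```python
import math

# Number of tiles needed to cover an axis of length `total` with tiles of
# `tile_size` pixels; equivalent to the inline conditionals above.
def _num_tiles(total, tile_size):
    return math.ceil(total / tile_size)

assert _num_tiles(1000, 100) == 10  # evenly divisible: no partial tile
assert _num_tiles(1001, 100) == 11  # remainder: one extra partial tile
```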

# an array of our distribution functions for equalization
all_cumulative_dist_functions = [[]]
all_cumulative_dist_functions = [[] for _ in range(row_tiles)]
# an array of our bin information for equalization
all_bin_information = [[]]
all_bin_information = [[] for _ in range(row_tiles)]

# loop through our tiles and create the histogram equalizations for each
# one
# loop through our tiles and create the histogram equalizations for each one
for num_row_tile in range(row_tiles):

# make sure we have enough rows available to store info on this next
# row of tiles
if len(all_cumulative_dist_functions) <= num_row_tile:
all_cumulative_dist_functions.append([])
if len(all_bin_information) <= num_row_tile:
all_bin_information.append([])

# go through each tile in this row and calculate the equalization
for num_col_tile in range(col_tiles):

# calculate the range for this tile (min is inclusive, max is
# exclusive)
min_row = num_row_tile * tile_size
max_row = min_row + tile_size
min_col = num_col_tile * tile_size
max_col = min_col + tile_size

# for speed of calculation, pull out the mask of pixels that should
# be used to calculate the histogram
mask_valid_data_in_tile = valid_data_mask[min_row:max_row, min_col:
max_col]

# if we have any valid data in this tile, calculate a histogram equalization for this tile
# (note: even if this tile does not fall in the mask_to_equalize, its histogram may be used by other tiles)
cumulative_dist_function, temp_bins = None, None
if mask_valid_data_in_tile.any():

# use all valid data in the tile, so separate sections will
# blend cleanly
temp_valid_data = data[min_row:max_row, min_col:max_col][
mask_valid_data_in_tile]
temp_valid_data = temp_valid_data[
temp_valid_data >= 0
] # TEMP, testing to see if negative data is messing everything up
# limit the contrast by only considering data within a certain
# range of the average
if std_mult_cutoff is not None:
avg = np.mean(temp_valid_data)
std = np.std(temp_valid_data)
# limit our range to avg +/- std_mult_cutoff*std; e.g. the
# default std_mult_cutoff is 4.0, so about 99.99% of normally
# distributed data
conservative_mask = (
temp_valid_data < (avg + std * std_mult_cutoff)) & (
temp_valid_data > (avg - std * std_mult_cutoff))
temp_valid_data = temp_valid_data[conservative_mask]

# if we are taking the log of our data, do so now
if do_log_scale:
temp_valid_data = np.log(temp_valid_data + log_offset)

# do the histogram equalization and get the resulting
# distribution function and bin information
if temp_valid_data.size > 0:
cumulative_dist_function, temp_bins = _histogram_equalization_helper(
temp_valid_data,
number_of_bins,
clip_limit=clip_limit,
slope_limit=slope_limit)

# hang on to our equalization related information for use later
all_cumulative_dist_functions[num_row_tile].append(
cumulative_dist_function)
all_bin_information[num_row_tile].append(temp_bins)
tile_dist_func, tile_bin_info = _histogram_equalize_one_tile(
data, valid_data_mask, std_mult_cutoff, do_log_scale, log_offset,
clip_limit, slope_limit, number_of_bins, num_row_tile, num_col_tile,
tile_size
)
all_cumulative_dist_functions[num_row_tile].append(tile_dist_func)
all_bin_information[num_row_tile].append(tile_bin_info)

# get the tile weight array so we can use it to interpolate our data
tile_weights = _calculate_weights(tile_size)
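`_calculate_weights` itself is not shown in this diff; a plausible sketch of what such bilinear tile-blending weights could look like, offered only as an assumption about its intent:

```python
import numpy as np

# Hypothetical sketch (NOT the actual _calculate_weights body): bilinear
# blending weights of shape (3, 3, tile_size, tile_size). For each pixel,
# index [0] is the neighbour one tile up/left, [1] the pixel's own tile,
# [2] one tile down/right; the 3x3 weights sum to 1 at every pixel.
def _bilinear_tile_weights(tile_size):
    t = (np.arange(tile_size) + 0.5) / tile_size  # pixel position in [0, 1)
    prev_w = np.clip(0.5 - t, 0.0, None)          # pull toward previous tile
    next_w = np.clip(t - 0.5, 0.0, None)          # pull toward next tile
    self_w = 1.0 - prev_w - next_w                # remainder stays local
    axis = np.stack([prev_w, self_w, next_w])     # shape (3, tile_size)
    # outer product of row and column ramps -> (3, 3, tile_size, tile_size)
    return axis[:, None, :, None] * axis[None, :, None, :]
```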
@@ -574,99 +516,11 @@ def local_histogram_equalization(data, mask_to_equalize, valid_data_mask=None, n
# versions of the data
for num_row_tile in range(row_tiles):
for num_col_tile in range(col_tiles):

# calculate the range for this tile (min is inclusive, max is
# exclusive)
min_row = num_row_tile * tile_size
max_row = min_row + tile_size
min_col = num_col_tile * tile_size
max_col = min_col + tile_size

# for convenience, pull some of these tile sized chunks out
temp_all_data = data[min_row:max_row, min_col:max_col].copy()
temp_mask_to_equalize = mask_to_equalize[min_row:max_row, min_col:
max_col]
temp_all_valid_data_mask = valid_data_mask[min_row:max_row,
min_col:max_col]

# if we have any data in this tile, calculate our weighted sum
if temp_mask_to_equalize.any():
if do_log_scale:
temp_all_data[temp_all_valid_data_mask] = np.log(
temp_all_data[temp_all_valid_data_mask] + log_offset)
temp_data_to_equalize = temp_all_data[temp_mask_to_equalize]
temp_all_valid_data = temp_all_data[temp_all_valid_data_mask]

# a place to hold our weighted sum that represents the interpolated contributions
# of the histogram equalizations from the surrounding tiles
temp_sum = np.zeros_like(temp_data_to_equalize)

# how much weight were we unable to use because those tiles
# fell off the edge of the image?
unused_weight = np.zeros(temp_data_to_equalize.shape,
dtype=tile_weights.dtype)

# loop through all the surrounding tiles and process their
# contributions to this tile
for weight_row in range(3):
for weight_col in range(3):
# figure out which adjacent tile we're processing (in
# overall tile coordinates instead of relative to our
# current tile)
calculated_row = num_row_tile - 1 + weight_row
calculated_col = num_col_tile - 1 + weight_col
tmp_tile_weights = tile_weights[
weight_row, weight_col][np.where(
temp_mask_to_equalize)]

# if we're inside the tile array and the tile we're
# processing has a histogram equalization for us to
# use, process it
if ((calculated_row >= 0) and
(calculated_row < row_tiles) and
(calculated_col >= 0) and
(calculated_col < col_tiles) and (
all_bin_information[calculated_row][
calculated_col] is not None) and
(all_cumulative_dist_functions[calculated_row][
calculated_col] is not None)):

# equalize our current tile using the histogram
# equalization from the tile we're processing
temp_equalized_data = np.interp(
temp_all_valid_data, all_bin_information[
calculated_row][calculated_col][:-1],
all_cumulative_dist_functions[calculated_row][
calculated_col])
temp_equalized_data = temp_equalized_data[np.where(
temp_mask_to_equalize[
temp_all_valid_data_mask])]

# add the contribution for the tile we're
# processing to our weighted sum
temp_sum += (temp_equalized_data *
tmp_tile_weights)

# if the tile we're processing doesn't exist, hang onto the weight we
# would have used for it so we can correct that later
else:
unused_weight -= tmp_tile_weights

# if we have unused weights, scale our values to correct for
# that
if unused_weight.any():
# TODO, if the mask masks everything out this will be a
# zero!
temp_sum /= unused_weight + 1

# now that we've calculated the weighted sum for this tile, set
# it in our data array
out[min_row:max_row, min_col:max_col][
temp_mask_to_equalize] = temp_sum
# TEMP, test without using weights
# data[min_row:max_row, min_col:max_col][temp_mask_to_equalize] = \
# np.interp(temp_data_to_equalize, all_bin_information[num_row_tile][num_col_tile][:-1],
# all_cumulative_dist_functions[num_row_tile][num_col_tile])
_interpolate_local_equalized_tiles(
data, out, mask_to_equalize, valid_data_mask, do_log_scale, log_offset,
tile_weights, all_bin_information, all_cumulative_dist_functions,
num_row_tile, num_col_tile, row_tiles, col_tiles, tile_size,
)

# if we were asked to, normalize our data to be between zero and one,
# rather than zero and number_of_bins
@@ -676,6 +530,155 @@ def local_histogram_equalization(data, mask_to_equalize, valid_data_mask=None, n
return out
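A usage sketch of the refactored function with synthetic data; arguments beyond the three shown are left at the defaults from the (truncated) signature, and the values are illustrative only:

```python
import numpy as np
from satpy.composites.viirs import local_histogram_equalization

# Toy radiance field with an invalid (negative) corner.
data = np.random.default_rng(42).gamma(2.0, 2.0, size=(200, 200))
data[:20, :20] = -1.0
valid = data >= 0
# Equalize only the valid pixels, using the same mask for histogram building.
equalized = local_histogram_equalization(data, valid, valid_data_mask=valid)
```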


def _histogram_equalize_one_tile(
data, valid_data_mask, std_mult_cutoff, do_log_scale, log_offset,
clip_limit, slope_limit, number_of_bins, num_row_tile, num_col_tile,
tile_size):
# calculate the range for this tile (min is inclusive, max is
# exclusive)
min_row = num_row_tile * tile_size
max_row = min_row + tile_size
min_col = num_col_tile * tile_size
max_col = min_col + tile_size

# for speed of calculation, pull out the mask of pixels that should
# be used to calculate the histogram
mask_valid_data_in_tile = valid_data_mask[min_row:max_row, min_col:max_col]

# if we have any valid data in this tile, calculate a histogram equalization for this tile
# (note: even if this tile does not fall in the mask_to_equalize, its histogram may be used by other tiles)
if not mask_valid_data_in_tile.any():
return None, None

# use all valid data in the tile, so separate sections will
# blend cleanly
temp_valid_data = data[min_row:max_row, min_col:max_col][
mask_valid_data_in_tile]
temp_valid_data = temp_valid_data[
temp_valid_data >= 0
] # TEMP, testing to see if negative data is messing everything up
# limit the contrast by only considering data within a certain
# range of the average
if std_mult_cutoff is not None:
avg = np.mean(temp_valid_data)
std = np.std(temp_valid_data)
# limit our range to avg +/- std_mult_cutoff*std; e.g. the
# default std_mult_cutoff is 4.0, so about 99.99% of normally
# distributed data
conservative_mask = (
temp_valid_data < (avg + std * std_mult_cutoff)) & (
temp_valid_data > (avg - std * std_mult_cutoff))
temp_valid_data = temp_valid_data[conservative_mask]

# if we are taking the log of our data, do so now
if do_log_scale:
temp_valid_data = np.log(temp_valid_data + log_offset)

# do the histogram equalization and get the resulting
# distribution function and bin information
if not temp_valid_data.size:
return None, None

cumulative_dist_function, temp_bins = _histogram_equalization_helper(
temp_valid_data,
number_of_bins,
clip_limit=clip_limit,
slope_limit=slope_limit)
return cumulative_dist_function, temp_bins
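The std_mult_cutoff branch above trims outliers before the histogram is built; a small self-contained illustration of the same masking:

```python
import numpy as np

# Keep only samples within std_mult_cutoff standard deviations of the mean,
# mirroring the conservative_mask computed in _histogram_equalize_one_tile.
rng = np.random.default_rng(0)
samples = np.concatenate([rng.normal(100.0, 5.0, 10_000), [1e6]])  # one outlier
avg, std = samples.mean(), samples.std()
cutoff = 4.0  # the default std_mult_cutoff
keep = (samples < avg + cutoff * std) & (samples > avg - cutoff * std)
trimmed = samples[keep]  # the 1e6 outlier falls outside avg +/- 4*std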


def _interpolate_local_equalized_tiles(
data, out, mask_to_equalize, valid_data_mask, do_log_scale, log_offset,
tile_weights, all_bin_information, all_cumulative_dist_functions,
row_idx, col_idx, row_tiles, col_tiles, tile_size):
# calculate the range for this tile (min is inclusive, max is
# exclusive)
num_row_tile = row_idx
num_col_tile = col_idx
min_row = num_row_tile * tile_size
max_row = min_row + tile_size
min_col = num_col_tile * tile_size
max_col = min_col + tile_size

# for convenience, pull some of these tile sized chunks out
temp_all_data = data[min_row:max_row, min_col:max_col].copy()
temp_mask_to_equalize = mask_to_equalize[min_row:max_row, min_col:max_col]
temp_all_valid_data_mask = valid_data_mask[min_row:max_row, min_col:max_col]

# if we have any data in this tile, calculate our weighted sum
if not temp_mask_to_equalize.any():
return

if do_log_scale:
temp_all_data[temp_all_valid_data_mask] = np.log(
temp_all_data[temp_all_valid_data_mask] + log_offset)
temp_data_to_equalize = temp_all_data[temp_mask_to_equalize]
temp_all_valid_data = temp_all_data[temp_all_valid_data_mask]

# a place to hold our weighted sum that represents the interpolated contributions
# of the histogram equalizations from the surrounding tiles
temp_sum = np.zeros_like(temp_data_to_equalize)

# how much weight were we unable to use because those tiles
# fell off the edge of the image?
unused_weight = np.zeros(temp_data_to_equalize.shape, dtype=tile_weights.dtype)

# loop through all the surrounding tiles and process their
# contributions to this tile
for weight_row in range(3):
for weight_col in range(3):
# figure out which adjacent tile we're processing (in
# overall tile coordinates instead of relative to our
# current tile)
calculated_row = num_row_tile - 1 + weight_row
calculated_col = num_col_tile - 1 + weight_col
tmp_tile_weights = tile_weights[
weight_row, weight_col][np.where(temp_mask_to_equalize)]

# if we're inside the tile array and the tile we're
# processing has a histogram equalization for us to
# use, process it
if ((calculated_row >= 0) and
(calculated_row < row_tiles) and
(calculated_col >= 0) and
(calculated_col < col_tiles) and (
all_bin_information[calculated_row][
calculated_col] is not None) and
(all_cumulative_dist_functions[calculated_row][
calculated_col] is not None)):

# equalize our current tile using the histogram
# equalization from the tile we're processing
temp_equalized_data = np.interp(
temp_all_valid_data, all_bin_information[calculated_row][calculated_col][:-1],
all_cumulative_dist_functions[calculated_row][
calculated_col])
temp_equalized_data = temp_equalized_data[np.where(
temp_mask_to_equalize[temp_all_valid_data_mask])]

# add the contribution for the tile we're
# processing to our weighted sum
temp_sum += temp_equalized_data * tmp_tile_weights

# if the tile we're processing doesn't exist, hang onto the weight we
# would have used for it so we can correct that later
else:
unused_weight -= tmp_tile_weights

# if we have unused weights, scale our values to correct for that
if unused_weight.any():
# TODO: if the mask masks everything out this will be a zero!
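# note: unused_weight was accumulated by subtraction, so it is non-positive
# and (unused_weight + 1) is the total weight that actually contributed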
temp_sum /= unused_weight + 1

# now that we've calculated the weighted sum for this tile, set
# it in our data array
out[min_row:max_row, min_col:max_col][
temp_mask_to_equalize] = temp_sum
# TEMP, test without using weights
# data[min_row:max_row, min_col:max_col][temp_mask_to_equalize] = \
# np.interp(temp_data_to_equalize, all_bin_information[num_row_tile][num_col_tile][:-1],
# all_cumulative_dist_functions[num_row_tile][num_col_tile])
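The per-neighbour remapping in the loop above reduces to np.interp against each tile's left bin edges and its cumulative distribution function; a minimal sketch with toy values:

```python
import numpy as np

values = np.array([0.1, 0.5, 0.9])         # data to equalize
bin_edges = np.linspace(0.0, 1.0, 6)       # 5 bins -> 6 edges
cdf = np.array([0.5, 1.5, 2.5, 4.0, 5.0])  # toy cumulative counts, one per bin
# Interpolate against the left bin edges, as the loop above does with bins[:-1].
remapped = np.interp(values, bin_edges[:-1], cdf)
```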


def _histogram_equalization_helper(valid_data, number_of_bins, clip_limit=None, slope_limit=None):
"""Calculate the simplest possible histogram equalization, using only valid data.
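The helper's body is collapsed in this view; a minimal sketch of plain CDF-based equalization, under the assumption that clip_limit and slope_limit are left unset:

```python
import numpy as np

# Plain (unclipped) histogram equalization: histogram the valid data, take the
# cumulative sum, and scale so outputs span [0, number_of_bins]. The real
# helper additionally supports clip_limit and slope_limit, omitted here.
def _plain_equalization_helper(valid_data, number_of_bins):
    hist, bin_edges = np.histogram(valid_data, bins=number_of_bins)
    cdf = hist.cumsum().astype(np.float64)
    cdf *= number_of_bins / cdf[-1]  # scale to the output range
    return cdf, bin_edges
```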
