Refactor complex methods
mraspaud committed Nov 23, 2021
1 parent e351e8f commit 6125338
Showing 4 changed files with 108 additions and 82 deletions.
102 changes: 58 additions & 44 deletions satpy/readers/mersi2_l1b.py
@@ -96,23 +96,7 @@ def get_dataset(self, dataset_id, ds_info):
if 'rows_per_scan' in self.filetype_info:
attrs.setdefault('rows_per_scan', self.filetype_info['rows_per_scan'])

fill_value = attrs.pop('FillValue', np.nan) # covered by valid_range
valid_range = attrs.pop('valid_range', None)
if dataset_id.get('calibration') == 'counts':
# preserve integer type of counts if possible
attrs['_FillValue'] = fill_value
new_fill = fill_value
else:
new_fill = np.nan
if valid_range is not None:
# Due to a bug in the valid_range upper limit of the 10.8 um (band 24) and
# 12.0 um (band 25) channels in the HDF data, the upper limit is hardcoded here.
if dataset_id['name'] in ['24', '25'] and valid_range[1] == 4095:
valid_range[1] = 25000
# typically bad_values == 65535, saturated == 65534
# dead detector == 65533
data = data.where((data >= valid_range[0]) &
(data <= valid_range[1]), new_fill)
data = self._mask_data(data, dataset_id, attrs)

slope = attrs.pop('Slope', None)
intercept = attrs.pop('Intercept', None)
@@ -130,36 +114,12 @@ def get_dataset(self, dataset_id, ds_info):
ds_info['calibration_index'])
data = coeffs[0] + coeffs[1] * data + coeffs[2] * data**2
elif dataset_id.get('calibration') == "brightness_temperature":
cal_index = ds_info['calibration_index']
# Apparently we don't use these calibration factors for Rad -> BT
# coeffs = self._get_coefficients(ds_info['calibration_key'], cal_index)
# # coefficients are per-scan, we need to repeat the values for a
# # clean alignment
# coeffs = np.repeat(coeffs, data.shape[0] // coeffs.shape[1], axis=1)
# coeffs = coeffs.rename({
# coeffs.dims[0]: 'coefficients', coeffs.dims[1]: 'y'
# }) # match data dims
# data = coeffs[0] + coeffs[1] * data + coeffs[2] * data**2 + coeffs[3] * data**3

calibration_index = ds_info['calibration_index']
# Converts um^-1 (wavenumbers) and (mW/m^2)/(sr/cm^-1) (radiance data)
# to SI units m^-1, mW*m^-3*sr^-1.
wave_number = 1. / (dataset_id['wavelength'][1] / 1e6)
# pass the dask array
bt_data = rad2temp(wave_number, data.data * 1e-5) # brightness temperature
if isinstance(bt_data, np.ndarray):
# old versions of pyspectral produce numpy arrays
data.data = da.from_array(bt_data, chunks=data.data.chunks)
else:
# new versions of pyspectral can do dask arrays
data.data = bt_data
# additional corrections from the file
corr_coeff_a = float(self['/attr/TBB_Trans_Coefficient_A'][cal_index])
corr_coeff_b = float(self['/attr/TBB_Trans_Coefficient_B'][cal_index])
if corr_coeff_a != 0:
data = (data - corr_coeff_b) / corr_coeff_a
# Some BT bands seem to have 0 in the first 10 columns
# and it is an invalid Kelvin measurement, so let's mask
data = data.where(data != 0)

data = self._get_bt_dataset(data, calibration_index, wave_number)

data.attrs = attrs
# convert bytes to str
@@ -174,3 +134,57 @@ def get_dataset(self, dataset_id, ds_info):
})

return data

def _mask_data(self, data, dataset_id, attrs):
"""Mask the data using fill_value and valid_range attributes."""
fill_value = attrs.pop('FillValue', np.nan) # covered by valid_range
valid_range = attrs.pop('valid_range', None)
if dataset_id.get('calibration') == 'counts':
# preserve integer type of counts if possible
attrs['_FillValue'] = fill_value
new_fill = fill_value
else:
new_fill = np.nan
if valid_range is not None:
# Due to a bug in the valid_range upper limit of the 10.8 um (band 24) and
# 12.0 um (band 25) channels in the HDF data, the upper limit is hardcoded here.
if dataset_id['name'] in ['24', '25'] and valid_range[1] == 4095:
valid_range[1] = 25000
# typically bad_values == 65535, saturated == 65534
# dead detector == 65533
data = data.where((data >= valid_range[0]) &
(data <= valid_range[1]), new_fill)
return data
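
For reference, a minimal standalone sketch of the masking behaviour above (not part of the commit; the array values, dims, and the 65535 fill value are made up for illustration):

import numpy as np
import xarray as xr

data = xr.DataArray(np.array([[10, 4095, 65533, 65535]], dtype=np.uint16),
                    dims=('y', 'x'))
valid_range = (0, 4095)

# counts: keep the integer fill value so the dtype can stay integral
masked_counts = data.where((data >= valid_range[0]) & (data <= valid_range[1]), 65535)
# calibrated data: out-of-range pixels become NaN (this promotes the array to float)
masked = data.where((data >= valid_range[0]) & (data <= valid_range[1]), np.nan)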

def _get_bt_dataset(self, data, calibration_index, wave_number):
"""Get the dataset as brightness temperature.

Apparently we don't use these calibration factors for Rad -> BT::

    coeffs = self._get_coefficients(ds_info['calibration_key'], calibration_index)
    # coefficients are per-scan, we need to repeat the values for a
    # clean alignment
    coeffs = np.repeat(coeffs, data.shape[0] // coeffs.shape[1], axis=1)
    coeffs = coeffs.rename({
        coeffs.dims[0]: 'coefficients', coeffs.dims[1]: 'y'
    })  # match data dims
    data = coeffs[0] + coeffs[1] * data + coeffs[2] * data**2 + coeffs[3] * data**3
"""
# pass the dask array
bt_data = rad2temp(wave_number, data.data * 1e-5) # brightness temperature
if isinstance(bt_data, np.ndarray):
# old versions of pyspectral produce numpy arrays
data.data = da.from_array(bt_data, chunks=data.data.chunks)
else:
# new versions of pyspectral can do dask arrays
data.data = bt_data
# additional corrections from the file
corr_coeff_a = float(self['/attr/TBB_Trans_Coefficient_A'][calibration_index])
corr_coeff_b = float(self['/attr/TBB_Trans_Coefficient_B'][calibration_index])
if corr_coeff_a != 0:
data = (data - corr_coeff_b) / corr_coeff_a
# Some BT bands seem to have 0 in the first 10 columns
# and it is an invalid Kelvin measurement, so let's mask
data = data.where(data != 0)
return data
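
For orientation, a worked sketch of the wavenumber and unit handling that feeds _get_bt_dataset (not from the commit; the 10.8 um wavelength and radiance values are illustrative, and it assumes rad2temp is pyspectral's blackbody_wn_rad2temp):

import numpy as np
from pyspectral.blackbody import blackbody_wn_rad2temp as rad2temp

wavelength_um = 10.8                          # hypothetical IR band
wave_number = 1. / (wavelength_um / 1e6)      # ~92592.6 m^-1, as in get_dataset

# made-up radiances in mW/(m^2 sr cm^-1);
# 1e-5 = 1e-3 (mW -> W) * 1e-2 (per cm^-1 -> per m^-1)
radiance = np.array([80.0, 100.0])
bt = rad2temp(wave_number, radiance * 1e-5)   # brightness temperature in K

# the per-band corrections from the file attributes would then apply, e.g.:
# bt = (bt - corr_coeff_b) / corr_coeff_a     # when corr_coeff_a != 0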
16 changes: 10 additions & 6 deletions satpy/readers/tropomi_l2.py
@@ -115,21 +115,25 @@ def available_datasets(self, configured_datasets=None):
# then we should keep it going down the chain
yield is_avail, ds_info

# This is where we dynamically add new datasets
# We will sift through all groups and variables, looking for data matching
# the geolocation bounds
yield from self._iterate_over_dataset_contents(handled_variables, lat_shape)

def _iterate_over_dataset_contents(self, handled_variables, shape):
"""Iterate over dataset contents.

This is where we dynamically add new datasets. We sift through all
groups and variables, looking for data matching the geolocation bounds.
"""
for var_name, val in self.file_content.items():
# Only evaluate variables
if isinstance(val, netCDF4.Variable):
logger.debug("Evaluating new variable: %s", var_name)
var_shape = self[var_name + "/shape"]
logger.debug("Dims:{}".format(var_shape))
if (lat_shape == var_shape[:len(lat_shape)]):
if shape == var_shape[:len(shape)]:
logger.debug("Found valid additional dataset: %s", var_name)
# Skip anything we have already configured
if (var_name in handled_variables):
if var_name in handled_variables:
logger.debug("Already handled, skipping: %s", var_name)
continue
handled_variables.add(var_name)
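
The prefix comparison shape == var_shape[:len(shape)] is the entire matching rule; a small sketch of how it behaves (shapes are made up):

lat_shape = (330, 450)
for var_shape in [(330, 450), (330, 450, 4), (123, 450)]:
    print(var_shape, lat_shape == var_shape[:len(lat_shape)])
# (330, 450) True      -- plain 2-D product variable
# (330, 450, 4) True   -- e.g. bounds variables with a corner dimension
# (123, 450) False     -- different grid, skipped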
23 changes: 13 additions & 10 deletions satpy/tests/reader_tests/test_tropomi_l2.py
@@ -41,7 +41,6 @@ class FakeNetCDF4FileHandlerTL2(FakeNetCDF4FileHandler):

def get_test_content(self, filename, filename_info, filetype_info):
"""Mimic reader input file content."""
from xarray import DataArray
dt_s = filename_info.get('start_time', datetime(2016, 1, 1, 12, 0, 0))
dt_e = filename_info.get('end_time', datetime(2016, 1, 1, 12, 0, 0))

@@ -68,15 +67,7 @@ def get_test_content(self, filename, filename_info, filetype_info):
continue
file_content[k + '/shape'] = DEFAULT_FILE_SHAPE

# convert to xarrays
for key, val in file_content.items():
if isinstance(val, np.ndarray):
if 1 < val.ndim <= 2:
file_content[key] = DataArray(val, dims=('scanline', 'ground_pixel'))
elif val.ndim > 2:
file_content[key] = DataArray(val, dims=('scanline', 'ground_pixel', 'corner'))
else:
file_content[key] = DataArray(val)
self._convert_data_content_to_dataarrays(file_content)
file_content['PRODUCT/latitude'].attrs['_FillValue'] = -999.0
file_content['PRODUCT/longitude'].attrs['_FillValue'] = -999.0
file_content['PRODUCT/SUPPORT_DATA/GEOLOCATIONS/latitude_bounds'].attrs['_FillValue'] = -999.0
@@ -92,6 +83,18 @@ def get_test_content(self, filename, filename_info, filetype_info):

return file_content

def _convert_data_content_to_dataarrays(self, file_content):
"""Convert data content to xarray's dataarrays."""
from xarray import DataArray
for key, val in file_content.items():
if isinstance(val, np.ndarray):
if 1 < val.ndim <= 2:
file_content[key] = DataArray(val, dims=('scanline', 'ground_pixel'))
elif val.ndim > 2:
file_content[key] = DataArray(val, dims=('scanline', 'ground_pixel', 'corner'))
else:
file_content[key] = DataArray(val)
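
A standalone illustration of the dims assignment above (hypothetical keys and shapes; only the ndarray branch matters here):

import numpy as np
from xarray import DataArray

content = {
    'PRODUCT/latitude': np.zeros((3, 4)),            # 2-D  -> ('scanline', 'ground_pixel')
    'PRODUCT/latitude_bounds': np.zeros((3, 4, 4)),  # >2-D -> ('scanline', 'ground_pixel', 'corner')
    'PRODUCT/delta_time': np.zeros(3),               # 1-D  -> default dims
}
for key, val in content.items():
    if 1 < val.ndim <= 2:
        content[key] = DataArray(val, dims=('scanline', 'ground_pixel'))
    elif val.ndim > 2:
        content[key] = DataArray(val, dims=('scanline', 'ground_pixel', 'corner'))
    else:
        content[key] = DataArray(val)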


class TestTROPOMIL2Reader(unittest.TestCase):
"""Test TROPOMI L2 Reader."""
49 changes: 27 additions & 22 deletions satpy/writers/__init__.py
@@ -238,28 +238,7 @@ def add_overlay(orig_img, area, coast_dir, color=None, width=None, resolution=No
"overlays/decorations to non-RGB data.")

if overlays is None:
overlays = dict()
# fill with sensible defaults
general_params = {'outline': color or (0, 0, 0),
'width': width or 0.5}
for key, val in general_params.items():
if val is not None:
overlays.setdefault('coasts', {}).setdefault(key, val)
overlays.setdefault('borders', {}).setdefault(key, val)
if level_coast is None:
level_coast = 1
overlays.setdefault('coasts', {}).setdefault('level', level_coast)
if level_borders is None:
level_borders = 1
overlays.setdefault('borders', {}).setdefault('level', level_borders)

if grid is not None:
if 'major_lonlat' in grid and grid['major_lonlat']:
major_lonlat = grid.pop('major_lonlat')
minor_lonlat = grid.pop('minor_lonlat', major_lonlat)
grid.update({'Dlonlat': major_lonlat, 'dlonlat': minor_lonlat})
for key, val in grid.items():
overlays.setdefault('grid', {}).setdefault(key, val)
overlays = _create_overlays_dict(color, width, grid, level_coast, level_borders)

cw_ = ContourWriterAGG(coast_dir)
new_image = orig_img.apply_pil(_burn_overlay, res_mode,
@@ -268,6 +247,32 @@ def add_overlay(orig_img, area, coast_dir, color=None, width=None, resolution=No
return new_image


def _create_overlays_dict(color, width, grid, level_coast, level_borders):
"""Fill in the overlays dict."""
overlays = dict()
# fill with sensible defaults
general_params = {'outline': color or (0, 0, 0),
'width': width or 0.5}
for key, val in general_params.items():
if val is not None:
overlays.setdefault('coasts', {}).setdefault(key, val)
overlays.setdefault('borders', {}).setdefault(key, val)
if level_coast is None:
level_coast = 1
overlays.setdefault('coasts', {}).setdefault('level', level_coast)
if level_borders is None:
level_borders = 1
overlays.setdefault('borders', {}).setdefault('level', level_borders)
if grid is not None:
if 'major_lonlat' in grid and grid['major_lonlat']:
major_lonlat = grid.pop('major_lonlat')
minor_lonlat = grid.pop('minor_lonlat', major_lonlat)
grid.update({'Dlonlat': major_lonlat, 'dlonlat': minor_lonlat})
for key, val in grid.items():
overlays.setdefault('grid', {}).setdefault(key, val)
return overlays
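
To make the defaults concrete, this is what the helper yields when everything is left unset, and with a simple grid (a sketch; the outputs are written out by hand):

overlays = _create_overlays_dict(color=None, width=None, grid=None,
                                 level_coast=None, level_borders=None)
# {'coasts': {'outline': (0, 0, 0), 'width': 0.5, 'level': 1},
#  'borders': {'outline': (0, 0, 0), 'width': 0.5, 'level': 1}}

overlays = _create_overlays_dict(None, None, {'major_lonlat': (10, 10)}, None, None)
# same as above, plus {'grid': {'Dlonlat': (10, 10), 'dlonlat': (10, 10)}}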


def add_text(orig, dc, img, text):
"""Add text to an image using the pydecorate package.
