Skip to content

Commit

Permalink
Merge pull request #9 from fmigneault/fix-warning-gdal-sentinel-zip
Browse files Browse the repository at this point in the history
add fix for gdal warning on sentinel zip parsing
  • Loading branch information
plstcharles committed May 13, 2020
2 parents 0515b5a + d99302f commit b50f607
Show file tree
Hide file tree
Showing 8 changed files with 26 additions and 18 deletions.
6 changes: 6 additions & 0 deletions Dockerfile-geo
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ FROM thelper:base
LABEL name="thelper-geo"
LABEL description.geo="Adds geospatial related packages to run machine learning projects with geo-referenced imagery"

# Fix the warning logged by the GDAL sub-package when accessing Sentinel data via SAFE.ZIP archives.
# This is only problematic here because the 'root' conda env is used;
# a normal user installation with conda activation configures everything correctly.
# (https://github.com/conda-forge/gdal-feedstock/issues/83#issue-162911573)
ENV CPL_ZIP_ENCODING=UTF-8

# everything already configured/copied by base thelper
# don't change directory to remain on specified workdir in base image
# only add extra geo packages
Expand Down
10 changes: 5 additions & 5 deletions thelper/data/geo/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ def __init__(self, raster_path, vector_path, px_size=None, skew=None,
# before anything else, create a hash to cache parsed data
cache_hash = thelper.utils.get_params_hash(
{k: v for k, v in vars().items() if not k.startswith("_") and k != "self"}) if not force_parse else None
assert isinstance(raster_path, str), f"raster file/folder path should be given as string"
assert isinstance(vector_path, str), f"vector file/folder path should be given as string"
assert isinstance(raster_path, str), "raster file/folder path should be given as string"
assert isinstance(vector_path, str), "vector file/folder path should be given as string"
self.raster_path = raster_path
self.vector_path = vector_path
assert px_size is None or \
Expand Down Expand Up @@ -180,10 +180,10 @@ def _parse_rasters(path, srs, reproj_rasters):
logger.debug(f"raster #{idx + 1} area = {data['target_roi'].area:.2f}")
# here, we enforce that raster datatypes/bandcounts match
assert data["band_count"] == rasters_data[0]["band_count"], \
f"parser expects that all raster band counts match" + \
"parser expects that all raster band counts match" + \
f"(found {str(data['band_count'])} and {str(rasters_data[0]['band_count'])})"
assert data["data_type"] == rasters_data[0]["data_type"], \
f"parser expects that all raster data types match" + \
"parser expects that all raster data types match" + \
f"(found {str(data['data_type'])} and {str(rasters_data[0]['data_type'])})"
data["to_target_transform"] = osr.CoordinateTransformation(data["srs"], srs)
data["from_target_transform"] = osr.CoordinateTransformation(srs, data["srs"])
Expand Down Expand Up @@ -220,7 +220,7 @@ def _parse_crops(self, cropper, cache_file_path, cache_hash):
Each 'crop' corresponds to a sample that can be loaded at runtime.
"""
logger.info(f"preparing crops...")
logger.info("preparing crops...")
cache_file_path = os.path.join(os.path.dirname(cache_file_path), cache_hash + ".crops.pkl") \
if cache_hash else None
if cache_file_path is not None and os.path.exists(cache_file_path):
Expand Down
6 changes: 3 additions & 3 deletions thelper/data/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ def default_collate(batch, force_tensor=True):
elif isinstance(batch[0], tuple) and hasattr(batch[0], '_fields'): # namedtuple
return type(batch[0])(*(default_collate(samples, force_tensor=force_tensor) for samples in zip(*batch)))
elif isinstance(batch[0], container_abcs.Sequence):
if isinstance(batch, list) and all([isinstance(l, list) for l in batch]) and \
all([isinstance(b, thelper.data.BoundingBox) for l in batch for b in l]):
if isinstance(batch, list) and all([isinstance(lbl, list) for lbl in batch]) and \
all([isinstance(b, thelper.data.BoundingBox) for lbl in batch for b in lbl]):
return batch
transposed = zip(*batch)
return [default_collate(samples, force_tensor=force_tensor) for samples in transposed]
Expand Down Expand Up @@ -300,7 +300,7 @@ def __init__(self, config):
if name in subset:
subset[name] /= usage
self.skip_verif = thelper.utils.str2bool(config["skip_verif"]) if "skip_verif" in config else True
logger.debug(f"batch sizes:" +
logger.debug("batch sizes:" +
(f"\n\ttrain = {self.train_batch_size}" if self.train_split else "") +
(f"\n\tvalid = {self.valid_batch_size}" if self.valid_split else "") +
(f"\n\ttest = {self.test_batch_size}" if self.test_split else ""))
Expand Down
2 changes: 1 addition & 1 deletion thelper/draw.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ def draw_classifs(images, preds=None, labels=None, class_names_map=None, redraw=
if not isinstance(labels, list) and not (isinstance(labels, torch.Tensor) and labels.dim() == 1):
raise AssertionError("expected classification labels to be in list or 1-d tensor format")
if isinstance(labels, list):
if all([isinstance(l, list) for l in labels]):
if all([isinstance(lbl, list) for lbl in labels]):
labels = list(itertools.chain.from_iterable(labels)) # merge all augmented lists together
if all([isinstance(t, torch.Tensor) for t in labels]):
labels = torch.cat(labels, 0)
Expand Down
3 changes: 2 additions & 1 deletion thelper/optim/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1344,7 +1344,8 @@ def update(self, # see `thelper.typedefs.IterCallbackParams` for more in
if task is not None:
assert isinstance(task, thelper.tasks.Segmentation), "unexpected task type with IoU metric"
if self.target_names is not None:
assert all([n in task.class_names for n in self.target_names]), f"missing iou target in task class names"
assert all([n in task.class_names for n in self.target_names]), \
"missing iou target in task class names"
self.target_idxs = [task.class_indices[n] for n in self.target_names]
else:
self.target_idxs = list(task.class_indices.values())
Expand Down
2 changes: 1 addition & 1 deletion thelper/train/classif.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def eval_epoch(self, model, epoch, dev, loader, metrics, output_path):
assert isinstance(label, list) and len(label) == len(input_val), \
"label should also be a list of the same length as input"
# this might be costly for nothing, we could remove the check and assume user is not dumb
assert not any([not torch.eq(l, label[0]).all() for l in label]), \
assert not any([not torch.eq(lbl, label[0]).all() for lbl in label]), \
"all labels should be identical! (why do eval-time augment otherwise?)"
label = label[0] # since all identical, just pick the first one and pretend its the only one
preds = None
Expand Down
2 changes: 1 addition & 1 deletion thelper/train/segm.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def eval_epoch(self, model, epoch, dev, loader, metrics, output_path):
assert isinstance(label_map, list) and len(label_map) == len(input_val), \
"label maps should also be provided via a list of the same length as the input_val"
# this might be costly for nothing, we could remove the check and assume user is not dumb
assert not any([not torch.eq(l, label_map[0]).all() for l in label_map]), \
assert not any([not torch.eq(lbl, label_map[0]).all() for lbl in label_map]), \
"all label maps should be identical! (why do eval-time augment otherwise?)"
label_map = label_map[0] # since all identical, just pick the first one and pretend its the only one
preds = None
Expand Down
13 changes: 7 additions & 6 deletions thelper/train/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,11 +614,12 @@ def group_bbox(self,
det_target_iou = [compute_bbox_iou(det, t) for t in target_bboxes]
best_iou_idx = int(np.argmax(det_target_iou))
target_detects[best_iou_idx].append({"bbox": det, "iou": det_target_iou[best_iou_idx]})
group_bboxes = [{"target": target_bboxes[i],
"detect": list(sorted(
target_detects[i], key=lambda d: d["iou"], reverse=True))
if sort_by_iou else target_detects[i]
} for i in range(target_count)] # noqa: PEP8
group_bboxes = [{
"target": target_bboxes[i],
"detect": list(
sorted(target_detects[i], key=lambda d: d["iou"], reverse=True)
) if sort_by_iou else target_detects[i]
} for i in range(target_count)]
# apply filters on grouped results
if self.iou_threshold:
for grp in group_bboxes:
Expand Down Expand Up @@ -725,7 +726,7 @@ def patch_none(to_patch, number_format='2.4f'): # type: (Any, str) -> str
header += f",detect_{k + 1}_name,detect_{k + 1}_bbox,detect_{k + 1}_conf,detect_{k + 1}_iou"
else:
# unknown count total detections (can be variable)
header += f",detect_name[N],detect_bbox[N],detect_conf[N],detect_iou[N],(...)[N]"
header += ",detect_name[N],detect_bbox[N],detect_conf[N],detect_iou[N],(...)[N]"
lines = [""] * len(report)
for i, result in enumerate(report):
target = result["target"]
Expand Down

0 comments on commit b50f607

Please sign in to comment.