lsst · timj · Feb 5, 2021 · Feb 4, 2021 · Feb 4, 2021 · natelust
diff --git a/python/lsst/daf/butler/core/_butlerUri/_butlerUri.py b/python/lsst/daf/butler/core/_butlerUri/_butlerUri.py
@@ -53,6 +53,9 @@
 # Regex for looking for URI escapes
 ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")
 
+# Precomputed escaped hash
+ESCAPED_HASH = urllib.parse.quote("#")
+
 
 class ButlerURI:
     """Convenience wrapper around URI parsers.
@@ -144,6 +147,14 @@ def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI, Path],
                     log.warning("Possible double encoding of %s", uri)
                 else:
                     uri = urllib.parse.quote(uri)
+                    # Special case hash since we must support fragments
+                    # even in schemeless URIs -- although try to only replace
+                    # them in file part and not directory part
+                    if ESCAPED_HASH in uri:
+                        dirpos = uri.rfind("/")
+                        # Do replacement after this /
+                        uri = uri[:dirpos+1] + uri[dirpos+1:].replace(ESCAPED_HASH, "#")
+
             parsed = urllib.parse.urlparse(uri)
         elif isinstance(uri, urllib.parse.ParseResult):
             parsed = copy.copy(uri)

diff --git a/python/lsst/daf/butler/core/_butlerUri/schemeless.py b/python/lsst/daf/butler/core/_butlerUri/schemeless.py
@@ -198,7 +198,9 @@ def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, Bu
         # ParseResult is a NamedTuple so _replace is standard API
         parsed = parsed._replace(**replacements)
 
-        if parsed.params or parsed.fragment or parsed.query:
+        # We do allow fragment but do not expect params or query to be
+        # specified for schemeless
+        if parsed.params or parsed.query:
             log.warning("Additional items unexpectedly encountered in schemeless URI: %s", parsed.geturl())
 
         return parsed, dirLike
diff --git a/python/lsst/daf/butler/core/fileTemplates.py b/python/lsst/daf/butler/core/fileTemplates.py
@@ -531,9 +531,12 @@ def format(self, ref: DatasetRef) -> str:
             output = output + literal + format(value, format_spec)
 
         # Replace periods with underscores in the non-directory part to
-        # prevent file extension confusion.
+        # prevent file extension confusion. Also replace # in the non-dir
+        # part to avoid confusion with URI fragments
         head, tail = os.path.split(output)
-        output = os.path.join(head, tail.replace(".", "_"))
+        tail = tail.replace(".", "_")
+        tail = tail.replace("#", "HASH")
+        output = os.path.join(head, tail)
 
         # Complain if we were meant to use a component
         if component is not None and not usedComponent:

diff --git a/tests/test_location.py b/tests/test_location.py
@@ -282,7 +282,7 @@ def testQuotedRoot(self):
 
         for pathInStore in ("relative/path/file.ext.gz",
                             "relative/path+2/file.ext.gz",
-                            "relative/path+3/file#.ext.gz"):
+                            "relative/path+3/file&.ext.gz"):
             loc1 = factory.fromPath(pathInStore)
 
             self.assertEqual(loc1.pathInStore.path, pathInStore)

diff --git a/tests/test_uri.py b/tests/test_uri.py
@@ -256,6 +256,13 @@ def testEscapes(self):
             uri = ButlerURI(urllib.parse.quote(plus_path), forceDirectory=True)
         self.assertEqual(uri.ospath, plus_path)
 
+        # Check that # is not escaped for schemeless URIs
+        hash_path = "/a/b#/c&d#xyz"
+        hpos = hash_path.rfind("#")
+        uri = ButlerURI(hash_path)
+        self.assertEqual(uri.ospath, hash_path[:hpos])
+        self.assertEqual(uri.fragment, hash_path[hpos + 1:])
+
 
 @unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
 @mock_s3