-
Notifications
You must be signed in to change notification settings - Fork 1
/
_resourcePath.py
1523 lines (1296 loc) · 56.6 KB
/
_resourcePath.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# This file is part of lsst-resources.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.
from __future__ import annotations
import concurrent.futures
import contextlib
import copy
import io
import locale
import logging
import os
import posixpath
import re
import shutil
import tempfile
import urllib.parse
from pathlib import Path, PurePath, PurePosixPath
from random import Random
__all__ = ("ResourcePath", "ResourcePathExpression")
from collections.abc import Iterable, Iterator
from typing import TYPE_CHECKING, Any, Literal, overload
from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
from .utils import ensure_directory_is_writeable
if TYPE_CHECKING:
from .utils import TransactionProtocol
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Regex for detecting percent-encoded URI escapes (e.g. ``%20``).
# NOTE(review): only upper-case hex digits are matched, so a lower-case
# escape such as ``%2f`` is not detected -- confirm this is intended.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Precomputed percent-encoded form of the "#" character.
ESCAPED_HASH = urllib.parse.quote("#")

# Maximum number of worker threads for parallelized operations.
# If greater than 10, be aware that this number has to be consistent
# with connection pool sizing (for example in urllib3).
MAX_WORKERS = 10
class ResourcePath: # numpydoc ignore=PR02
"""Convenience wrapper around URI parsers.
Provides access to URI components and can convert file
paths into absolute path URIs. Scheme-less URIs are treated as if
they are local file system paths and are converted to absolute URIs.
A specialist subclass is created for each supported URI scheme.
Parameters
----------
uri : `str`, `pathlib.Path`, `urllib.parse.ParseResult`, or `ResourcePath`
URI in string form. Can be scheme-less if referring to a relative
path or an absolute path on the local file system.
root : `str` or `ResourcePath`, optional
When fixing up a relative path in a ``file`` scheme or if scheme-less,
use this as the root. Must be absolute. If `None` the current
working directory will be used. Can be any supported URI scheme.
Not used if ``forceAbsolute`` is `False`.
forceAbsolute : `bool`, optional
If `True`, scheme-less relative URI will be converted to an absolute
path using a ``file`` scheme. If `False` scheme-less URI will remain
scheme-less and will not be updated to ``file`` or absolute path unless
it is already an absolute path, in which case it will be updated to
a ``file`` scheme.
forceDirectory : `bool` or `None`, optional
If `True` forces the URI to end with a separator. If `False` the URI
is interpreted as a file-like entity. Default, `None`, is that the
given URI is interpreted as a directory if there is a trailing ``/`` or
for some schemes the system will check to see if it is a file or a
directory.
isTemporary : `bool`, optional
If `True` indicates that this URI points to a temporary resource.
The default is `False`, unless ``uri`` is already a `ResourcePath`
instance and ``uri.isTemporary is True``.
Notes
-----
A non-standard URI of the form ``file:dir/file.txt`` is always converted
to an absolute ``file`` URI.
"""
# Class-level defaults. The base class uses POSIX path handling
# (``PurePosixPath`` / ``posixpath``).
_pathLib: type[PurePath] = PurePosixPath
"""Path library to use for this scheme."""
_pathModule = posixpath
"""Path module to use for this scheme."""
transferModes: tuple[str, ...] = ("copy", "auto", "move")
"""Transfer modes supported by this implementation.
Move is special in that it is generally a copy followed by an unlink.
Whether that unlink works depends critically on whether the source URI
implements unlink. If it does not the move will be reported as a failure.
"""
transferDefault: str = "copy"
"""Default mode to use for transferring if ``auto`` is specified."""
# Consulted by ``join`` to decide whether a supplied path is URL-quoted.
quotePaths: bool = True
"""True if path-like elements modifying a URI should be quoted.
All non-schemeless URIs have to internally use quoted paths. Therefore
if a new file name is given (e.g. to updatedFile or join) a decision must
be made whether to quote it to be consistent.
"""
isLocal: bool = False
"""If `True` this URI refers to a local file."""
# This is not an ABC with abstract methods because the __new__ being
# a factory confuses mypy such that it assumes that every constructor
# returns a ResourcePath and then determines that all the abstract methods
# are still abstract. If they are not marked abstract but just raise
# mypy is fine with it.
# mypy is confused without these
# Instance attributes: declared here for type checkers only and assigned
# directly on the freshly created object at the end of ``__new__``.
_uri: urllib.parse.ParseResult
isTemporary: bool
dirLike: bool | None
"""Whether the resource looks like a directory resource. `None` means that
the status is uncertain."""
def __new__(
    cls,
    uri: ResourcePathExpression,
    root: str | ResourcePath | None = None,
    forceAbsolute: bool = True,
    forceDirectory: bool | None = None,
    isTemporary: bool | None = None,
) -> ResourcePath:
    """Create and return new specialist ResourcePath subclass.

    Parameters
    ----------
    uri : `str`, `os.PathLike`, `urllib.parse.ParseResult`, or `ResourcePath`
        URI to wrap. A string without ``://`` that does not start with
        ``file:`` is URL-quoted before being parsed.
    root : `str` or `ResourcePath`, optional
        Root used when resolving a scheme-less relative ``uri``. A string
        is converted to a directory-like, absolute `ResourcePath`.
    forceAbsolute : `bool`, optional
        Forwarded to the subclass fix-up step. A scheme-less
        `ResourcePath` argument is re-created from its string form when
        this is `True`.
    forceDirectory : `bool` or `None`, optional
        Requested directory state: `True` for dir-like, `False` for
        file-like, `None` if unknown.
    isTemporary : `bool` or `None`, optional
        Whether the resource is temporary. `None` is stored as `False`
        on a newly created instance.

    Returns
    -------
    new : `ResourcePath`
        Instance of the subclass selected from the URI scheme. If ``uri``
        is already a `ResourcePath` that agrees with the request, that
        same object is returned.

    Raises
    ------
    RuntimeError
        Raised if ``uri`` is a `ResourcePath` whose known directory state
        or temporary state conflicts with ``forceDirectory`` or
        ``isTemporary``.
    ValueError
        Raised if ``uri`` is of an unsupported type, or if ``root`` is
        known to be a file and a relative path has to be joined to it.
    NotImplementedError
        Raised if the URI scheme is not supported.
    """
    parsed: urllib.parse.ParseResult
    dirLike: bool | None = forceDirectory
    subclass: type[ResourcePath] | None = None

    # Force root to be a ResourcePath -- this simplifies downstream
    # code.
    if root is None:
        root_uri = None
    elif isinstance(root, str):
        root_uri = ResourcePath(root, forceDirectory=True, forceAbsolute=True)
    else:
        root_uri = root

    if isinstance(uri, os.PathLike):
        uri = str(uri)

    # Record if we need to post process the URI components
    # or if the instance is already fully configured
    if isinstance(uri, str):
        # Since local file names can have special characters in them
        # we need to quote them for the parser but we can unquote
        # later. Assume that all other URI schemes are quoted.
        # Since sometimes people write file:/a/b and not file:///a/b
        # we should not quote in the explicit case of file:
        if "://" not in uri and not uri.startswith("file:"):
            if ESCAPES_RE.search(uri):
                log.warning("Possible double encoding of %s", uri)
            else:
                # Fragments are generally not encoded so we must search
                # for the fragment boundary ourselves. This is making
                # an assumption that the filename does not include a "#"
                # and also that there is no "/" in the fragment itself.
                to_encode = uri
                fragment = ""
                if "#" in uri:
                    dirpos = uri.rfind("/")
                    trailing = uri[dirpos + 1 :]
                    hashpos = trailing.rfind("#")
                    if hashpos != -1:
                        fragment = trailing[hashpos:]
                        to_encode = uri[: dirpos + hashpos + 1]

                uri = urllib.parse.quote(to_encode) + fragment

        parsed = urllib.parse.urlparse(uri)
    elif isinstance(uri, urllib.parse.ParseResult):
        parsed = copy.copy(uri)
        # If we are being instantiated with a subclass, rather than
        # ResourcePath, ensure that that subclass is used directly.
        # This could lead to inconsistencies if this constructor
        # is used externally outside of the ResourcePath.replace() method.
        #   S3ResourcePath(urllib.parse.urlparse("file://a/b.txt"))
        # will be a problem.
        # This is needed to prevent a schemeless absolute URI become
        # a file URI unexpectedly when calling updatedFile or
        # updatedExtension
        if cls is not ResourcePath:
            parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
            subclass = cls

    elif isinstance(uri, ResourcePath):
        # Since ResourcePath is immutable we can return the argument
        # unchanged if it already agrees with forceDirectory, isTemporary,
        # and forceAbsolute.
        # We invoke __new__ again with str(self) to add a scheme for
        # forceAbsolute, but for the others that seems more likely to paper
        # over logic errors than do something useful, so we just raise.
        if forceDirectory is not None and uri.dirLike is not None and forceDirectory is not uri.dirLike:
            # Can not force a file-like URI to become a dir-like one or
            # vice versa.
            raise RuntimeError(
                f"{uri} can not be forced to change directory vs file state when previously declared."
            )
        if isTemporary is not None and isTemporary is not uri.isTemporary:
            raise RuntimeError(
                f"{uri} is already a {'temporary' if uri.isTemporary else 'permanent'} "
                f"ResourcePath; cannot make it {'temporary' if isTemporary else 'permanent'}."
            )

        if forceAbsolute and not uri.scheme:
            # Create new absolute from relative.
            # An explicit forceDirectory (including False) must win over
            # an unknown dirLike; ``forceDirectory or uri.dirLike`` would
            # silently turn an explicit False into None.
            return ResourcePath(
                str(uri),
                root=root,
                forceAbsolute=forceAbsolute,
                forceDirectory=uri.dirLike if forceDirectory is None else forceDirectory,
                isTemporary=uri.isTemporary,
            )
        elif forceDirectory is not None and uri.dirLike is None:
            # Clone but with a new dirLike status.
            return uri.replace(forceDirectory=forceDirectory)
        return uri
    else:
        raise ValueError(
            f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
        )

    if subclass is None:
        # Work out the subclass from the URI scheme
        if not parsed.scheme:
            # Root may be specified as a ResourcePath that overrides
            # the schemeless determination.
            if (
                root_uri is not None
                and root_uri.scheme != "file"  # file scheme has different code path
                and not parsed.path.startswith("/")  # Not already absolute path
            ):
                if root_uri.dirLike is False:
                    raise ValueError(
                        f"Root URI ({root}) was not a directory so can not be joined with"
                        f" path {parsed.path!r}"
                    )
                # If root is temporary or this schemeless is temporary we
                # assume this URI is temporary.
                isTemporary = isTemporary or root_uri.isTemporary
                joined = root_uri.join(
                    parsed.path, forceDirectory=forceDirectory, isTemporary=isTemporary
                )

                # Rather than returning this new ResourcePath directly we
                # instead extract the path and the scheme and adjust the
                # URI we were given -- we need to do this to preserve
                # fragments since join() will drop them.
                parsed = parsed._replace(scheme=joined.scheme, path=joined.path, netloc=joined.netloc)
                subclass = type(joined)

                # Clear the root parameter to indicate that it has
                # been applied already.
                root_uri = None
            else:
                from .schemeless import SchemelessResourcePath

                subclass = SchemelessResourcePath
        elif parsed.scheme == "file":
            from .file import FileResourcePath

            subclass = FileResourcePath
        elif parsed.scheme == "s3":
            from .s3 import S3ResourcePath

            subclass = S3ResourcePath
        elif parsed.scheme.startswith("http"):
            from .http import HttpResourcePath

            subclass = HttpResourcePath
        elif parsed.scheme == "gs":
            from .gs import GSResourcePath

            subclass = GSResourcePath
        elif parsed.scheme == "resource":
            # Rules for scheme names disallow pkg_resource
            from .packageresource import PackageResourcePath

            subclass = PackageResourcePath
        elif parsed.scheme == "mem":
            # in-memory datastore object
            from .mem import InMemoryResourcePath

            subclass = InMemoryResourcePath
        else:
            raise NotImplementedError(
                f"No URI support for scheme: '{parsed.scheme}' in {parsed.geturl()}"
            )

        parsed, dirLike = subclass._fixupPathUri(
            parsed, root=root_uri, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
        )

        # It is possible for the class to change from schemeless
        # to file so handle that
        if parsed.scheme == "file":
            from .file import FileResourcePath

            subclass = FileResourcePath

    # Now create an instance of the correct subclass and set the
    # attributes directly
    self = object.__new__(subclass)
    self._uri = parsed
    self.dirLike = dirLike
    if isTemporary is None:
        isTemporary = False
    self.isTemporary = isTemporary
    return self
@property
def scheme(self) -> str:
    """Return the scheme component of the wrapped URI.

    Notes
    -----
    The ``://`` separator is not part of the returned value.
    """
    parsed = self._uri
    return parsed.scheme
@property
def netloc(self) -> str:
    """Return the network location component of the wrapped URI."""
    parsed = self._uri
    return parsed.netloc
@property
def path(self) -> str:
    """Return the path component of the wrapped URI, exactly as stored."""
    parsed = self._uri
    return parsed.path
@property
def unquoted_path(self) -> str:
    """Return the URI path component with percent-encoding reversed."""
    quoted = self._uri.path
    return urllib.parse.unquote(quoted)
@property
def ospath(self) -> str:
    """Return the path component of the URI localized to current OS.

    Raises
    ------
    AttributeError
        Always raised by this base implementation.
    """
    message = f"Non-file URI ({self}) has no local OS path."
    raise AttributeError(message)
@property
def relativeToPathRoot(self) -> str:
    """Return the unquoted path relative to the network location.

    All leading ``/`` characters are stripped from the ``path`` property
    and the remainder is unquoted. If nothing remains, ``"./"`` is
    returned.
    """
    stripped = self.path.lstrip("/")
    return urllib.parse.unquote(stripped) if stripped != "" else "./"
@property
def is_root(self) -> bool:
    """Return whether this URI points to the root of the network location.

    This is `True` exactly when ``relativeToPathRoot`` is ``"./"``.
    """
    return self.relativeToPathRoot == "./"
@property
def fragment(self) -> str:
    """Return the fragment component of the wrapped URI."""
    parsed = self._uri
    return parsed.fragment
@property
def params(self) -> str:
    """Return the parameters component of the wrapped URI."""
    parsed = self._uri
    return parsed.params
@property
def query(self) -> str:
    """Return the query string component of the wrapped URI."""
    parsed = self._uri
    return parsed.query
def geturl(self) -> str:
    """Return the wrapped URI reassembled as a string.

    Returns
    -------
    url : `str`
        String form of the URI, as produced by the parse result's own
        ``geturl()``.
    """
    parsed = self._uri
    return parsed.geturl()
def root_uri(self) -> ResourcePath:
    """Return the base root URI.

    Returns
    -------
    uri : `ResourcePath`
        Directory-like copy of this URI with the path, query, fragment
        and params components all set to the empty string.
    """
    cleared = dict.fromkeys(("path", "query", "fragment", "params"), "")
    return self.replace(forceDirectory=True, **cleared)
def split(self) -> tuple[ResourcePath, str]:
    """Split URI into head and tail.

    Returns
    -------
    head : `ResourcePath`
        Everything leading up to tail, built as a directory-like
        `ResourcePath` that keeps the other URI components.
    tail : `str`
        Last path component, unquoted. Empty if ``isdir()`` reports that
        this URI is a directory, or if the path ends on a separator.

    Notes
    -----
    Equivalent to `os.path.split` where head preserves the URI
    components. ``isdir()`` is consulted first, which in some cases can
    mean a file system check (only if ``forceDirectory`` was `None`
    during construction). For a scheme-less URI the result might
    therefore depend on the current working directory.
    """
    # A directory has no tail component: hand back the URI itself.
    if self.isdir():
        return self, ""

    head_path, quoted_tail = self._pathModule.split(self.path)
    head_parsed = self._uri._replace(path=head_path)

    # The file part should never include quoted metacharacters.
    plain_tail = urllib.parse.unquote(quoted_tail)

    # Schemeless URIs may be relative and must stay that way, so only
    # force the head to be absolute when this URI already is.
    head_uri = ResourcePath(head_parsed, forceDirectory=True, forceAbsolute=self.isabs())
    return head_uri, plain_tail
def basename(self) -> str:
    """Return the base name, last element of path, of the URI.

    Returns
    -------
    tail : `str`
        Last part of the path attribute. Tail will be empty if path ends
        on a separator.

    Notes
    -----
    This is the second element returned by `split()`, so a URI ending on
    a slash gives an empty string. Equivalent of `os.path.basename`.
    """
    _, tail = self.split()
    return tail
def dirname(self) -> ResourcePath:
    """Return the directory component of the path as a new `ResourcePath`.

    Returns
    -------
    head : `ResourcePath`
        Everything except the tail of the path attribute; this is the
        first element returned by `split()`.

    Notes
    -----
    Equivalent of `os.path.dirname`. If this is a directory URI it will
    be returned unchanged. If the parent directory is always required
    use `parent`.
    """
    head, _ = self.split()
    return head
def parent(self) -> ResourcePath:
    """Return a `ResourcePath` of the parent directory.

    Returns
    -------
    head : `ResourcePath`
        Directory-like URI one level above this one.

    Notes
    -----
    For a URI known to be file-like this is the same as calling
    `dirname`. Otherwise the parent of the path is always returned,
    regardless of any trailing separator, whereas `dirname()` would
    return a directory URI unchanged. This mirrors the difference
    between `os.path.dirname` and the `pathlib.Path` property
    ``parent``.
    """
    if self.dirLike is not False:
        # Dir-like (or unknown): step up one level using the path
        # library for this scheme.
        one_up = self._pathLib(self.path).parent
        return self.replace(path=str(one_up), forceDirectory=True)

    # Known file: os.path.split() is slightly faster than calling
    # Path().parent.
    return self.dirname()
def replace(
    self, forceDirectory: bool | None = None, isTemporary: bool = False, **kwargs: Any
) -> ResourcePath:
    """Return new `ResourcePath` with specified components replaced.

    Parameters
    ----------
    forceDirectory : `bool` or `None`, optional
        Passed to the constructor to force the new URI to be dir-like
        or file-like.
    isTemporary : `bool`, optional
        Passed to the constructor to mark the new URI as a temporary
        resource.
    **kwargs
        Components of a `urllib.parse.ParseResult` that should be
        modified for the newly-created `ResourcePath`.

    Returns
    -------
    new : `ResourcePath`
        New object of the same class as this one with updated values.

    Raises
    ------
    ValueError
        Raised if ``scheme`` is among the components to replace.

    Notes
    -----
    Does not, for now, allow a change in URI scheme.
    """
    # Disallow a change in scheme
    if "scheme" in kwargs:
        raise ValueError(f"Can not use replace() method to change URI scheme for {self}")

    updated = self._uri._replace(**kwargs)
    factory = self.__class__
    return factory(updated, forceDirectory=forceDirectory, isTemporary=isTemporary)
def updatedFile(self, newfile: str) -> ResourcePath:
    """Return new URI with an updated final component of the path.

    Parameters
    ----------
    newfile : `str`
        File name with no path component.

    Returns
    -------
    updated : `ResourcePath`
        File-like `ResourcePath` with the new final component.

    Notes
    -----
    The result is always created with ``forceDirectory=False``. If the
    current URI is known to refer to a directory the new file is joined
    to it; otherwise it is joined to the parent. It is recommended that
    the join-to-directory behavior no longer be relied upon and that a
    call to `isdir` by the caller be used to decide whether to join or
    replace. In the future this method may be modified to always replace
    the final element of the path.
    """
    base = self if self.dirLike else self.parent()
    return base.join(newfile, forceDirectory=False)
def updatedExtension(self, ext: str | None) -> ResourcePath:
    """Return a new `ResourcePath` with updated file extension.

    The whole extension reported by `getExtension` is replaced.

    Parameters
    ----------
    ext : `str` or `None`
        New extension. If an empty string is given any extension will
        be removed. If `None` is given there will be no change.

    Returns
    -------
    updated : `ResourcePath`
        URI with the specified extension. This object itself is returned
        if ``ext`` is `None` or already equals the current extension.
    """
    if ext is None:
        return self

    existing = self.getExtension()
    if existing == ext:
        # Nothing to do if the extension already matches.
        return self

    # Drop the current extension. getExtension treats something like
    # .fits.gz as one extension, so os.path.splitext is not suitable.
    stem = self.path.removesuffix(existing) if existing else self.path

    # Guarantee a leading "." on a non-empty extension.
    if ext and not ext.startswith("."):
        ext = f".{ext}"

    return self.replace(path=stem + ext, forceDirectory=False)
def getExtension(self) -> str:
    """Return the extension(s) associated with this URI path.

    Returns
    -------
    ext : `str`
        The file extension (including the ``.``). Can be empty string
        if there is no file extension. Usually returns only the last
        file extension unless there is a special extension modifier
        indicating file compression, in which case the combined
        extension (e.g. ``.fits.gz``) will be returned.

    Notes
    -----
    Does not distinguish between file and directory URIs when determining
    a suffix. An extension is only determined from the final component
    of the path. Leading dots of that component are not treated as
    extension separators, so a hidden file such as ``.bashrc`` has no
    extension, matching `pathlib.PurePath.suffixes`.
    """
    special = {".gz", ".bz2", ".xz", ".fz"}

    # pathlib's ``suffixes`` would give the answer directly but
    # constructing a path object is slow, so parse the string by hand.
    # Strip trailing separator if present, do not care if this is a
    # directory or not.
    name = self.path.rstrip("/").rsplit(self._pathModule.sep, 1)[-1]

    # Leading dots mark a hidden file (or "." / ".."); they do not start
    # an extension.
    _, *extensions = name.lstrip(".").split(".")
    if not extensions:
        return ""

    ext = "." + extensions.pop()

    # Multiple extensions, decide whether to include the final two
    if extensions and ext in special:
        ext = f".{extensions[-1]}{ext}"

    return ext
def join(
    self, path: str | ResourcePath, isTemporary: bool | None = None, forceDirectory: bool | None = None
) -> ResourcePath:
    """Return new `ResourcePath` with additional path components.

    Parameters
    ----------
    path : `str`, `ResourcePath`
        Additional file components to append to the current URI. Will be
        quoted depending on the associated URI scheme. If the path looks
        like a URI referring to an absolute location, it will be returned
        directly (matching the behavior of `os.path.join`). It can
        also be a `ResourcePath`.
    isTemporary : `bool`, optional
        Indicate that the resulting URI represents a temporary resource.
        Default is ``self.isTemporary``.
    forceDirectory : `bool` or `None`, optional
        If `True` forces the URI to end with a separator. If `False` the
        resultant URI is declared to refer to a file. `None` indicates
        that the file directory status is unknown.

    Returns
    -------
    new : `ResourcePath`
        New URI with the path appended.

    Notes
    -----
    Schemeless URIs assume local path separator but all other URIs assume
    POSIX separator if the supplied path has directory structure. It
    may be this never becomes a problem but datastore templates assume
    POSIX separator is being used.

    If an absolute `ResourcePath` is given for ``path`` it is assumed that
    this should be returned directly. Giving a ``path`` of an absolute
    scheme-less URI is not allowed for safety reasons as it may indicate
    a mistake in the calling code.

    It is an error to attempt to join to something that is known to
    refer to a file. Use `updatedFile` if the file is to be
    replaced.

    Raises
    ------
    ValueError
        Raised if the given path object refers to a directory but the
        ``forceDirectory`` parameter insists the outcome should be a file,
        and vice versa. Also raised if the URI being joined with is known
        to refer to a file.
    RuntimeError
        Raised if this attempts to join a temporary URI to a non-temporary
        URI.
    """
    if self.dirLike is False:
        raise ValueError("Can not join a new path component to a file.")

    # Inherit the temporary state by default; a temporary parent can not
    # produce a non-temporary child.
    if isTemporary is None:
        isTemporary = self.isTemporary
    elif self.isTemporary and not isTemporary:
        raise RuntimeError("Cannot join temporary URI to non-temporary URI.")

    # Parse the supplied path without forcing it to be absolute so that
    # the expected case of a relative path can be told apart from a full
    # URI.
    candidate = ResourcePath(
        path, forceAbsolute=False, forceDirectory=forceDirectory, isTemporary=isTemporary
    )
    if forceDirectory is not None and candidate.dirLike is not forceDirectory:
        raise ValueError(
            "The supplied path URI to join has inconsistent directory state "
            f"with forceDirectory parameter: {candidate.dirLike} vs {forceDirectory}"
        )
    forceDirectory = candidate.dirLike

    # Absolute URI so return it directly.
    if candidate.isabs():
        return candidate

    # A plain string is used as given so that "#" may appear in the file
    # name; for a ResourcePath the unquoted path is extracted instead.
    extra = path if isinstance(path, str) else candidate.unquoted_path

    # Might need to quote the path.
    if self.quotePaths:
        extra = urllib.parse.quote(extra)

    combined = self._pathModule.join(self.path, extra)
    normalized = self._pathModule.normpath(combined)

    # normpath can strip trailing / so a trailing separator on the
    # supplied path decides the directory state when it is still unknown.
    if extra.endswith(self._pathModule.sep):
        if forceDirectory is None:
            forceDirectory = True
        elif forceDirectory is False:
            raise ValueError("Path to join has trailing / but is being forced to be a file.")

    return self.replace(path=normalized, forceDirectory=forceDirectory, isTemporary=isTemporary)
def relative_to(self, other: ResourcePath) -> str | None:
    """Return the relative path from this URI to the other URI.

    Parameters
    ----------
    other : `ResourcePath`
        URI to use to calculate the relative path. Must be a parent
        of this URI.

    Returns
    -------
    subpath : `str`
        The sub path of this URI relative to the supplied other URI.
        Returns `None` if there is no parent child relationship.
        Scheme and netloc must match, except that the empty netloc and
        the common spellings of localhost are treated as equivalent.
    """
    # Scheme-less self is handled elsewhere.
    if self.scheme != other.scheme:
        return None

    if self.netloc != other.netloc:
        # Special case for localhost vs empty string.
        # There can be many variants of localhost.
        local_netlocs = {"", "localhost", "localhost.localdomain", "127.0.0.1"}
        if self.netloc not in local_netlocs or other.netloc not in local_netlocs:
            return None

    child = self._pathLib(self.relativeToPathRoot)
    try:
        relative = str(child.relative_to(other.relativeToPathRoot))
    except ValueError:
        # The path library signals "not a sub path" with ValueError.
        return None
    return urllib.parse.unquote(relative)
def exists(self) -> bool:
    """Indicate that the resource is available.

    Returns
    -------
    exists : `bool`
        `True` if the resource exists.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
@classmethod
def mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
    """Check for existence of multiple URIs at once.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence.
    """
    # Bucket the URIs by concrete class so that each class can apply its
    # own ``_mexists`` implementation to its own URIs.
    by_class: dict[type, list[ResourcePath]] = {}
    for candidate in uris:
        by_class.setdefault(candidate.__class__, []).append(candidate)

    existence: dict[ResourcePath, bool] = {}
    for uri_class, members in by_class.items():
        existence.update(uri_class._mexists(members))
    return existence
@classmethod
def _mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
    """Check for existence of multiple URIs at once.

    Implementation helper method for `mexists`.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence. A URI
        whose ``exists()`` call raises is reported as not existing.
    """
    results: dict[ResourcePath, bool] = {}

    # Use the executor as a context manager so that its worker threads
    # are always shut down, even if an unexpected error escapes.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as exists_executor:
        future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}
        for future in concurrent.futures.as_completed(future_exists):
            uri = future_exists[future]
            try:
                exists = future.result()
            except Exception:
                # Deliberate best effort: a failed check counts as
                # "does not exist".
                exists = False
            results[uri] = exists
    return results
def remove(self) -> None:
    """Remove the resource.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
def isabs(self) -> bool:
    """Indicate that the resource is fully specified.

    Returns
    -------
    isabs : `bool`
        Always `True` in this base implementation. Only a schemeless
        URI is expected to report anything else.
    """
    return True
def abspath(self) -> ResourcePath:
    """Return URI using an absolute path.

    Returns
    -------
    abs : `ResourcePath`
        This base implementation always returns the object itself.
        Schemeless URIs are the case that is upgraded to a file URI.
    """
    return self
def _as_local(self) -> tuple[str, bool]:
"""Return the location of the (possibly remote) resource as local file.
This is a helper function for `as_local` context manager.
Returns
-------
path : `str`
If this is a remote resource, it will be a copy of the resource
on the local file system, probably in a temporary directory.
For a local resource this should be the actual path to the
resource.
is_temporary : `bool`
Indicates if the local path is a temporary file or not.
"""
raise NotImplementedError()
@contextlib.contextmanager
def as_local(self) -> Iterator[ResourcePath]:
    """Return the location of the (possibly remote) resource as local file.

    Yields
    ------
    local : `ResourcePath`
        If this is a remote resource, it will be a copy of the resource
        on the local file system, probably in a temporary directory.
        For a local resource this should be the actual path to the
        resource.

    Raises
    ------
    IsADirectoryError
        Raised if ``isdir()`` reports that this URI is a directory.

    Notes
    -----
    The context manager will automatically delete any local temporary
    file.

    Examples
    --------
    Should be used as a context manager:

    .. code-block:: py

       with uri.as_local() as local:
           ospath = local.ospath
    """
    if self.isdir():
        raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")

    local_path, made_temporary = self._as_local()
    local_uri = ResourcePath(local_path, isTemporary=made_temporary)

    try:
        yield local_uri
    finally:
        # Do not ever delete if the temporary matches self (since it may
        # have been that a temporary file was made local but already was
        # local).
        if self != local_uri and made_temporary:
            # The caller might have relocated the temporary file, so only
            # remove it if it is still there.
            if local_uri.exists():
                local_uri.remove()
@classmethod
@contextlib.contextmanager
def temporary_uri(
cls,
prefix: ResourcePath | None = None,
suffix: str | None = None,
delete: bool = True,
) -> Iterator[ResourcePath]:
"""Create a temporary file-like URI.
Parameters
----------
prefix : `ResourcePath`, optional
Prefix to use. Without this the path will be formed as a local
file URI in a temporary directory. Ensuring that the prefix
location exists is the responsibility of the caller.
suffix : `str`, optional
A file suffix to be used. The ``.`` should be included in this
suffix.
delete : `bool`, optional
By default the resource will be deleted when the context manager
is exited. Setting this flag to `False` will leave the resource
alone. `False` will also retain any directories that may have
been created.
Yields
------
uri : `ResourcePath`
The temporary URI. Will be removed when the context is completed.
"""
use_tempdir = False
if prefix is None:
directory = tempfile.mkdtemp()
# If the user has set a umask that restricts the owner-write bit,
# the directory returned from mkdtemp may not initially be
# writeable by us
ensure_directory_is_writeable(directory)
prefix = ResourcePath(directory, forceDirectory=True, isTemporary=True)
# Record that we need to delete this directory. Can not rely
# on isTemporary flag since an external prefix may have that
# set as well.
use_tempdir = True
# Need to create a randomized file name. For consistency do not
# use mkstemp for local and something else for remote. Additionally
# this method does not create the file to prevent name clashes.
characters = "abcdefghijklmnopqrstuvwxyz0123456789_"