-
Notifications
You must be signed in to change notification settings - Fork 81
/
file_client.py
535 lines (455 loc) · 16.4 KB
/
file_client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
"""Tools for remote file IO using paramiko."""
from __future__ import annotations
import shutil
import stat
import warnings
from functools import wraps
from glob import glob
from gzip import GzipFile
from pathlib import Path
from typing import Any, Callable
import paramiko
from monty.io import zopen
from paramiko import SFTPClient, SSHClient
__all__ = ["FileClient", "auto_fileclient"]
class FileClient:
    """
    Tool for performing operations on files.

    The client is agnostic of whether file operations are happening locally or via
    SSH. All operations have a ``host`` parameter that specifies which remote host
    the operation should be performed on; ``host=None`` means the local machine.

    Connections are opened lazily on first use and cached per host string until
    :meth:`close` is called. The client can be used as a context manager, in which
    case all cached connections are closed on exit.

    .. Note::
        To use abbreviated host names without user information, the FileClient
        requires the appropriate configuration to be defined in the ssh config file.

    Parameters
    ----------
    key_filename : str or Path
        Path to private key file (for remote connections only).
    config_filename : str or Path
        Path to OpenSSH config file defining host connection settings.
    """

    def __init__(
        self,
        key_filename: str | Path = "~/.ssh/id_rsa",
        config_filename: str | Path = "~/.ssh/config",
    ):
        self.key_filename = key_filename
        self.config_filename = config_filename
        # Maps the host string to {"ssh": SSHClient, "sftp": SFTPClient}.
        self.connections: dict[str, dict[str, Any]] = {}

    def connect(self, host: str):
        """
        Connect to a remote host.

        Does nothing if a connection for ``host`` is already cached.

        Parameters
        ----------
        host : str
            A remote host filesystem. Supports using hosts defined in the ssh
            config file. The host can be specified as either "username@remote_host"
            or just "remote_host" in which case the username will be inferred from
            the current user.
        """
        if host in self.connections:
            return

        if "@" in host:
            username, hostname = host.split("@", 1)
        else:
            username = None  # paramiko sets default username
            hostname = host

        ssh = get_ssh_connection(
            username, hostname, self.key_filename, self.config_filename
        )
        self.connections[host] = {"ssh": ssh, "sftp": ssh.open_sftp()}

    def get_ssh(self, host: str) -> SSHClient:
        """
        Get an SSH connection to a host, connecting first if necessary.

        Parameters
        ----------
        host : str
            A remote host filesystem. Supports using hosts defined in the ssh
            config file. The host can be specified as either "username@remote_host"
            or just "remote_host" in which case the username will be inferred from
            the current user.

        Returns
        -------
        .SSHClient
            An ssh client to the host.
        """
        if host not in self.connections:
            self.connect(host)
        return self.connections[host]["ssh"]

    def get_sftp(self, host: str) -> SFTPClient:
        """
        Get an SFTP connection to a host, connecting first if necessary.

        Parameters
        ----------
        host : str
            A remote host filesystem. Supports using hosts defined in the ssh
            config file. The host can be specified as either "username@remote_host"
            or just "remote_host" in which case the username will be inferred from
            the current user.

        Returns
        -------
        .SFTPClient
            An sftp client to the host.
        """
        if host not in self.connections:
            self.connect(host)
        return self.connections[host]["sftp"]

    def exists(self, path: str | Path, host: str | None = None) -> bool:
        """
        Check whether a file exists.

        Parameters
        ----------
        path : str or Path
            A path to check existence of.
        host : str or None
            A remote file system host on which to perform file operations.

        Returns
        -------
        bool
            Whether the file exists.
        """
        if host is None:
            return Path(path).exists()

        path = str(self.abspath(path, host=host))
        try:
            self.get_sftp(host).stat(path)
        except FileNotFoundError:
            return False
        return True

    def is_file(self, path: str | Path, host: str | None = None) -> bool:
        """
        Whether a path is a file.

        Parameters
        ----------
        path : str or Path
            A path.
        host : str or None
            A remote file system host on which to perform file operations.

        Returns
        -------
        bool
            Whether the path is a file.
        """
        if host is None:
            return Path(path).is_file()

        path = str(self.abspath(path, host=host))
        try:
            return stat.S_ISREG(self.get_sftp(host).lstat(path).st_mode)
        except FileNotFoundError:
            return False

    def is_dir(self, path: str | Path, host: str | None = None) -> bool:
        """
        Whether a path is a directory.

        Parameters
        ----------
        path : str or Path
            A path.
        host : str or None
            A remote file system host on which to perform file operations.

        Returns
        -------
        bool
            Whether the path is a directory.
        """
        if host is None:
            return Path(path).is_dir()

        path = str(self.abspath(path, host=host))
        try:
            return stat.S_ISDIR(self.get_sftp(host).lstat(path).st_mode)
        except FileNotFoundError:
            return False

    def listdir(self, path: str | Path, host: str | None = None) -> list[Path]:
        """
        Get the directory listing.

        Parameters
        ----------
        path : str or Path
            Full path to the directory.
        host : str or None
            A remote file system host on which to perform file operations.

        Returns
        -------
        list of Path
            List of filenames and directories, relative to ``path``.
        """
        if host is None:
            path = self.abspath(path, host=host)
            return [p.relative_to(path) for p in Path(path).iterdir()]

        path = str(self.abspath(path, host=host))
        return [Path(p) for p in self.get_sftp(host).listdir(path)]

    def copy(
        self,
        src_filename: str | Path,
        dest_filename: str | Path,
        src_host: str | None = None,
        dest_host: str | None = None,
    ):
        """
        Copy a file from source to destination.

        Parameters
        ----------
        src_filename : str or Path
            Full path to source file.
        dest_filename : str or Path
            Full path to destination file.
        src_host : str or None
            A remote file system host for the source file.
        dest_host : str or None
            A remote file system host for the destination file.

        Raises
        ------
        ValueError
            If the source and destination are two different remote hosts.
        """
        src_filename = self.abspath(src_filename, host=src_host)
        dest_filename = self.abspath(dest_filename, host=dest_host)

        if src_host is None and dest_host is None:
            # copying on local machine
            shutil.copy2(src_filename, dest_filename)
        elif src_host is not None and dest_host is None:
            # copying from remote to local
            self.get_sftp(src_host).get(str(src_filename), str(dest_filename))
        elif src_host is None and dest_host is not None:
            # copying from local to remote
            self.get_sftp(dest_host).put(str(src_filename), str(dest_filename))
        elif src_host == dest_host:
            # copying between the same remote machine.
            ssh = self.get_ssh(src_host)
            _, _, stderr = ssh.exec_command(f"cp {src_filename} {dest_filename}")
            # The stream can only be consumed once, so keep the lines in order to
            # report the actual error text rather than the channel object.
            errors = stderr.readlines()
            if errors:
                message = "".join(errors).strip()
                warnings.warn(f"Copy command gave error: {message}", stacklevel=2)
        else:
            # copying between two remote hosts; this is a pain and it is unlikely
            # anyone will want to do it.
            raise ValueError(
                "Copying between two different remote hosts is not supported."
            )

    def remove(self, path: str | Path, host: str | None = None):
        """
        Remove a file (does not work on directories).

        Parameters
        ----------
        path : str or Path
            Path to a file.
        host : str or None
            A remote file system host on which to perform file operations.
        """
        if host is None:
            Path(path).unlink()
        else:
            # Resolve the *path* (not the host name) on the remote machine.
            path = str(self.abspath(path, host=host))
            self.get_sftp(host).unlink(path)

    def rename(
        self,
        old_path: str | Path,
        new_path: str | Path,
        host: str | None = None,
    ):
        """
        Rename (move) a file.

        Parameters
        ----------
        old_path : str or Path
            Path to an existing file.
        new_path : str or Path
            Requested path to new file.
        host : str or None
            A remote file system host on which to perform file operations.
        """
        if host is None:
            Path(old_path).rename(new_path)
        else:
            old_path = str(self.abspath(old_path, host=host))
            new_path = str(self.abspath(new_path, host=host))
            self.get_sftp(host).rename(old_path, new_path)

    def abspath(self, path: str | Path, host: str | None = None) -> Path:
        """
        Get the absolute path.

        Locally this is ``Path(path).absolute()``. Remotely the path is resolved by
        running ``readlink -f`` on the host.

        Parameters
        ----------
        path : str or Path
            A path to a file or directory.
        host : str or None
            A remote file system host on which to perform file operations.

        Returns
        -------
        Path
            The absolute file path. If the remote command produces no output the
            path is returned unchanged.
        """
        if host is None:
            return Path(path).absolute()

        ssh = self.get_ssh(host)
        _, stdout, _ = ssh.exec_command(f"readlink -f {path}")
        resolved = [line.split("\n")[0] for line in stdout]
        if not resolved:
            # No output from readlink: fall back to the given path instead of
            # failing with an IndexError, so that callers such as ``exists`` can
            # report the path as missing.
            return Path(path)
        return Path(resolved[0])

    def glob(self, path: str | Path, host: str | None = None) -> list[Path]:
        """
        Glob files and folders.

        Locally the pattern is expanded with :func:`glob.glob`. Remotely the
        pattern is passed unquoted to ``readlink -f`` on the host and every line of
        output is returned as a path.

        Parameters
        ----------
        path : str or Path
            A path to glob.
        host : str or None
            A remote file system host on which to perform file operations.

        Returns
        -------
        list[Path]
            A list of globbed files and directories.
        """
        if host is None:
            files = glob(str(path))
        else:
            ssh = self.get_ssh(host)
            _, stdout, _ = ssh.exec_command(f"readlink -f {path}")
            files = [line.split("\n")[0] for line in stdout]
        return [Path(f) for f in files]

    def gzip(
        self,
        path: str | Path,
        host: str | None = None,
        compresslevel: int = 6,
        force: bool = False,
    ):
        """
        Gzip a file.

        The original file is replaced by ``<name>.gz`` in the same directory.
        Files whose name already ends in "gz" and directories are skipped with a
        warning.

        Parameters
        ----------
        path : str or Path
            Path to a file to gzip.
        host : str or None
            A remote file system host on which to perform file operations.
        compresslevel : int
            Level of compression, 1-9. 9 is default for GzipFile, 6 is default for
            gzip.
        force : bool
            Overwrite gzipped file if it already exists.

        Raises
        ------
        FileExistsError
            If the gzipped file already exists and ``force`` is False.
        """
        path = self.abspath(path, host=host)
        path_gz = path.parent / f"{path.name}.gz"

        if str(path).lower().endswith("gz"):
            warnings.warn(f"{path} is already gzipped, skipping...", stacklevel=2)
            return

        if self.is_dir(path, host=host):
            warnings.warn(f"{path} is a directory, skipping...", stacklevel=2)
            return

        if self.exists(path_gz, host=host) and not force:
            raise FileExistsError(f"{path_gz} file already exists.")

        if host is None:
            with open(path, "rb") as f_in, GzipFile(
                path_gz, "wb", compresslevel=compresslevel
            ) as f_out:
                shutil.copyfileobj(f_in, f_out)
            # Carry the original file's metadata over to the compressed file.
            shutil.copystat(path, path_gz)
            path.unlink()
        else:
            ssh = self.get_ssh(host)
            # The gzip command line only understands levels -1 .. -9.
            level_flag = f"-{compresslevel} " if 1 <= compresslevel <= 9 else ""
            _, stdout, _ = ssh.exec_command(f"gzip {level_flag}-f {path!s}")
            # exec_command returns immediately; wait for the command to finish so
            # the compressed file is in place when this method returns.
            stdout.channel.recv_exit_status()

    def gunzip(
        self,
        path: str | Path,
        host: str | None = None,
        force: bool = False,
    ):
        """
        Ungzip a file.

        The gzipped file is replaced by a file with the final suffix removed.
        Files whose name does not end in "gz" are skipped with a warning.

        Parameters
        ----------
        path : str or Path
            Path to a file to gunzip.
        host : str or None
            A remote file system host on which to perform file operations.
        force : bool
            Overwrite non-gzipped file if it already exists.

        Raises
        ------
        FileExistsError
            If the decompressed file already exists and ``force`` is False.
        """
        path = self.abspath(path, host=host)
        path_nongz = path.with_suffix("")

        if not str(path).lower().endswith("gz"):
            warnings.warn(f"{path} is not gzipped, skipping...", stacklevel=2)
            return

        if self.exists(path_nongz, host=host) and not force:
            raise FileExistsError(f"{path_nongz} file already exists")

        if host is None:
            with open(path_nongz, "wb") as f_out, zopen(path, "rb") as f_in:
                f_out.writelines(f_in)
            path.unlink()
        else:
            ssh = self.get_ssh(host)
            _, stdout, _ = ssh.exec_command(f"gunzip -f {path!s}")
            # exec_command returns immediately; wait for the command to finish so
            # the decompressed file is in place when this method returns.
            stdout.channel.recv_exit_status()

    def close(self):
        """Close all connections and forget them."""
        for connection in self.connections.values():
            # Close the SFTP channel before the SSH client it was opened from.
            connection["sftp"].close()
            connection["ssh"].close()
        self.connections = {}

    def __enter__(self):
        """Support for "with" context."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Support for "with" context."""
        self.close()
def get_ssh_connection(
    username: str | None,
    hostname: str,
    key_filename: str | Path,
    config_filename: str | Path | None = None,
) -> SSHClient:
    """
    Connect to a remote host via paramiko.

    If the host key is not present it will be added automatically.

    Parameters
    ----------
    username : str or None
        The username. If ``None``, the current logged in username will be used.
    hostname : str
        The host name. Supports host aliases defined in the ssh config file.
    key_filename : str or Path
        Path to private key file.
    config_filename : str or Path or None
        Path to OpenSSH config file. If ``None`` or the file does not exist, no
        ssh config is read.

    Returns
    -------
    .SSHClient
        An ssh connection to the host.

    Raises
    ------
    ValueError
        If the private key file cannot be found.
    """
    key_filename = Path(key_filename).expanduser()
    if not key_filename.exists():
        raise ValueError(f"Cannot locate private key file: {key_filename}")

    # Hand the validated key to paramiko so a non-default key is actually used.
    config: dict[str, Any] = {
        "hostname": hostname,
        "username": username,
        "key_filename": str(key_filename),
    }

    if config_filename is not None:
        config_path = Path(config_filename).expanduser()
        if config_path.exists():
            # try reading ssh config file
            ssh_config = paramiko.SSHConfig().from_path(str(config_path))

            host_config = ssh_config.lookup(hostname)  # type: ignore[attr-defined]
            for k in ("hostname", "user", "port"):
                if k in host_config:
                    value = host_config[k]
                    if k == "port":
                        # The ssh config stores the port as text; connect expects
                        # an integer.
                        value = int(value)
                    # paramiko's connect() calls the ssh config "user" "username".
                    config[k.replace("user", "username")] = value

            if "proxycommand" in host_config:
                config["sock"] = paramiko.ProxyCommand(host_config["proxycommand"])

    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    client.connect(**config)
    return client
def auto_fileclient(method: Callable | None = None):
    """
    Supply a FileClient to the wrapped function when the caller did not pass one.

    Intended only for functions that accept a ``file_client`` keyword argument.
    When the call has no ``file_client`` keyword (or it is ``None``), a fresh
    FileClient is created, injected into the keyword arguments, and its
    connections are closed once the wrapped function returns. A caller-supplied
    client is passed through untouched and is not closed.

    Works both as ``@auto_fileclient`` and as ``@auto_fileclient()``.

    Parameters
    ----------
    method : callable or None
        A function to wrap. This should not be specified directly and is implied
        by the decorator.
    """

    def _wrap(target):
        @wraps(target)
        def _call_with_client(*args, **kwargs):
            # Caller already provided a client: just forward the call.
            if kwargs.get("file_client", None) is not None:
                return target(*args, **kwargs)

            # Otherwise own a temporary client for the duration of the call.
            with FileClient() as client:
                kwargs["file_client"] = client
                return target(*args, **kwargs)

        return _call_with_client

    # ``method`` is None when used with parentheses (@auto_fileclient()), in which
    # case the decorator itself is returned; otherwise wrap the function directly.
    return _wrap if method is None else _wrap(method)