Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
feat: Zenodo remote provider for transparent storage on and retrieval from Zenodo (#1455)
* feat: Zenodo remote provider for transparent storage on and retrieval from Zenodo (#125)
* added zenodo remote provider
* added test
* updated formatting
* added sandbox, access_token is required, create new deposition when deposition id is not provided
* updated test Snakefile
* added initial zenodo remote documentation
* added zenodo branches for ci
* fixed code formatting
* added zenodo sandbox pat to workflow
* added zenodo remote test
* fixed formatting
* deleted comment
* skip tibanna test for now
* deleted branch
* added zenodo file exception when hits api limit
* moves file size assertion to remoteobject
* updated test
* added test file
* run also too large upload test
* updated docs
* Delete superfluous file.
* removed error handling around requests and re as unnecessary
* updated error message
* moved download code to remoteobject
* use download url
* fixed formatting
* upload large files
* fixed formatting
* removed makedir
* updated test, download to subdir
* moved exception handling to request wrapper
* removed zen branch from ci
* fixed deposition id handling
* fixed stats is not defined
* fix download
* fixed test download path, updated docs
* Only run test if token is available.
* fmt
* envvars
* fix skip criterion
* fix error handling
* windows secret
Co-authored-by: Johannes Köster <johannes.koester@uni-due.de>
Co-authored-by: Johannes Köster <johannes.koester@tu-dortmund.de>
* add retry handling, fix code issues
Co-authored-by: Taavi Päll <tapa741@gmail.com>
- Loading branch information
1 parent
771c0b2
commit 4586ef7
Showing
10 changed files
with
283 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
__author__ = "Taavi Päll" | ||
__copyright__ = "Copyright 2019, Taavi Päll" | ||
__email__ = "tapa741@gmail.com" | ||
__license__ = "MIT" | ||
|
||
import os | ||
import hashlib | ||
from collections import namedtuple | ||
import requests | ||
from requests.exceptions import HTTPError | ||
from snakemake.remote import ( | ||
AbstractRemoteObject, | ||
AbstractRemoteProvider, | ||
AbstractRemoteRetryObject, | ||
) | ||
from snakemake.exceptions import ZenodoFileException, WorkflowError | ||
from snakemake.common import lazy_property | ||
|
||
|
||
# Record describing one file of a deposition: its checksum, size,
# file id and download link.
ZenFileInfo = namedtuple("ZenFileInfo", "checksum filesize id download")
|
||
|
||
class RemoteProvider(AbstractRemoteProvider):
    """Remote provider that hands out a shared Zenodo helper."""

    def __init__(self, *args, stay_on_remote=False, **kwargs):
        """Initialise the base provider and create the Zenodo helper.

        ``stay_on_remote`` is forwarded to the base class only; the
        positional and remaining keyword arguments go to both the base
        class and ``ZENHelper``.
        """
        super().__init__(*args, stay_on_remote=stay_on_remote, **kwargs)
        self._zen = ZENHelper(*args, **kwargs)

    def remote_interface(self):
        """Return the ``ZENHelper`` instance owned by this provider."""
        return self._zen

    @property
    def default_protocol(self):
        """Protocol prefix used by default."""
        return "https://"

    @property
    def available_protocols(self):
        """List of protocol prefixes this provider offers."""
        return ["https://"]
|
||
|
||
class RemoteObject(AbstractRemoteRetryObject):
    """A single file stored in, or to be uploaded to, a Zenodo deposition.

    Files are looked up in the deposition listing by the basename of
    ``local_file()``.
    NOTE(review): ``_upload`` names the remote file after the basename of
    ``remote_file()`` instead -- confirm both basenames always agree.
    """

    def __init__(
        self, *args, keep_local=False, stay_on_remote=False, provider=None, **kwargs
    ):
        """Initialise the base object and bind a Zenodo helper.

        When ``provider`` is given its ``remote_interface()`` is reused;
        otherwise a new ``ZENHelper`` is built from ``args``/``kwargs``.
        """
        super(RemoteObject, self).__init__(
            *args,
            keep_local=keep_local,
            stay_on_remote=stay_on_remote,
            provider=provider,
            **kwargs
        )
        if provider:
            self._zen = provider.remote_interface()
        else:
            self._zen = ZENHelper(*args, **kwargs)

    # === Implementations of abstract class members ===
    def _stats(self):
        """Return the deposition's file entry for this object.

        Raises:
            KeyError: if the deposition lists no file with this name.
        """
        return self._zen.get_files()[os.path.basename(self.local_file())]

    def exists(self):
        """Return True if the deposition lists a file with this name."""
        return os.path.basename(self.local_file()) in self._zen.get_files()

    def size(self):
        """Return the remote file size, or the local size if not yet remote."""
        # One listing request serves both the existence check and the size
        # lookup; this avoids a second API call and the window in which the
        # file could disappear between the two calls.
        entry = self._zen.get_files().get(os.path.basename(self.local_file()))
        if entry is not None:
            return entry.filesize
        return self._iofile.size_local

    def mtime(self):
        """Return 0, because no modification time is available."""
        # There is no mtime info provided by Zenodo.
        # Hence, the files are always considered to be "ancient".
        return 0

    def _download(self):
        """Write the remote file to ``local_file()`` and verify its MD5.

        Raises:
            ZenodoFileException: if the MD5 of the written data differs
                from the checksum in the deposition's file entry.
        """
        stats = self._stats()
        response = self._zen._api_request(stats.download)

        digest = hashlib.md5()

        # Hash the data while writing it so the file is only traversed once.
        with open(self.local_file(), "wb") as rf:
            for chunk in response.iter_content(chunk_size=1024 * 1024 * 10):
                digest.update(chunk)
                rf.write(chunk)

        if digest.hexdigest() != stats.checksum:
            raise ZenodoFileException(
                "File checksums do not match for remote file id: {}".format(stats.id)
            )

    def _upload(self):
        """PUT the local file into the deposition's bucket."""
        target = self._zen.bucket + "/{}".format(os.path.basename(self.remote_file()))
        with open(self.local_file(), "rb") as lf:
            self._zen._api_request(target, method="PUT", data=lf)

    @property
    def list(self):
        """Names of all files currently listed in the deposition."""
        return list(self._zen.get_files())

    @property
    def name(self):
        """The local file path of this object."""
        return self.local_file()
|
||
|
||
class ZENHelper(object):
    """Small client for the Zenodo deposition API calls used by this module.

    Keyword arguments:
        access_token: Zenodo personal access token (required).
        sandbox: when true, use https://sandbox.zenodo.org instead of
            https://zenodo.org. Defaults to False.
        deposition: id of an existing deposition. When omitted, a new
            deposition is created during construction.

    Raises:
        WorkflowError: if ``access_token`` is not supplied.
    """

    def __init__(self, *args, **kwargs):
        try:
            self._access_token = kwargs.pop("access_token")
        except KeyError:
            raise WorkflowError(
                "Zenodo personal access token must be passed in as 'access_token' argument.\n"
                "Separate registration and access token is needed for Zenodo sandbox "
                "environment at https://sandbox.zenodo.org."
            )

        self._sandbox = kwargs.pop("sandbox", False)
        self._baseurl = (
            "https://sandbox.zenodo.org" if self._sandbox else "https://zenodo.org"
        )

        if "deposition" in kwargs:
            self.deposition = kwargs.pop("deposition")
            self.bucket = self.get_bucket()
        else:
            # Creating a new deposition, as deposition id was not supplied.
            # Read the keys explicitly rather than unpacking ``.values()``,
            # which would silently depend on the dict's ordering.
            created = self.create_deposition()
            self.deposition = created["id"]
            self.bucket = created["bucket"]

    def _api_request(
        self, url, method="GET", data=None, headers=None, files=None, json=False
    ):
        """Send an authenticated request and return the response.

        Args:
            url: full URL to request.
            method: HTTP method name.
            data: request body passed through to ``requests``.
            headers: optional extra headers merged into the session headers.
            files: passed through to ``requests``.
            json: when true, return the decoded JSON body instead of the
                response object.

        Raises:
            WorkflowError: if the response carries an HTTP error status.
        """
        try:
            # The request is not streamed, so the body is fully read before
            # the session is closed and the returned response stays usable.
            with requests.Session() as session:
                # Hook that turns any HTTP error status into an HTTPError.
                session.hooks = {
                    "response": lambda r, *args, **kwargs: r.raise_for_status()
                }
                session.headers["Authorization"] = "Bearer {}".format(
                    self._access_token
                )
                if headers:
                    session.headers.update(headers)

                r = session.request(method=method, url=url, data=data, files=files)
                return r.json() if json else r
        except HTTPError as e:
            raise WorkflowError("Failed to connect to zenodo", e)

    def create_deposition(self):
        """Create an empty deposition and return its ``id`` and ``bucket``."""
        resp = self._api_request(
            method="POST",
            url=self._baseurl + "/api/deposit/depositions",
            headers={"Content-Type": "application/json"},
            data="{}",
            json=True,
        )
        return {"id": resp["id"], "bucket": resp["links"]["bucket"]}

    def get_bucket(self):
        """Return the bucket link of the current deposition."""
        resp = self._api_request(
            self._baseurl + "/api/deposit/depositions/{}".format(self.deposition),
            headers={"Content-Type": "application/json"},
            json=True,
        )
        return resp["links"]["bucket"]

    def get_files(self):
        """Return a dict mapping file basename to ``ZenFileInfo``.

        The entries are built from the file listing of the current
        deposition.
        """
        files = self._api_request(
            self._baseurl + "/api/deposit/depositions/{}/files".format(self.deposition),
            headers={"Content-Type": "application/json"},
            json=True,
        )
        return {
            os.path.basename(f["filename"]): ZenFileInfo(
                f["checksum"], int(f["filesize"]), f["id"], f["links"]["download"]
            )
            for f in files
        }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import os
from snakemake.remote.zenodo import RemoteProvider
# Imported explicitly so the ``except`` clause below can resolve the name.
from snakemake.exceptions import ZenodoFileException

# The sandbox token is read from the environment; a missing variable
# raises KeyError here.
access_token_sandbox=os.environ["ZENODO_SANDBOX_PAT"]
zen_sandbox = RemoteProvider(access_token=access_token_sandbox, sandbox=True)

rule all:
    input: "download.txt", zen_sandbox.remote("large_upload.txt")

rule download:
    input:
        zen_sandbox.remote("uploaded.txt")
    output:
        "download.txt"
    shell:
        "cp {input} {output}"

rule upload:
    input: "test.txt"
    output:
        zen_sandbox.remote("uploaded.txt")
    shell:
        "cp {input} {output}"

# NOTE(review): this try/except wraps the rule *definition*, not its
# execution -- confirm the upload-size error can actually surface here.
try:
    rule too_large_upload:
        output: zen_sandbox.remote("large_upload.txt")
        shell: "head -c 101000000 /dev/urandom > {output}"
except ZenodoFileException:
    print("Current Zenodo stable API supports <=100MB per file.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Freedom of self-doubt | ||
6 p.m. | ||
Rising |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Freedom of self-doubt | ||
6 p.m. | ||
Rising |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters