-
Notifications
You must be signed in to change notification settings - Fork 359
/
zenodo.py
99 lines (88 loc) · 3.54 KB
/
zenodo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import json
import os
import shutil
from os import makedirs, path
from urllib.error import HTTPError
from urllib.request import Request
from ..utils import copytree, deep_get
from .doi import DoiProvider
class Zenodo(DoiProvider):
    """Provide contents of a Zenodo deposit.

    Also handles other Invenio-style hosts listed in ``self.hosts``
    (currently the Zenodo sandbox and data.caltech.edu).
    """

    def __init__(self):
        super().__init__()
        # Set by detect(); initialised here so that content_id can be read
        # before detect() has run without raising AttributeError.
        self.record_id = None
        # For every supported host we need:
        #   hostname: URL prefixes under which records are published
        #   api:      API URL prefix used to fetch a record's metadata
        #   files:    dotted path in the record metadata to the URL of the
        #             file listing; empty when the file entries are part of
        #             the record metadata itself
        #   filepath: dotted path to the list of file entries
        #   filename: path to the filename inside a file entry
        #   download: path to the download URL inside a file entry
        #   type:     path to the item type in the metadata
        self.hosts = [
            {
                "hostname": [
                    "https://sandbox.zenodo.org/record/",
                    "http://sandbox.zenodo.org/record/",
                    # The https form was missing although the production
                    # host below lists it; without it https sandbox
                    # ``/records/`` URLs were never detected.
                    "https://sandbox.zenodo.org/records/",
                    "http://sandbox.zenodo.org/records/",
                ],
                "api": "https://sandbox.zenodo.org/api/records/",
                "files": "links.files",
                "filepath": "entries",
                "filename": "key",
                "download": "links.content",
                "type": "metadata.upload_type",
            },
            {
                "hostname": [
                    "https://zenodo.org/record/",
                    "http://zenodo.org/record/",
                    "https://zenodo.org/records/",
                ],
                "api": "https://zenodo.org/api/records/",
                "files": "links.files",
                "filepath": "entries",
                "filename": "key",
                "download": "links.content",
                "type": "metadata.upload_type",
            },
            {
                "hostname": [
                    "https://data.caltech.edu/records/",
                    "http://data.caltech.edu/records/",
                ],
                "api": "https://data.caltech.edu/api/record/",
                "files": "",
                "filepath": "metadata.electronic_location_and_access",
                "filename": "electronic_name.0",
                "download": "uniform_resource_identifier",
                "type": "metadata.resourceType.resourceTypeGeneral",
            },
        ]

    def detect(self, doi, ref=None, extra_args=None):
        """Trigger this provider for things that resolve to a Zenodo/Invenio record

        ``doi`` is passed through ``self.doi2url`` and the resulting URL is
        compared against the ``hostname`` prefixes of every known host.

        Returns a spec dict ``{"record": <record id>, "host": <host entry>}``
        for the first matching host, or ``None`` when no host matches.
        ``ref`` and ``extra_args`` are accepted but not used here.
        """
        url = self.doi2url(doi)
        for host in self.hosts:
            for prefix in host["hostname"]:
                if not url.startswith(prefix):
                    continue
                # Every prefix ends right before the record id, so the id is
                # the first path segment after it. Unlike taking the last
                # segment of the whole URL, this tolerates a trailing slash
                # or a sub-path such as ``<id>/files/...``.
                self.record_id = url[len(prefix):].split("/", maxsplit=1)[0]
                return {"record": self.record_id, "host": host}
        return None

    def fetch(self, spec, output_dir, yield_output=False):
        """Fetch and unpack a Zenodo record

        ``spec`` is the dict returned by ``detect``. This is a generator: it
        yields progress messages, plus whatever ``self.fetch_file`` yields
        for each file entry of the record. ``yield_output`` is accepted but
        not used here.
        """
        record_id = spec["record"]
        host = spec["host"]

        yield f"Fetching Zenodo record {record_id}.\n"
        resp = self.urlopen(
            f'{host["api"]}{record_id}',
            headers={"accept": "application/json"},
        )
        record = resp.json()

        if host["files"]:
            # This host keeps the file listing behind a separate URL that is
            # referenced from the record metadata; fetch it and use it in
            # place of the record for the file lookup below.
            yield f"Fetching Zenodo record {record_id} files.\n"
            files_url = deep_get(record, host["files"])
            resp = self.urlopen(
                files_url,
                headers={"accept": "application/json"},
            )
            record = resp.json()

        files = deep_get(record, host["filepath"])
        # Unzipping is only requested when the record has exactly one file.
        only_one_file = len(files) == 1
        for file_ref in files:
            yield from self.fetch_file(file_ref, host, output_dir, unzip=only_one_file)

    @property
    def content_id(self):
        """The Zenodo record ID as the content of a record is immutable

        ``None`` until ``detect`` has matched a record.
        """
        return self.record_id