forked from InsightSoftwareConsortium/ITK
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ArchiveTestingDataOnGirder.py
executable file
·191 lines (173 loc) · 8.31 KB
/
ArchiveTestingDataOnGirder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#!/usr/bin/env python
import argparse
import girder_client
from girder_client import GirderClient
import os
import fnmatch
import json
import mimetypes
from distutils.version import StrictVersion
# Fail fast at import time when the installed girder_client is older than
# the minimum release this script supports.
if StrictVersion("2.0.0") > StrictVersion(girder_client.__version__):
    raise Exception("Girder 2.0.0 or newer is required")
class GirderExternalDataCli(GirderClient):
    """
    A command line Python client for interacting with a Girder instance's
    RESTful api, specifically for performing uploads into a Girder instance.

    Rather than uploading the files found in the local tree, this client
    looks for CMake ExternalData content links (files ending in a given
    extension whose first line is a hash value) and uploads the object with
    that hash name found in a local object store directory.
    """

    def __init__(self, apiKey, objectStore,
                 apiUrl='https://data.kitware.com/api/v1'):
        """initialization function to create a GirderCli instance, will attempt
        to authenticate with the designated Girder instance.

        :param apiKey: Girder API key used to authenticate.
        :param objectStore: path to the directory holding the objects to
            upload; each object is looked up by the hash value read from its
            content link.
        :param apiUrl: URL of the Girder REST API to talk to.
        """
        GirderClient.__init__(self, apiUrl=apiUrl)
        self.objectStore = objectStore
        self.authenticate(apiKey=apiKey)

    def content_link_upload(self, localFolder, parentId, ext='.sha512',
                            parentType='folder',
                            blacklist=('.git', '.ExternalData'),
                            reuseExisting=True, dryRun=False):
        """Upload objects corresponding to CMake ExternalData content links.

        This will recursively walk down the tree and find content links ending
        with the specified extension and create a hierarchy on the server under
        the parentId.

        Only the sub-directories of *localFolder* are processed; files located
        directly in *localFolder* are not uploaded.

        :param localFolder: path to the local folder to walk.
        :param parentId: id of the parent in Girder or resource path.
        :param ext: Content link file extension.
        :param parentType: one of (collection,folder,user), default of folder.
        :param blacklist: entry names (files or folders) to ignore.
        :param reuseExisting: bool whether to accept an existing item of
            the same name in the same location, or create a new one instead.
        :param dryRun: Do not actually upload any content.
        """
        parentId = self._checkResourcePath(parentId)
        localFolder = os.path.normpath(localFolder)
        for entry in os.listdir(localFolder):
            if entry in blacklist:
                print("Ignoring file %s as it is blacklisted" % entry)
                continue
            full_entry = os.path.join(localFolder, entry)
            if os.path.islink(full_entry):
                # os.walk skips symlinks by default
                print("Skipping file %s as it is a symlink" % entry)
                continue
            if os.path.isdir(full_entry):
                self._uploadFolderRecursive(
                    full_entry, parentId, parentType, ext,
                    reuseExisting=reuseExisting, blacklist=blacklist,
                    dryRun=dryRun)

    def _uploadContentLinkItem(self, name, content_link, folder,
                               ext='.sha512', parentType='folder',
                               dryRun=False, reuseExisting=False):
        """Upload objects corresponding to CMake ExternalData content links.

        This will upload the file with name, *name*, for the content link
        located at *content_link* to the Girder folder, *folder*.

        Paths that are not regular files, or that do not end with *ext*, are
        silently ignored.

        :param name: name given to the uploaded item.
        :param content_link: path to the content link file; its first line is
            read as the hash value naming the object in the object store.
        :param folder: dict describing the destination; only its '_id' key
            is used.
        :param ext: Content link file extension.
        :param parentType: type of *folder*, one of (collection,folder,user),
            default of folder.
        :param reuseExisting: bool whether to accept an existing item of
            the same name in the same location, or create a new one instead.
        :param dryRun: Do not actually upload any content.
        :raises Exception: if *parentType* is not 'folder'.
        """
        content_link = os.path.normpath(content_link)
        is_content_link = (os.path.isfile(content_link) and
                           fnmatch.fnmatch(content_link, '*' + ext))
        if not is_content_link:
            return
        if parentType != 'folder':
            raise Exception(('Attempting to upload an item under a %s.'
                             % parentType) +
                            ' Items can only be added to folders.')
        # The content link stores the hash of the real object on its first
        # line; that hash is also the object's file name in the store.
        with open(content_link, 'r') as fp:
            hash_value = fp.readline().strip()
        self._uploadAsItem(
            name,
            folder['_id'],
            os.path.join(self.objectStore, hash_value),
            reuseExisting=reuseExisting,
            dryRun=dryRun)

    def _uploadFolderRecursive(self, localFolder, parentId, parentType,
                               ext='.sha512',
                               reuseExisting=False,
                               blacklist=(),
                               dryRun=False):
        """Function to recursively upload a folder and all of its descendants.

        Folders that contain no content link anywhere below them are skipped
        entirely, so no empty folder is created for them.

        :param localFolder: full path to local folder to be uploaded
        :param parentId: id of parent in Girder,
            where new folder will be added
        :param parentType: one of (collection, folder, user)
        :param ext: Content link file extension.
        :param reuseExisting: boolean indicating whether to accept an existing
            item of the same name in the same location, or create a new one
            instead
        :param blacklist: entry names (files or folders) to ignore.
        :param dryRun: Do not actually create folders or upload any content.
        """
        localFolder = os.path.normpath(localFolder)
        folderName = os.path.basename(localFolder)
        if folderName in blacklist:
            print("Ignoring file %s as it is blacklisted" % folderName)
            return

        # Do not add the folder if it does not contain any content links.
        # any() stops walking the tree as soon as the first one is found.
        pattern = '*' + ext
        has_content_link = any(
            fnmatch.filter(filenames, pattern)
            for _root, _dirnames, filenames in os.walk(localFolder))
        if not has_content_link:
            return

        print('Creating Folder from %s' % localFolder)
        if dryRun:
            # create a dryRun placeholder
            folder = {'_id': 'dryRun'}
        elif localFolder == '.':
            folder = {'_id': parentId}
        else:
            folder = self.loadOrCreateFolder(
                os.path.basename(localFolder), parentId, parentType)

        # Children are added to `folder`. That is a real Girder folder unless
        # localFolder is '.', in which case the parent itself is reused and
        # keeps its own type. Using the parent's type for a freshly created
        # folder would wrongly reject items whenever the parent is a
        # collection or a user.
        folderType = parentType if localFolder == '.' else 'folder'

        for entry in sorted(os.listdir(localFolder)):
            if entry in blacklist:
                print("Ignoring file %s as it is blacklisted" % entry)
                continue
            full_entry = os.path.join(localFolder, entry)
            if os.path.islink(full_entry):
                # os.walk skips symlinks by default
                print("Skipping file %s as it is a symlink" % entry)
                continue
            elif os.path.isdir(full_entry):
                self._uploadFolderRecursive(
                    full_entry, folder['_id'], folderType,
                    ext, reuseExisting=reuseExisting,
                    blacklist=blacklist, dryRun=dryRun)
            else:
                # The item is named after the content link without its
                # last extension.
                name = os.path.splitext(entry)[0]
                self._uploadContentLinkItem(
                    name, full_entry, folder,
                    ext=ext, parentType=folderType, dryRun=dryRun,
                    reuseExisting=reuseExisting)

        if not dryRun:
            # _folderUploadCallbacks is not defined in this class; presumably
            # it is provided by GirderClient -- verify against the installed
            # girder_client version.
            for callback in self._folderUploadCallbacks:
                callback(folder, localFolder)
def main():
    """Command line entry point.

    Parses the command line options, authenticates against Girder with the
    given API key and uploads the objects referenced by the content links
    found under the local folder. The objects are read from the 'SHA512'
    sub-directory of the given object store.
    """
    # By default, walk the directory two levels above this script.
    default_local_folder = os.path.join(
        os.path.dirname(__file__), '..', '..')

    parser = argparse.ArgumentParser(
        description='Upload CMake ExternalData content links to Girder')
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help=('will not write anything to Girder, only report on what would '
              'happen'))
    parser.add_argument(
        '--api-key',
        required=True,
        default=None)
    parser.add_argument(
        '--local-folder',
        required=False,
        default=default_local_folder,
        help='path to local target folder')
    # Default is ITK/ITKTestingData/Nightly
    parser.add_argument(
        '--parent-id',
        required=False,
        default='57b673388d777f10f269651c',
        help='id of Girder parent target')
    parser.add_argument(
        '--object-store',
        required=True,
        help='Path to the CMake ExternalData object store')
    parser.add_argument(
        '--no-reuse',
        action='store_true',
        help="Don't reuse existing items of same name at same location")
    options = parser.parse_args()

    sha512_store = os.path.join(options.object_store, 'SHA512')
    client = GirderExternalDataCli(options.api_key, objectStore=sha512_store)
    client.content_link_upload(
        options.local_folder,
        options.parent_id,
        reuseExisting=not options.no_reuse,
        dryRun=options.dry_run)
# Run the uploader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()