Skip to content

Commit 75e0371

Browse files
authored
Merge dcedd02 into 1b8b0c7
2 parents 1b8b0c7 + dcedd02 commit 75e0371

File tree

3 files changed

+497
-36
lines changed

3 files changed

+497
-36
lines changed
Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
from .file_transfer_handlers import (FetchFileFromCentralHandler,
2-
PushFileToCentralHandler)
2+
PushFileToCentralHandler,
3+
DeleteFileFromCentralHandler)
4+
from qiita_core.util import is_test_environment
35

46
# Names exported by a star-import of this package.
__all__ = ['FetchFileFromCentralHandler']

# Routing table: (URL regex, handler class) pairs for the cloud endpoints.
ENDPOINTS = [
    (r"/cloud/fetch_file_from_central/(.*)", FetchFileFromCentralHandler),
    (r"/cloud/push_file_to_central/", PushFileToCentralHandler),
]

# The delete endpoint is registered only when is_test_environment() is true.
if is_test_environment():
    ENDPOINTS += [
        (r"/cloud/delete_file_from_central/(.*)",
         DeleteFileFromCentralHandler),
    ]

qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py

Lines changed: 226 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,78 @@
11
import os
2+
from pathlib import Path
23

34
from tornado.web import HTTPError, RequestHandler
45
from tornado.gen import coroutine
6+
import zipfile
7+
from io import BytesIO
8+
from shutil import rmtree
59

6-
from qiita_core.util import execute_as_transaction
10+
from qiita_core.util import execute_as_transaction, is_test_environment
711
from qiita_db.handlers.oauth2 import authenticate_oauth
12+
from qiita_pet.handlers.download import BaseHandlerDownload
813
from qiita_core.qiita_settings import qiita_config
14+
import qiita_db as qdb
15+
16+
17+
def is_directory(filepath):
    """Report whether Qiita's DB lists the given filepath as a directory.

    Note: only DB entries are consulted, the actual filesystem is never
    inspected.

    Parameters
    ----------
    filepath : str
        The filepath of the directory that shall be tested for being listed
        as a directory in Qiita's DB. One trailing path separator is ignored.

    Returns
    -------
    bool
        True if the last part of the filepath is stored as filepath in
        qiita.filepath AND the first path component after base_data_dir is
        contained in a mountpoint of qiita.data_directory AND the
        filepath_type is 'directory' or 'html_summary_dir'.
        False otherwise, which includes filepaths that are not located
        below base_data_dir or that are base_data_dir itself.
    """
    # strip a trailing separator so that basename() yields the directory name
    if filepath.endswith(os.sep):
        target = os.path.dirname(filepath)
    else:
        target = filepath
    leaf_name = os.path.basename(target)

    # File objects "foo" are stored in <base_data_dir>/<mountpoint>/foo. The
    # mountpoint is therefore the top-most component that remains once
    # base_data_dir has been chopped off. Requiring a valid mountpoint adds
    # to preventing users from downloading arbitrary file contents.
    try:
        below_base = Path(target).relative_to(
            Path(qiita_config.base_data_dir))
        mountpoint_name = below_base.parts[0]
    except (ValueError, IndexError):
        # ValueError: base_data_dir is no proper prefix of the given filepath
        # IndexError: only base_data_dir itself was given
        return False

    # the mountpoint can hardly be empty after the IndexError guard above,
    # but both names are checked defensively
    if not (leaf_name and mountpoint_name):
        return False

    # find entries that
    #  a) are of filepath_type "directory" or "html_summary_dir"
    #  b) whose filepath equals the directory name
    #  c) whose mountpoint contains the name derived from the given filepath
    sql = """SELECT filepath_id
             FROM qiita.filepath
                JOIN qiita.filepath_type USING (filepath_type_id)
                JOIN qiita.data_directory USING (data_directory_id)
             WHERE filepath_type IN ('directory', 'html_summary_dir') AND
                   filepath=%s AND
                   position(%s in mountpoint)>0;"""
    transaction = qdb.sql_connection.TRN
    with transaction:
        transaction.add(sql, [leaf_name, mountpoint_name])
        return len(transaction.execute_fetchflatten()) > 0
976

1077

1178
class FetchFileFromCentralHandler(RequestHandler):
@@ -37,20 +104,95 @@ def get(self, requested_filepath):
37104
raise HTTPError(403, reason=(
38105
"The requested file is not present in Qiita's BASE_DATA_DIR!"))
39106

40-
# delivery of the file via nginx requires replacing the basedatadir
41-
# with the prefix defined in the nginx configuration for the
42-
# base_data_dir, '/protected/' by default
43-
protected_filepath = filepath.replace(basedatadir, '/protected')
107+
filename_directory = "qiita-main-data.zip"
108+
if os.path.isdir(filepath):
109+
# Test if this directory is managed by Qiita's DB as directory
110+
# Thus we can prevent that a lazy client simply downloads the whole
111+
# base_data_directory
112+
if not is_directory(filepath):
113+
raise HTTPError(403, reason=(
114+
"You cannot access this directory!"))
115+
else:
116+
# flag the response for qiita_client
117+
self.set_header('Is-Qiita-Directory', 'yes')
44118

45119
self.set_header('Content-Type', 'application/octet-stream')
46120
self.set_header('Content-Transfer-Encoding', 'binary')
47-
self.set_header('X-Accel-Redirect', protected_filepath)
48121
self.set_header('Content-Description', 'File Transfer')
49122
self.set_header('Expires', '0')
50123
self.set_header('Cache-Control', 'no-cache')
51-
self.set_header('Content-Disposition',
52-
'attachment; filename=%s' % os.path.basename(
53-
protected_filepath))
124+
125+
# We here need to differentiate a request that comes directly to the
126+
# qiita instance (happens in testing) or was redirected through nginx
127+
# (should be the default). If nginx, we can use nginx' fast file
128+
# delivery mechanisms, otherwise, we need to send via slower tornado.
129+
# We indirectly infer this by looking for the "X-Forwarded-For" header,
130+
# which should only exist when redirected through nginx.
131+
if self.request.headers.get('X-Forwarded-For') is None:
132+
# delivery via tornado
133+
if not is_directory(filepath):
134+
# a single file
135+
self.set_header(
136+
'Content-Disposition',
137+
'attachment; filename=%s' % os.path.basename(filepath))
138+
with open(filepath, "rb") as f:
139+
self.write(f.read())
140+
else:
141+
# a whole directory
142+
memfile = BytesIO()
143+
with zipfile.ZipFile(memfile, 'w', zipfile.ZIP_DEFLATED) as zf:
144+
for root, dirs, files in os.walk(filepath):
145+
for file in files:
146+
full_path = os.path.join(root, file)
147+
# make path in zip file relative
148+
rel_path = os.path.relpath(full_path, filepath)
149+
zf.write(full_path, rel_path)
150+
memfile.seek(0)
151+
self.set_header('Content-Type', 'application/zip')
152+
self.set_header('Content-Disposition',
153+
'attachment; filename=%s' % filename_directory)
154+
self.write(memfile.read())
155+
else:
156+
# delivery via nginx
157+
if not is_directory(filepath):
158+
# a single file:
159+
# delivery of the file via nginx requires replacing the
160+
# basedatadir with the prefix defined in the nginx
161+
# configuration for the base_data_dir, '/protected/' by default
162+
protected_filepath = filepath.replace(basedatadir,
163+
'/protected')
164+
self.set_header('X-Accel-Redirect', protected_filepath)
165+
self.set_header(
166+
'Content-Disposition',
167+
'attachment; filename=%s' % os.path.basename(
168+
protected_filepath))
169+
else:
170+
# a whole directory
171+
to_download = BaseHandlerDownload._list_dir_files_nginx(
172+
self, filepath)
173+
174+
# fp_subdir is the part of the filepath the user requested,
175+
# without QIITA_BASE_DIR
176+
fp_subdir = os.path.relpath(filepath, basedatadir)
177+
178+
# above function adds filepath to located files, which is
179+
# different from the non-nginx version, e.g.
180+
# fp = /protected/job/2_test_folder/testdir/fileA.txt
181+
# fp_name = job/2_test_folder/testdir/fileA.txt
182+
# where "job/2_test_folder" is what user requested and
183+
# "testdir/fileA.txt" is a file within this directory.
184+
# When extracting by qiita_client, the "job/2_test_folder"
185+
# part would be added twice (one by user request, second by
186+
# unzipping). Therefore, we need to correct these names here:
187+
to_download = [
188+
(fp, os.path.relpath(fp_name, fp_subdir), fp_checksum,
189+
fp_size)
190+
for fp, fp_name, fp_checksum, fp_size
191+
in to_download]
192+
BaseHandlerDownload._write_nginx_file_list(self, to_download)
193+
BaseHandlerDownload._set_nginx_headers(
194+
self, filename_directory)
195+
54196
self.finish()
55197

56198

@@ -65,30 +207,96 @@ def post(self):
65207
# canonic version of base_data_dir
66208
basedatadir = os.path.abspath(qiita_config.base_data_dir)
67209
stored_files = []
210+
stored_directories = []
68211

69212
for filespath, filelist in self.request.files.items():
70213
if filespath.startswith(basedatadir):
71214
filespath = filespath[len(basedatadir):]
72215

73216
for file in filelist:
217+
# differentiate between regular files and whole directories,
218+
# which must be zipped AND the client must provide the
219+
# is_directory='true' body argument.
220+
sent_directory = self.get_body_argument(
221+
'is_directory', "false") == "true"
222+
74223
filepath = os.path.join(filespath, file['filename'])
75224
# remove leading /
76225
if filepath.startswith(os.sep):
77226
filepath = filepath[len(os.sep):]
78227
filepath = os.path.abspath(os.path.join(basedatadir, filepath))
79228

80-
if os.path.exists(filepath):
229+
if sent_directory:
230+
# if a whole directory was sent, we want to store it at
231+
# the given dirname of the filepath
232+
filepath = os.path.dirname(filepath)
233+
234+
# prevent overwriting existing files, except in test mode
235+
if os.path.exists(filepath) and (not is_test_environment()):
81236
raise HTTPError(403, reason=(
82-
"The requested file is already "
83-
"present in Qiita's BASE_DATA_DIR!"))
237+
"The requested %s is already "
238+
"present in Qiita's BASE_DATA_DIR!" %
239+
('directory' if sent_directory else 'file')))
84240

85241
os.makedirs(os.path.dirname(filepath), exist_ok=True)
86-
with open(filepath, "wb") as f:
87-
f.write(file['body'])
88-
stored_files.append(filepath)
242+
if sent_directory:
243+
with zipfile.ZipFile(BytesIO(file['body'])) as zf:
244+
zf.extractall(filepath)
245+
stored_directories.append(filepath)
246+
else:
247+
with open(filepath, "wb") as f:
248+
f.write(file['body'])
249+
stored_files.append(filepath)
250+
251+
for (_type, objs) in [('files', stored_files),
252+
('directories', stored_directories)]:
253+
if len(objs) > 0:
254+
self.write(
255+
"Stored %i %s into BASE_DATA_DIR of Qiita:\n%s\n" % (
256+
len(objs),
257+
_type,
258+
'\n'.join(map(lambda x: ' - %s' % x, objs))))
259+
260+
self.finish()
261+
262+
263+
class DeleteFileFromCentralHandler(RequestHandler):
    """Delete a file or directory from Qiita's BASE_DATA_DIR.

    Note: this handler is NOT available in productive instances! Every
    request is rejected unless is_test_environment() is true.
    """

    @authenticate_oauth
    @coroutine
    @execute_as_transaction
    def get(self, requested_filepath):
        """Remove the requested file or directory and report what was done.

        Parameters
        ----------
        requested_filepath : str
            The path captured from the request URL. It is interpreted as an
            absolute path and must point to something inside BASE_DATA_DIR.

        Raises
        ------
        HTTPError
            403 if Qiita is not in test mode, if the path is BASE_DATA_DIR
            itself, if the path is not located inside BASE_DATA_DIR, or if
            nothing exists at the path.
        """
        if not is_test_environment():
            raise HTTPError(403, reason=(
                "You cannot delete files through this API endpoint, when "
                "Qiita is not in test-mode!"))

        # ensure we have an absolute path, i.e. starting at /, and use a
        # canonic version of it ('..' and duplicate separators collapsed)
        filepath = os.path.abspath(
            os.path.join(os.path.sep, requested_filepath))

        # canonic version of base_data_dir
        basedatadir = os.path.abspath(qiita_config.base_data_dir)

        # never wipe the complete data directory through this endpoint
        if filepath == basedatadir:
            raise HTTPError(403, reason=(
                "You cannot delete the BASE_DATA_DIR of Qiita itself!"))

        # Compare against the base directory INCLUDING a trailing separator:
        # a bare startswith(basedatadir) would also accept sibling paths that
        # merely share the prefix, e.g. /data_evil/x for a base of /data.
        if not filepath.startswith(os.path.join(basedatadir, '')):
            # attempt to access files outside of the BASE_DATA_DIR
            raise HTTPError(403, reason=(
                "You cannot delete file '%s', which is outside of "
                "the BASE_DATA_DIR of Qiita!" % filepath))

        if not os.path.exists(filepath):
            raise HTTPError(403, reason=(
                "The requested file %s is not present "
                "in Qiita's BASE_DATA_DIR!" % filepath))

        if os.path.isdir(filepath):
            rmtree(filepath)
            self.write("Deleted directory %s from BASE_DATA_DIR of QIita" %
                       filepath)
        else:
            os.remove(filepath)
            self.write("Deleted file %s from BASE_DATA_DIR of Qiita" %
                       filepath)

        self.finish()

0 commit comments

Comments
 (0)