11import os
2+ from pathlib import Path
23
34from tornado .web import HTTPError , RequestHandler
45from tornado .gen import coroutine
6+ import zipfile
7+ from io import BytesIO
8+ from shutil import rmtree
59
6- from qiita_core .util import execute_as_transaction
10+ from qiita_core .util import execute_as_transaction , is_test_environment
711from qiita_db .handlers .oauth2 import authenticate_oauth
12+ from qiita_pet .handlers .download import BaseHandlerDownload
813from qiita_core .qiita_settings import qiita_config
14+ import qiita_db as qdb
15+
16+
def is_directory(filepath):
    """Check whether Qiita's DB lists the given filepath as a directory.

    Note: only DB entries are consulted here, the actual filesystem is never
    inspected.

    Parameters
    ----------
    filepath : str
        The path of the directory candidate. A single trailing path separator
        is ignored.

    Returns
    -------
    bool
        True if the last component of the filepath is stored as filepath in
        qiita.filepath with a filepath_type of 'directory' or
        'html_summary_dir' AND the first path component below base_data_dir
        occurs in a mountpoint of qiita.data_directory.
        False otherwise, in particular when the filepath is not located below
        base_data_dir or names base_data_dir itself.
    """
    # drop a trailing separator, otherwise basename would be the empty string
    candidate = (os.path.dirname(filepath)
                 if filepath.endswith(os.sep) else filepath)
    leaf_name = os.path.basename(candidate)

    # File objects "foo" are stored in <base_data_dir>/<mountpoint>/foo, i.e.
    # the mountpoint is the top directory level that remains once
    # base_data_dir has been stripped from the given path. Requiring a valid
    # mountpoint helps to prevent downloads of arbitrary file contents.
    try:
        below_base = Path(candidate).relative_to(
            Path(qiita_config.base_data_dir))
        mountpoint_name = below_base.parts[0]
    except (ValueError, IndexError):
        # ValueError: base_data_dir is no proper prefix of the given path
        # IndexError: nothing but base_data_dir itself was given
        return False

    # the mountpoint part should never be empty thanks to the IndexError
    # handling above, but better safe than sorry
    if not leaf_name or not mountpoint_name:
        return False

    # look for entries that
    #   a) are of filepath_type "directory" or "html_summary_dir"
    #   b) whose filepath equals the directory name
    #   c) whose mountpoint contains the top level directory name
    # NOTE(review): c) is a substring test (position(...) > 0), not an
    # equality test - confirm that this is intended.
    sql = """SELECT filepath_id
             FROM qiita.filepath
                JOIN qiita.filepath_type USING (filepath_type_id)
                JOIN qiita.data_directory USING (data_directory_id)
             WHERE filepath_type IN ('directory', 'html_summary_dir') AND
                   filepath=%s AND
                   position(%s in mountpoint)>0;"""
    transaction = qdb.sql_connection.TRN
    with transaction:
        transaction.add(sql, [leaf_name, mountpoint_name])
        matches = transaction.execute_fetchflatten()
        return len(matches) > 0
976
1077
1178class FetchFileFromCentralHandler (RequestHandler ):
@@ -37,20 +104,95 @@ def get(self, requested_filepath):
37104 raise HTTPError (403 , reason = (
38105 "The requested file is not present in Qiita's BASE_DATA_DIR!" ))
39106
40- # delivery of the file via nginx requires replacing the basedatadir
41- # with the prefix defined in the nginx configuration for the
42- # base_data_dir, '/protected/' by default
43- protected_filepath = filepath .replace (basedatadir , '/protected' )
107+ filename_directory = "qiita-main-data.zip"
108+ if os .path .isdir (filepath ):
109+ # Test if this directory is managed by Qiita's DB as directory
110+ # Thus we can prevent that a lazy client simply downloads the whole
111+ # base_data_directory
112+ if not is_directory (filepath ):
113+ raise HTTPError (403 , reason = (
114+ "You cannot access this directory!" ))
115+ else :
116+ # flag the response for qiita_client
117+ self .set_header ('Is-Qiita-Directory' , 'yes' )
44118
45119 self .set_header ('Content-Type' , 'application/octet-stream' )
46120 self .set_header ('Content-Transfer-Encoding' , 'binary' )
47- self .set_header ('X-Accel-Redirect' , protected_filepath )
48121 self .set_header ('Content-Description' , 'File Transfer' )
49122 self .set_header ('Expires' , '0' )
50123 self .set_header ('Cache-Control' , 'no-cache' )
51- self .set_header ('Content-Disposition' ,
52- 'attachment; filename=%s' % os .path .basename (
53- protected_filepath ))
124+
125+ # We here need to differentiate a request that comes directly to the
126+ # qiita instance (happens in testing) or was redirected through nginx
127+ # (should be the default). If nginx, we can use nginx' fast file
128+ # delivery mechanisms, otherwise, we need to send via slower tornado.
129+ # We indirectly infer this by looking for the "X-Forwarded-For" header,
130+ # which should only exist when redirected through nginx.
131+ if self .request .headers .get ('X-Forwarded-For' ) is None :
132+ # delivery via tornado
133+ if not is_directory (filepath ):
134+ # a single file
135+ self .set_header (
136+ 'Content-Disposition' ,
137+ 'attachment; filename=%s' % os .path .basename (filepath ))
138+ with open (filepath , "rb" ) as f :
139+ self .write (f .read ())
140+ else :
141+ # a whole directory
142+ memfile = BytesIO ()
143+ with zipfile .ZipFile (memfile , 'w' , zipfile .ZIP_DEFLATED ) as zf :
144+ for root , dirs , files in os .walk (filepath ):
145+ for file in files :
146+ full_path = os .path .join (root , file )
147+ # make path in zip file relative
148+ rel_path = os .path .relpath (full_path , filepath )
149+ zf .write (full_path , rel_path )
150+ memfile .seek (0 )
151+ self .set_header ('Content-Type' , 'application/zip' )
152+ self .set_header ('Content-Disposition' ,
153+ 'attachment; filename=%s' % filename_directory )
154+ self .write (memfile .read ())
155+ else :
156+ # delivery via nginx
157+ if not is_directory (filepath ):
158+ # a single file:
159+ # delivery of the file via nginx requires replacing the
160+ # basedatadir with the prefix defined in the nginx
161+ # configuration for the base_data_dir, '/protected/' by default
162+ protected_filepath = filepath .replace (basedatadir ,
163+ '/protected' )
164+ self .set_header ('X-Accel-Redirect' , protected_filepath )
165+ self .set_header (
166+ 'Content-Disposition' ,
167+ 'attachment; filename=%s' % os .path .basename (
168+ protected_filepath ))
169+ else :
170+ # a whole directory
171+ to_download = BaseHandlerDownload ._list_dir_files_nginx (
172+ self , filepath )
173+
174+ # fp_subdir is the part of the filepath the user requested,
175+ # without QIITA_BASE_DIR
176+ fp_subdir = os .path .relpath (filepath , basedatadir )
177+
178+ # above function adds filepath to located files, which is
179+ # different from the non-nginx version, e.g.
180+ # fp = /protected/job/2_test_folder/testdir/fileA.txt
181+ # fp_name = job/2_test_folder/testdir/fileA.txt
182+ # where "job/2_test_folder" is what user requested and
183+ # "testdir/fileA.txt" is a file within this directory.
184+ # When extracting by qiita_client, the "job/2_test_folder"
185+ # part would be added twice (one by user request, second by
186+ # unzipping). Therefore, we need to correct these names here:
187+ to_download = [
188+ (fp , os .path .relpath (fp_name , fp_subdir ), fp_checksum ,
189+ fp_size )
190+ for fp , fp_name , fp_checksum , fp_size
191+ in to_download ]
192+ BaseHandlerDownload ._write_nginx_file_list (self , to_download )
193+ BaseHandlerDownload ._set_nginx_headers (
194+ self , filename_directory )
195+
54196 self .finish ()
55197
56198
@@ -65,30 +207,96 @@ def post(self):
65207 # canonic version of base_data_dir
66208 basedatadir = os .path .abspath (qiita_config .base_data_dir )
67209 stored_files = []
210+ stored_directories = []
68211
69212 for filespath , filelist in self .request .files .items ():
70213 if filespath .startswith (basedatadir ):
71214 filespath = filespath [len (basedatadir ):]
72215
73216 for file in filelist :
217+ # differentiate between regular files and whole directories,
218+ # which must be zipped AND the client must provide the
219+ # is_directory='true' body argument.
220+ sent_directory = self .get_body_argument (
221+ 'is_directory' , "false" ) == "true"
222+
74223 filepath = os .path .join (filespath , file ['filename' ])
75224 # remove leading /
76225 if filepath .startswith (os .sep ):
77226 filepath = filepath [len (os .sep ):]
78227 filepath = os .path .abspath (os .path .join (basedatadir , filepath ))
79228
80- if os .path .exists (filepath ):
229+ if sent_directory :
230+ # if a whole directory was sent, we want to store it at
231+ # the given dirname of the filepath
232+ filepath = os .path .dirname (filepath )
233+
234+ # prevent overwriting existing files, except in test mode
235+ if os .path .exists (filepath ) and (not is_test_environment ()):
81236 raise HTTPError (403 , reason = (
82- "The requested file is already "
83- "present in Qiita's BASE_DATA_DIR!" ))
237+ "The requested %s is already "
238+ "present in Qiita's BASE_DATA_DIR!" %
239+ ('directory' if sent_directory else 'file' )))
84240
85241 os .makedirs (os .path .dirname (filepath ), exist_ok = True )
86- with open (filepath , "wb" ) as f :
87- f .write (file ['body' ])
88- stored_files .append (filepath )
242+ if sent_directory :
243+ with zipfile .ZipFile (BytesIO (file ['body' ])) as zf :
244+ zf .extractall (filepath )
245+ stored_directories .append (filepath )
246+ else :
247+ with open (filepath , "wb" ) as f :
248+ f .write (file ['body' ])
249+ stored_files .append (filepath )
250+
251+ for (_type , objs ) in [('files' , stored_files ),
252+ ('directories' , stored_directories )]:
253+ if len (objs ) > 0 :
254+ self .write (
255+ "Stored %i %s into BASE_DATA_DIR of Qiita:\n %s\n " % (
256+ len (objs ),
257+ _type ,
258+ '\n ' .join (map (lambda x : ' - %s' % x , objs ))))
259+
260+ self .finish ()
261+
262+
class DeleteFileFromCentralHandler(RequestHandler):
    """Test-only endpoint that deletes files or directories from Qiita's
    BASE_DATA_DIR.

    Note: this function is NOT available in productive instances! Every
    request is rejected with HTTP 403 unless Qiita runs in test mode.
    """

    @authenticate_oauth
    @coroutine
    @execute_as_transaction
    def get(self, requested_filepath):
        """Delete the requested file or directory from BASE_DATA_DIR.

        Parameters
        ----------
        requested_filepath : str
            The path of the file or directory to delete. It is turned into an
            absolute, canonic path before any check is applied.

        Raises
        ------
        HTTPError
            403, if Qiita is not in test mode
            403, if the canonic path is located outside of BASE_DATA_DIR
            403, if the canonic path does not exist
        """
        if not is_test_environment():
            raise HTTPError(403, reason=(
                "You cannot delete files through this API endpoint, when "
                "Qiita is not in test-mode!"))

        # ensure we have an absolute path, i.e. starting at /
        filepath = os.path.join(os.path.sep, requested_filepath)
        # use a canonic version of the filepath, which also resolves any
        # ".." components before the containment check below
        filepath = os.path.abspath(filepath)

        # canonic version of base_data_dir
        basedatadir = os.path.abspath(qiita_config.base_data_dir)

        # A plain startswith(basedatadir) would also accept sibling paths
        # that merely share the prefix, e.g. "<base>_backup/foo". Comparing
        # against the base directory *including* a trailing separator
        # restricts matches to real descendants. os.path.join(x, '') appends
        # the separator only if it is not already there (e.g. for "/").
        # NOTE(review): the base directory itself is still accepted to stay
        # compatible with the previous behaviour - confirm this is desired.
        base_prefix = os.path.join(basedatadir, '')
        inside_base = (filepath == basedatadir or
                       filepath.startswith(base_prefix))
        if not inside_base:
            # attempt to access files outside of the BASE_DATA_DIR
            raise HTTPError(403, reason=(
                "You cannot delete file '%s', which is outside of "
                "the BASE_DATA_DIR of Qiita!" % filepath))

        if not os.path.exists(filepath):
            raise HTTPError(403, reason=(
                "The requested file %s is not present "
                "in Qiita's BASE_DATA_DIR!" % filepath))

        if os.path.isdir(filepath):
            # remove the directory together with all of its content
            rmtree(filepath)
            self.write("Deleted directory %s from BASE_DATA_DIR of QIita" %
                       filepath)
        else:
            os.remove(filepath)
            self.write("Deleted file %s from BASE_DATA_DIR of Qiita" %
                       filepath)

        self.finish()
0 commit comments