/
launch.py
293 lines (267 loc) · 9.94 KB
/
launch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# This file is part of REANA.
# Copyright (C) 2022 CERN.
#
# REANA is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""REANA-Server launch functionality Flask-Blueprint."""
import json
import logging
import os
import shutil
import threading
import traceback
from bravado.exception import HTTPError
from flask import Blueprint, jsonify
from jsonschema import ValidationError
from marshmallow import Schema
from webargs import fields
from webargs.flaskparser import use_kwargs
import yaml
from reana_commons.errors import REANAValidationError, REANAQuotaExceededError
from reana_commons.specification import load_reana_spec
from reana_commons.validation.utils import validate_workflow_name
from reana_db.utils import (
_get_workflow_with_uuid_or_name,
get_disk_usage_or_zero,
store_workflow_disk_quota,
update_users_disk_quota,
)
from reana_server.api_client import current_rwc_api_client
from reana_server.config import FETCHER_ALLOWED_SCHEMES, LAUNCHER_ALLOWED_SNAKEMAKE_URLS
from reana_server.decorators import check_quota, signin_required
from reana_server.fetcher import REANAFetcherError, get_fetcher
from reana_server.utils import (
get_fetched_workflows_dir,
mv_workflow_files,
prevent_disk_quota_excess,
publish_workflow_submission,
filter_input_files,
get_workspace_retention_rules,
)
from reana_server.validation import validate_workflow
blueprint = Blueprint("launch", __name__)
load_reana_spec_lock = threading.Lock()
"""Lock used to make sure only one specification is loaded at a time."""
@blueprint.route("/launch", methods=["POST"])
@use_kwargs(
{
"url": fields.Url(schemes=FETCHER_ALLOWED_SCHEMES, required=True),
"name": fields.Str(),
"parameters": fields.Str(),
"specification": fields.Str(),
}
)
@signin_required()
@check_quota
def launch(user, url, name="", parameters="{}", specification=None):
r"""Endpoint to launch a REANA workflow from URL.
---
post:
summary: Launch workflow from a remote REANA specification file.
description: >-
This resource expects a remote reference to a REANA specification
file needed to launch a workflow via URL.
operationId: launch
consumes:
- application/json
produces:
- application/json
parameters:
- name: data
in: body
description: The remote origin data required to launch a workflow.
schema:
type: object
required:
- url
properties:
url:
description: Remote origin URL where the REANA specification file is hosted.
type: string
name:
description: Workflow name.
type: string
parameters:
description: Workflow parameters.
type: string
specification:
description: Path to the workflow specification file to be used.
type: string
responses:
200:
description: >-
Request succeeded. Information of the workflow launched.
schema:
type: object
required:
- workflow_id
- workflow_name
- message
properties:
workflow_id:
type: string
workflow_name:
type: string
message:
type: string
validation_warnings:
description: >-
Dictionary of validation warnings, if any. Each
key is a property that was not correctly validated.
type: object
properties:
additional_properties:
type: array
items:
type: string
examples:
application/json:
{
"workflow_id": "cdcf48b1-c2f3-4693-8230-b066e088c6ac",
"workflow_name": "mytest.1",
"message": "The workflow has been successfully submitted."
}
400:
description: >-
Request failed. The incoming payload seems malformed.
schema:
type: object
properties:
message:
type: string
examples:
application/json:
{
"message": "Malformed request."
}
500:
description: >-
Request failed. Internal server error.
schema:
type: object
properties:
message:
type: string
examples:
application/json:
{
"message": "Internal server error."
}
"""
try:
user_id = str(user.id_)
tmpdir = get_fetched_workflows_dir(user_id)
# Fetch the workflow spec
fetcher = get_fetcher(url, tmpdir, specification)
fetcher.fetch()
# Generate the workflow name
workflow_name = name.replace(" ", "") or fetcher.generate_workflow_name()
validate_workflow_name(workflow_name)
# Load and validate the workflow spec
spec_path = fetcher.workflow_spec_path()
# When launching a snakemake workflow, check if the url is allowed
# FIXME: This will not be needed when using a sandbox
with open(spec_path) as spec_fd:
reana_yaml = yaml.safe_load(spec_fd.read())
workflow_type = reana_yaml["workflow"]["type"]
if (
workflow_type == "snakemake"
and url not in LAUNCHER_ALLOWED_SNAKEMAKE_URLS
):
raise ValidationError(
"Unfortunately, it is not possible to launch generic Snakemake "
"workflows at the moment. Please contact the REANA admins for "
"more information."
)
# FIXME: locking will not be needed when the loading and validation of
# specifications will be done inside an external sandbox
with load_reana_spec_lock:
reana_yaml = load_reana_spec(spec_path, workspace_path=tmpdir)
input_parameters = json.loads(parameters)
validation_warnings = validate_workflow(reana_yaml, input_parameters)
# Keep only files and directories listed as workflow's inputs
filter_input_files(tmpdir, reana_yaml)
# Check the user's disk quota
disk_usage = get_disk_usage_or_zero(tmpdir)
prevent_disk_quota_excess(
user, disk_usage, action=f"Launching the workflow {workflow_name}"
)
# Get workspace retention rules
retention_days = reana_yaml.get("workspace", {}).get("retention_days")
retention_rules = get_workspace_retention_rules(retention_days)
# Create workflow
workflow_dict = {
"reana_specification": reana_yaml,
"workflow_name": workflow_name,
"operational_options": {},
"launcher_url": url,
"retention_rules": retention_rules,
}
response, http_response = current_rwc_api_client.api.create_workflow(
workflow=workflow_dict,
user=user_id,
).result()
workflow = _get_workflow_with_uuid_or_name(response["workflow_id"], user_id)
mv_workflow_files(tmpdir, workflow.workspace_path)
# Update the workflows's and user's disk usage
store_workflow_disk_quota(workflow, bytes_to_sum=disk_usage)
update_users_disk_quota(user, bytes_to_sum=disk_usage)
# Start the workflow
parameters = {"input_parameters": input_parameters}
publish_workflow_submission(workflow, user.id_, parameters)
response_data = {
"workflow_id": workflow.id_,
"workflow_name": workflow.name,
"message": "The workflow has been successfully submitted.",
}
if validation_warnings:
response_data["message"] = (
"The workflow has been successfully submitted, but some warnings were issued."
)
response_data["validation_warnings"] = validation_warnings
return LaunchSchema().dump(response_data)
except HTTPError as e:
logging.error(traceback.format_exc())
return jsonify(e.response.json()), e.response.status_code
except json.JSONDecodeError:
logging.error(traceback.format_exc())
return (
jsonify({"message": "The workflow 'parameters' field is not valid JSON."}),
400,
)
except REANAQuotaExceededError as e:
logging.error(traceback.format_exc())
return jsonify({"message": str(e)}), 403
except (
REANAFetcherError,
REANAValidationError,
ValueError,
ValidationError,
) as e:
logging.error(traceback.format_exc())
return jsonify({"message": str(e)}), 400
except Exception:
logging.error(traceback.format_exc())
return (
jsonify({"message": "Something went wrong while fetching the workflow."}),
500,
)
finally:
# FIXME: `load_reana_spec` is not thread-safe and it changes the cwd, so for the
# time being we can only delete the files inside the directory where the
# workflow is fetched. Deleting the directory would result in a
# `FileNotFoundError` when calling `os.getcwd()`, due to the cwd not existing
# anymore.
#
# remove_fetched_workflows_dir(tmpdir)
for entry in os.scandir(tmpdir):
if entry.is_file() or entry.is_symlink():
os.remove(entry)
else:
shutil.rmtree(entry)
class LaunchSchema(Schema):
"""Marshmallow schema for ``launch`` endpoint."""
workflow_id = fields.UUID()
workflow_name = fields.Str()
message = fields.Str()
validation_warnings = fields.Dict()