Skip to content
This repository has been archived by the owner on Feb 23, 2022. It is now read-only.

Commit

Permalink
Multipart Upload Endpoints (#448)
Browse files Browse the repository at this point in the history
* Add server endpoints for multipart upload and corresponding swagger api docs
  • Loading branch information
mvandenburgh committed Aug 19, 2020
1 parent 2d07777 commit 5e7b289
Show file tree
Hide file tree
Showing 9 changed files with 199 additions and 3 deletions.
5 changes: 5 additions & 0 deletions multinet/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ def create_app(config: Optional[MutableMapping] = None) -> Flask:
CORS(app, origins=allowed_origins, supports_credentials=True)
Swagger(app, template_file="swagger/template.yaml")

# Set max file upload size to 32 MB
app.config["MAX_CONTENT_LENGTH"] = 32 * 1024 * 1024

app.secret_key = flask_secret_key()

# Set up logging.
Expand All @@ -53,6 +56,8 @@ def create_app(config: Optional[MutableMapping] = None) -> Flask:
app.register_blueprint(uploaders.nested_json.bp, url_prefix="/api/nested_json")
app.register_blueprint(uploaders.d3_json.bp, url_prefix="/api/d3_json")

app.register_blueprint(uploaders.multipart_upload.bp, url_prefix="/api/uploads")

app.register_blueprint(downloaders.csv.bp, url_prefix="/api")
app.register_blueprint(downloaders.d3_json.bp, url_prefix="/api")

Expand Down
50 changes: 49 additions & 1 deletion multinet/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import copy
from functools import lru_cache
from uuid import uuid4

from arango import ArangoClient
from arango.graph import Graph
Expand Down Expand Up @@ -38,6 +39,7 @@
TableNotFound,
GraphNotFound,
NodeNotFound,
UploadNotFound,
AlreadyExists,
GraphCreationError,
AQLExecutionError,
Expand Down Expand Up @@ -93,7 +95,8 @@ def register_legacy_workspaces() -> None:
sysdb = db("_system")
coll = workspace_mapping_collection()

databases = {name for name in sysdb.databases() if name != "_system"}
system_databases = {"_system", "uploads"}
databases = {name for name in sysdb.databases() if name not in system_databases}
registered = {doc["internal"] for doc in coll.all()}

unregistered = databases - registered
Expand Down Expand Up @@ -608,3 +611,48 @@ def count_text(filt: str) -> str:
"edges": list(aql_query(workspace, query)),
"count": next(aql_query(workspace, count)),
}


@lru_cache(maxsize=1)
def uploads_database() -> StandardDatabase:
    """Fetch a handle to the `uploads` database, creating it if it is missing.

    The `uploads` database holds the temporary collections used by multipart
    uploads. The result is memoized by `lru_cache`, so the existence check and
    possible creation only run on the first call.
    """
    uploads_name = "uploads"
    system = db("_system")

    database_exists = system.has_database(uploads_name)
    if not database_exists:
        system.create_database(uploads_name)

    return db(uploads_name)


def create_upload_collection() -> str:
    """Create a new, empty temp collection for a multipart upload.

    The collection name doubles as the upload ID: the prefix `u-` followed by
    a random UUID4 in hex form.

    Returns:
        The ID (collection name) of the newly created upload collection.
    """
    collection_name = "u-" + uuid4().hex
    uploads_database().create_collection(collection_name)
    return collection_name


def insert_file_chunk(upload_id: str, sequence: str, chunk: str) -> str:
    """Store one base64-encoded `chunk` in the temp collection for `upload_id`.

    The chunk is saved as a document whose `_key` is `sequence`; the payload
    itself lives in a field that is also named after `sequence`.

    Raises:
        UploadNotFound: no collection named `upload_id` exists.
        AlreadyExists: a document keyed by `sequence` is already present.

    Returns:
        The `upload_id` the chunk was stored under.
    """
    database = uploads_database()
    if not database.has_collection(upload_id):
        raise UploadNotFound(upload_id)

    chunks = database.collection(upload_id)

    # Refuse to overwrite a chunk that was already uploaded for this sequence.
    existing = chunks.get(sequence)
    if existing is not None:
        raise AlreadyExists("Upload Chunk", f"{upload_id}/{sequence}")

    document = {sequence: chunk, "_key": sequence}
    chunks.insert(document)

    return upload_id


def delete_upload_collection(upload_id: str) -> str:
    """Drop the temp collection that backs the multipart upload `upload_id`.

    Raises:
        UploadNotFound: no collection named `upload_id` exists.

    Returns:
        The `upload_id` of the collection that was deleted.
    """
    database = uploads_database()

    collection_exists = database.has_collection(upload_id)
    if not collection_exists:
        raise UploadNotFound(upload_id)

    database.delete_collection(upload_id)
    return upload_id
10 changes: 9 additions & 1 deletion multinet/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ class RequiredParamsMissing(ServerError):

def __init__(self, missing: List[str]):
"""Initialize the exception."""
self.missing = missing
self.missing = {"missing": missing}

def flask_response(self) -> FlaskTuple:
"""Generate a 400 error."""
Expand Down Expand Up @@ -242,3 +242,11 @@ def __init__(self, message: str = ""):
def flask_response(self) -> FlaskTuple:
"""Generate a 400 error."""
return (self.message, "400 Error during AQL Execution")


class UploadNotFound(NotFound):
    """Error raised when an operation targets a nonexistent upload collection."""

    def __init__(self, upload_id: str):
        """Record `upload_id` as the missing item, labelled as an "Upload"."""
        super().__init__("Upload", upload_id)
11 changes: 11 additions & 0 deletions multinet/swagger/template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,15 @@ parameters:
type: string
example: key0

upload_id:
name: upload_id
in: path
description: The collection ID associated with this upload
required: true
schema:
type: string
example: u-1234abcd

direction:
name: direction
description: The type of edges to retrieve
Expand Down Expand Up @@ -226,5 +235,7 @@ tags:
description: Table retrieval, inspection, creation, and deletion
- name: uploader
description: Uploaders for various input data formats
- name: uploads
description: Endpoints for multipart upload operations
- name: user
description: Endpoints for user operations
2 changes: 1 addition & 1 deletion multinet/uploaders/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""Uploader blueprints for various filetypes."""
from . import csv, nested_json, newick, d3_json # noqa: F401
from . import csv, nested_json, newick, d3_json, multipart_upload # noqa: F401
48 changes: 48 additions & 0 deletions multinet/uploaders/multipart_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Multinet uploader for multi-part uploaded files."""
from base64 import b64encode

from multinet import db, util

from flasgger import swag_from
from flask import Blueprint, request
from webargs import fields
from webargs.flaskparser import use_kwargs

# Import types
from typing import Any

from multinet.errors import RequiredParamsMissing

bp = Blueprint("uploads", __name__)
bp.before_request(util.require_db)


@bp.route("", methods=["POST"])
@swag_from("swagger/create_upload.yaml")
def create_upload() -> str:
    """Start a multipart upload and respond with the new upload ID."""
    upload_id = db.create_upload_collection()
    return upload_id


@bp.route("/<upload_id>/chunk", methods=["POST"])
@use_kwargs({"sequence": fields.Str(required=True)})
@swag_from("swagger/chunk_upload.yaml")
def chunk_upload(upload_id: str, sequence: str) -> Any:
    """Upload a chunk to the specified collection.

    The chunk is read from the `chunk` file field of the request, base64
    encoded, and handed to `db.insert_file_chunk` under the given `sequence`.

    Raises:
        RequiredParamsMissing: the request carries no `chunk` file field.

    Any error raised by `db.insert_file_chunk` propagates unchanged.

    Returns:
        The `sequence` value that was stored.
    """
    # Query the multi-dict directly: `.get` yields the first uploaded file for
    # the field, whereas converting to a plain `dict` first is unnecessary and
    # its result may depend on the Python/Werkzeug version in use.
    chunk = request.files.get("chunk")

    if chunk is None:
        raise RequiredParamsMissing(["chunk"])

    # convert bytes to base64 string since arango doesn't support binary blobs
    stringified_blob = b64encode(chunk.read()).decode("ascii")

    db.insert_file_chunk(upload_id, sequence, stringified_blob)
    return sequence


@bp.route("/<upload_id>", methods=["DELETE"])
@swag_from("swagger/delete_upload_collection.yaml")
def delete_upload_collection(upload_id: str) -> Any:
    """Remove the upload collection named `upload_id` and echo back its ID."""
    deleted_id = db.delete_upload_collection(upload_id)
    return deleted_id
46 changes: 46 additions & 0 deletions multinet/uploaders/swagger/chunk_upload.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
Upload one chunk of data to the corresponding collection in the `uploads` database
---
parameters:
- $ref: "#/parameters/upload_id"
- name: sequence
in: query
description: The sequence number of the multipart upload
required: true
schema:
type: number
example: 0
- name: chunk
description: Raw file chunk data
required: true
in: formData
type: file

responses:
200:
description: The sequence number of the multipart upload
schema:
type: number
example: 0

400:
  description: Missing required parameters
  # The error body is an object with a `missing` key listing the absent
  # parameter names, not a bare array.
  schema:
    type: object
    properties:
      missing:
        type: array
        items:
          type: string
    example: {"missing": ["chunk"]}

404:
description: Collection with specified `upload_id` does not exist
schema:
type: string
example: u-1234abcd

409:
description: Document with specified sequence number already exists
schema:
type: number
example: 0

tags:
- uploads
11 changes: 11 additions & 0 deletions multinet/uploaders/swagger/create_upload.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Create a new collection in the `uploads` database for multipart upload storage
---
responses:
200:
description: Unique ID for the collection that was created
schema:
type: string
example: u-1234abcd

tags:
- uploads
19 changes: 19 additions & 0 deletions multinet/uploaders/swagger/delete_upload_collection.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Delete a multipart-upload collection from the database
---
parameters:
- $ref: "#/parameters/upload_id"
responses:
200:
description: The `upload_id` of the deleted collection
schema:
type: string
example: u-1234abcd

404:
description: Collection with ID of `upload_id` not found
schema:
type: string
example: u-1234abcd

tags:
- uploads

0 comments on commit 5e7b289

Please sign in to comment.