From c98aa6ad2bfbd585940c3a2316d8e6f87a89c636 Mon Sep 17 00:00:00 2001 From: Brandon Date: Fri, 7 May 2021 02:17:24 -0400 Subject: [PATCH 01/10] interim updates --- markdown/documentation/dms/index.md | 2 +- .../configuration/prereq/buckets.md | 2 +- .../maestro/installation/installation.md | 2 +- markdown/documentation/score/_contents.yaml | 37 +++ markdown/documentation/score/index.md | 49 ++++ .../score/installation/authentication.md | 73 ++++++ .../installation/configuration/bootstrap.md | 27 +++ .../score/installation/configuration/index.md | 12 + .../configuration/object-storage.md | 80 +++++++ .../installation/configuration/profiles.md | 119 ++++++++++ .../score/installation/configuration/song.md | 24 ++ .../documentation/score/installation/index.md | 3 + .../score/installation/installation.md | 109 +++++++++ .../score/user-guide/client-setup.md | 216 ++++++++++++++++++ .../score/user-guide/commands.md | 9 + .../score/user-guide/download.md | 216 ++++++++++++++++++ .../documentation/score/user-guide/index.md | 3 + .../documentation/score/user-guide/upload.md | 216 ++++++++++++++++++ .../song/installation/auth-config.md | 4 +- .../installation/configuration/profiles.md | 5 +- .../song/installation/installation.md | 4 +- 21 files changed, 1203 insertions(+), 9 deletions(-) create mode 100644 markdown/documentation/score/_contents.yaml create mode 100644 markdown/documentation/score/index.md create mode 100644 markdown/documentation/score/installation/authentication.md create mode 100644 markdown/documentation/score/installation/configuration/bootstrap.md create mode 100644 markdown/documentation/score/installation/configuration/index.md create mode 100644 markdown/documentation/score/installation/configuration/object-storage.md create mode 100644 markdown/documentation/score/installation/configuration/profiles.md create mode 100644 markdown/documentation/score/installation/configuration/song.md create mode 100644 
markdown/documentation/score/installation/index.md create mode 100644 markdown/documentation/score/installation/installation.md create mode 100644 markdown/documentation/score/user-guide/client-setup.md create mode 100644 markdown/documentation/score/user-guide/commands.md create mode 100644 markdown/documentation/score/user-guide/download.md create mode 100644 markdown/documentation/score/user-guide/index.md create mode 100644 markdown/documentation/score/user-guide/upload.md diff --git a/markdown/documentation/dms/index.md b/markdown/documentation/dms/index.md index c2c23313..564fda4e 100644 --- a/markdown/documentation/dms/index.md +++ b/markdown/documentation/dms/index.md @@ -25,7 +25,7 @@ Illustrated above are the five core Overture components: | Component | Purpose | | --------------------| ------------| -| [Score](/products/score) | Manages cloud-based data object storage and transfer. | +| [Score](/documentation/score) | Manages cloud-based data object storage and transfer. | | [Song](/documentation/song) | Manages the metadata associated with the data objects. | | [Maestro](/documentation/maestro) | Indexes the metadata in Song into [Elasticsearch](https://www.elastic.co/). | | [Arranger](/documentation/arranger) | Generates an easily-configurable web portal interface with faceted search against the Elasticsearch index. | diff --git a/markdown/documentation/dms/installation/configuration/prereq/buckets.md b/markdown/documentation/dms/installation/configuration/prereq/buckets.md index 0840b73d..9d095437 100644 --- a/markdown/documentation/dms/installation/configuration/prereq/buckets.md +++ b/markdown/documentation/dms/installation/configuration/prereq/buckets.md @@ -2,7 +2,7 @@ title: Setup Data Storage Buckets --- -The [Score service]((../../../../../score)) manages data transfer to (upload) and from (download) cloud object storage. As such, Score requires two specific buckets to get setup in advance in your storage service. 
These buckets are supplied as inputs to the DMS interactive configuration questionnaire later on. +The [Score service](/documentation/score) manages data transfer to (upload) and from (download) cloud object storage. As such, Score requires two specific buckets to get setup in advance in your storage service. These buckets are supplied as inputs to the DMS interactive configuration questionnaire later on. Score requires two buckets to be setup in your storage: diff --git a/markdown/documentation/maestro/installation/installation.md b/markdown/documentation/maestro/installation/installation.md index d57e1f4b..28a3c261 100644 --- a/markdown/documentation/maestro/installation/installation.md +++ b/markdown/documentation/maestro/installation/installation.md @@ -10,7 +10,7 @@ Before installing Maestro, the following software services needs to be installed |---------|---------|-------------|-------------| | [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) | 7 or up | Required | For Maestro to build the index in | | [Song](https://github.com/overture-stack/SONG/releases) | Latest | Required | See [here](/documentation/song/installation) for installation instructions | -| [Apache Kafka](https://kafka.apache.org/downloads/) | Latest | Optional | Optionaly, only needed if you want to setup event-based indexing | +| [Apache Kafka](https://kafka.apache.org/downloads/) | Latest | Optional | Optional, only needed if you want to setup event-based indexing | # Installation diff --git a/markdown/documentation/score/_contents.yaml b/markdown/documentation/score/_contents.yaml new file mode 100644 index 00000000..c287a42d --- /dev/null +++ b/markdown/documentation/score/_contents.yaml @@ -0,0 +1,37 @@ +sectionSlug: score +sectionTitle: Score +pages: +- title: Introduction + url: score +- title: Installation Guide + url: score/installation + isHeading: true + pages: + - title: Installation + url: score/installation/installation + - title: Configuration + url: 
score/installation/configuration + isHeading: true + pages: + - title: Run Profiles + url: score/installation/configuration/profiles + - title: Song Server Integration + url: score/installation/configuration/song + - title: Object Storage Integration + url: score/installation/configuration/object-storage + - title: Other Bootstrap Properties + url: score/installation/configuration/bootstrap + - title: Authentication + url: score/installation/authentication +- title: User Guide + url: score/user-guide + isHeading: true + pages: + - title: Setting Up the Score Client + url: score/user-guide/client-setup + - title: Uploading Data + url: score/user-guide/upload + - title: Downloading Data + url: score/user-guide/download + - title: Command Reference + url: score/user-guide/commands \ No newline at end of file diff --git a/markdown/documentation/score/index.md b/markdown/documentation/score/index.md new file mode 100644 index 00000000..71456050 --- /dev/null +++ b/markdown/documentation/score/index.md @@ -0,0 +1,49 @@ +--- +title: Introduction +--- + +Score facilitates the transfer and storage of your data seamlessly and flexibly for cloud-based projects. This storage and transfer system helps you manage data upload and download with powerful features such as file bundling and resumable downloads. + +Score uses the concept of pre-signed URLs (see Amazon S3 definition [here](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ShareObjectPreSignedURL.html)) to manage data transfer to and from your cloud storage provider. As such, Score can be thought of as a broker between an object storage system (such as Amazon S3) and the user authorization system, with the responsibility of validating user access and generating the pre-signed URLs required for object access. + +Working together, Song and Score enable secure and distributed data management. 
+Score works with object-based storage including Amazon Web Services S3, Azure Storage, +and Openstack Ceph to enable file upload and download that can be parallelized into multiple +parts and easily resumed with high integrity for a fault-tolerant data transfer. Specific features to +support genomic data have been built into Song and Score: file bundling to match genomic files +with their index files, and slicing of a sequencing read file for a targeted region instead of +downloading the whole file. + +# Features + +## Support for Multiple Storage Providers + +Score currently supports data transfer with several popular cloud-based storage providers: + +* [Amazon S3](https://aws.amazon.com/s3/) +* [Microsoft Azure Storage](https://azure.microsoft.com/en-ca/services/storage/) +* [Openstack](https://www.openstack.org/) with [Ceph](https://ceph.io/) +* [Minio](https://min.io/) + +## Multipart Uploads and Downloads + +To enable high performance transfers, Scores supports multipart file uploads and downloads. By implementing a multipart transfer solution, Score provides several key benefits: + +* File downloads can be done in parts, being paused and resumed as required by the user +* File transfers will automatically resumed if paused or interrupted mid-transfer (e.g. due to connection issues) +* Parallelization of these transfer operations makes upload and download of files efficietn and fast + +## Data Integrity + +Score performs standard [MD5 validation](https://www.ietf.org/rfc/rfc1321.txt) against all file uploads and downloads to check for corrupted files and ensure data integrity. 
+ +## Applications to Genomics + +Similar to other products in the [Overture](https://www.overture.bio/products/) software suite, Score has particularly useful applications in the field of Genomics, including the following features: + +* Ability to slice BAM and CRAM files by genomic regions using integrated command line tools +* Integration of other samtools functionality in the Score client, such as ability to view reads from a BAM file + +# Integrations + +As a data transfer management system, Score is focused on managing data upload and download, and does not handle the complexities of file metadata validation. To handle this, Score is built to interact with a required companion application, [Song](/documentation/song). Song is responsibe for validating file metadata, assigning unique global identifiers for data management, assigning permisssions for open (public) versus controlled (authentication required) file access, and so on. \ No newline at end of file diff --git a/markdown/documentation/score/installation/authentication.md b/markdown/documentation/score/installation/authentication.md new file mode 100644 index 00000000..15c39eda --- /dev/null +++ b/markdown/documentation/score/installation/authentication.md @@ -0,0 +1,73 @@ +--- +title: Authentication +--- + +# Application Authentication & Authorization + +For an application to securely interact with Score, authentication and authorization must be provided. This ensures unauthorized users cannot access Score's API endpoints. To authorize properly with Score, either an authorized user's valid API key with appropriate scopes (permissions) must be supplied, or application-to-application authorization must be enabled following the [OAuth 2.0](https://oauth.net/2/) protocol. + +Although configuring authentication and authorization is technically optional, it is **highly recommended**, especially for production environments. 
Settings are configured in the `auth` section of the `score-server-[version]/conf/application.properties` file, using these profiles: + +| Profile | Requirement | Description | +|---------|-------------|-------------| +| secure | Required if using Ego | If the [Overture](https://overture.bio) product [Ego](/documentation/ego) is used as the authentication service for Score, this profile is required. It enables authentication for requests to the Score API using API keys issued by Ego. | +| jwt | Optional | Optionally, you can use this profile to support both JWT ([JSON Web Tokens](https://jwt.io/)) and API Key authentication for requests to Score. | + +# Secure Profile Example + +The `secure` profile is required if the [Overture](https://overture.bio) product [Ego](/documentation/ego) is used as the authentication service for Score. It enables authentication for requests to the Score API using API keys issued by Ego. + +To configure authentication and authorization via Ego, in the `score-server-[version]/conf/application.properties` file, make sure the `secure` profile exists and configure these settings in the `auth -> server` section: + +| Setting | Requirement | Description | +|---------|-------------|-------------| +| `url` | Required | URL to the Ego API endpoint that is used to authenticate a user's API key (token). Specify the host and port where the endpoint is hosted. The endpoint to use is `/oauth/check_token`. See the example below for guidance. | +| `tokenName` | Required | Name used to identify a token. Typically you should leave this set to the default value, `token`. | +| `clientId` | Required | This is the client ID for the Score application as configured in Ego. | +| `clientSecret` | Required | This is the client secret for the Score application as configured in Ego. | +| `scope -> download -> system` | Required | Scope (permission) that a user's API key must have to enable system-level downloads from Score. 
Typically you should leave this set to the default value, `score.READ`. | +| `scope -> download -> study -> prefix` | Required | Prefix that must come before the Song study name when assigning study-level download scopes (permissions) for Score. Typically you should leave this set to the default value, `score.`. | +| `scope -> download -> study -> suffix` | Required | Suffix that must come after the Song study name when assigning study-level download scopes (permissions) for Score. Typically you should leave this set to the default value, `.READ`. | +| `scope -> upload -> system` | Required | Scope (permission) that a user's API key must have to enable system-level uploads to Score. Typically you should leave this set to the default value, `score.READ`. | +| `scope -> upload -> study -> prefix` | Required | Prefix that must come before the Song study name when assigning study-level upload scopes (permissions) for Score. Typically you should leave this set to the default value, `score.`. | +| `scope -> upload -> study -> suffix` | Required | Suffix that must come after the Song study name when assigning study-level upload scopes (permissions) for Score. Typically you should leave this set to the default value, `.READ`. | + +For example: + +``` yaml +auth: + server: + url: "https://:/oauth/check_token" + tokenName: "token" + clientId: + clientSecret: + scope: + download: + system: "score.READ" + study: + prefix: "score." + suffix: ".READ" + upload: + system: "score.WRITE" + study: + prefix: "score." + suffix: ".WRITE" +``` + +# JWT Profile Example + +The `jwt` profile can be optionally used if you want to support both JWT and API Key authentication for requests to Score. Note that JWT authentication cannot be configured standalone, it still requires the aforementioned API key authentication to be setup first. 
+ +To make use of JWT authentication, in the `score-server-[version]/conf/application.properties` file, make sure the `jwt` profile exists and configure these settings in the `auth -> jwt` section: + +| Setting | Requirement | Description | +|---------|-------------|-------------| +| `publicKeyUrl` | Required | URL to the Ego API endpoint that is used to retrieve a user's public key . Specify the host and port where the endpoint is hosted. The endpoint to use is `/oauth/token/public_key`. See the example below for guidance. | + +For example: + +```yaml +auth: + jwt: + publicKeyUrl: "https://:/oauth/token/public_key" +``` \ No newline at end of file diff --git a/markdown/documentation/score/installation/configuration/bootstrap.md b/markdown/documentation/score/installation/configuration/bootstrap.md new file mode 100644 index 00000000..4d818607 --- /dev/null +++ b/markdown/documentation/score/installation/configuration/bootstrap.md @@ -0,0 +1,27 @@ +--- +title: Other Bootstrap Properties +--- + +In addition to the `score-server-[version]/conf/application.properties` file that is created by default when you unzip the distribution, you must also create another file in the same `conf` folder. This file, `bootstrap.properties`, will contain some additional configurations required by the Score server. + +Assuming the directory path of the distribution is `$SCORE_SERVER_HOME`, do the following: + +1. Switch to the Score server configuration folder: + +```bash +$ cd $SCORE_SERVER_HOME/conf +``` + +2. Using the text editor of your choice, create a new file in the `/conf` directory named `bootstrap.properties`, and add the following settings: + + Setting | Requirement | Description | +|---------|-------------|-------------| +| `spring.cloud.vault.enabled` | Required | If [HashiCorp's Vault](https://www.vaultproject.io/) solution is being used to manage your authentication secrets, set this to `true`. Else, set this to `false`. 
Typically, most deployments will not be using Vault and hence this value should be defaulted to `false`.
+ +Once your object storage is setup, Score can be configured to connect to it using an appropriate profile in the `score-server-[version]/conf/application.properties` file, depending on your storage provider: + +| Profile | Purpose | +|---------|---------| +| default | Required if using [AWS](https://aws.amazon.com/s3/), [Ceph](https://ceph.io/), or [Minio](https://min.io/) as your object storage. | +| azure | Required if using Microsoft [Azure](https://azure.microsoft.com/en-ca/services/storage/) as your object storage. | + +# Default Profile Example + +To connect Score to AWS, Ceph, or Minio storage, in the `score-server-[version]/conf/application.properties` file, make sure the `default` profile exists and configure these settings: + +| Setting | Requirement | Description | +|---------|-------------|-------------| +| `s3 -> endpoint` | Required | URL of the storage service's API endpoint. Score will send requests to this URL when interacting with the service's API. | +| `s3 -> accessKey` | Required | Access key required to access the buckets in your object storage. You should have recorded this as part of your prequisite setup. | +| `s3 -> secretKey` | Required | Secret key required to access the buckets in your object storage. You should have recorded this as part of your prequisite setup. | +| `s3 -> sigV4Enabled` | Required | If your storage service uses the AWS S3 [Signature Version 4](https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html) signing process for authentication, set this to `true`. Else set this to `false`. | +| `bucket -> name.object` | Required | ID of the bucket used to store object data for Score. You should have recorded this as part of your prequisite setup. | +| `bucket -> name.state` | Required | ID of the bucket used to store and maintain state information for Score. You should have recorded this as part of your prequisite setup. 
| `upload -> partsize` | Required | Size, expressed in bytes, of each part or chunk to upload at once to the object storage. You can use this parameter to adjust for your desired speed and performance. |
+| `upload -> retry.limit` | Required | Number of times that Score will retry failed upload requests to the object storage before aborting. |
+| `upload -> connection.timeout` | Required | Number of milliseconds that Score will wait on a stale or idle connection to the object storage before timing out. |
You should have recorded this as part of your prequisite setup. | +| `azure -> accountKey` | Required | Account key required to access your Azure objectd storage. You should have recorded this as part of your prequisite setup. | +| `bucket -> name.object` | Required | ID of the bucket used to store object data for Score. You should have recorded this as part of your prequisite setup. | +| `upload -> partsize` | Required | Size, expressing in bytes, of each part or chunk to upload at once to the object storage. You can use this parameter to adjust for your desired speed and performance. | + +For example: + +```yaml +azure: + endpointProtocol: https + accountName: abc123 + accountKey: abc123 + +bucket: + name.object: test_object_bucket + +upload: + partsize: 1048576 +``` \ No newline at end of file diff --git a/markdown/documentation/score/installation/configuration/profiles.md b/markdown/documentation/score/installation/configuration/profiles.md new file mode 100644 index 00000000..fbeb17e4 --- /dev/null +++ b/markdown/documentation/score/installation/configuration/profiles.md @@ -0,0 +1,119 @@ +--- +title: Run Profiles +--- + +Score uses [Spring Profiles](https://docs.spring.io/spring-boot/docs/1.2.0.M1/reference/html/boot-features-profiles.html) as a feature to manage the running of a Score server in different environments or when integrating with different services. For example, spring profiles allows different configuration settings to be applied depending on the type of object storage service being used. + +During configuration, you will need to enable the active profiles in the `score-server-[version]/conf/application.properties` file. The active profiles to use for a particular configuration can be specified using the `profiles` argument which should be added at the start of the `spring` block, for example: + +```yaml +spring: + profiles: + active: "default,prod,secure,jwt" + +``` +Descriptions of the profiles available to Score are provided below. 
Depending on the type of configuration, some profiles are required to run and some are optional. + +| Profile | Requirement | Description | +|---------|-------------|-------------| +| default | Required if using [AWS](https://aws.amazon.com/s3/), [Ceph](https://ceph.io/), or [Minio](https://min.io/) storage | You must use the default profile to configure your object storage if using AWS, Ceph, or Minio. | +| azure | Required if using [Azure](https://azure.microsoft.com/en-ca/services/storage/) storage | You must use the Azure profile to configure your object storage if using Microsoft Azure. | +| prod | Required | Used for production deployments and to specify the Song metadata server that Score must interact with. | +| secure | Required if using Ego | If the [Overture](https://overture.bio) product [Ego](/documentation/ego) is used as the authentication service for Score, this profile is required. It enables authentication for requests to the Score API using API keys issued by Ego. | +| jwt | Optional | Optionally, you can use this profile to support both JWT ([JSON Web Tokens](https://jwt.io/)) and API Key authentication for requests to Score. | + +# Default + +The `default` profile is required if using AWS, Ceph, or Minio as your object storage. It contains configuration settings that are common to these service providers. For detailed steps on configuring your object storage, see [Object Storage Integration](/documentation/score/installation/configuration/object-storage). + +For example: + +```yaml +s3: + endpoint: "http://localhost:9000" + accessKey: abc123 + secretKey: abc123 + sigV4Enabled: true + +bucket: + name.object: test_object_bucket + name.state: test_state_bucket + size.pool: 0 + size.key: 2 + +upload: + partsize: 1048576 + retry.limit: 10 + connection.timeout: 60000 + clean.cron: “0 0 0 * * ?” + clean.enabled: true +``` + +# Azure + +The `azure` profile is required if using Microsoft Azure storage as your object storage. 
It contains configuration settings specific for Azure. For details on configuring your object storage, see [Object Storage Integration](/documentation/score/installation/configuration/object-storage). + +For example: + +```yaml +azure: + endpointProtocol: https + accountName: abc123 + accountKey: abc123 + +bucket: + name.object: test_object_bucket + +upload: + partsize: 1048576 +``` + +# Prod + +The `prod` profile is used to enable production deployments and most importantly requires you to specify the Song metadata server that Score must interact with. For details on integrating with the Song server, see [Song Server Integration](/documentation/score/installation/configuration/song). + +For example: + +```yaml +metadata: + url: "http://localhost:8089/" + ssl.enabled: false +``` + +# Secure + +The `secure` profile is required if the [Overture](https://overture.bio) product [Ego](/documentation/ego) is used as the authentication service for Score. It enables authentication for requests to the Score API using API keys issued by Ego. For details on configuring authentication, see [Authentication](documentation/score/installation/authentication). + +For example: + +``` yaml +auth: + server: + url: https://localhost:8081/oauth/check_token + tokenName: token + clientId: score + clientSecret: scoresecret + scope: + download: + system: score.READ + study: + prefix: score. + suffix: .READ + upload: + system: score.WRITE + study: + prefix: score. + suffix: .WRITE +``` + +# JWT + +The `jwt` profile can be optionally used if you want to support both JWT and API Key authentication for requests to Score. Note that JWT authentication cannot be configured standalone, it still requires the aforementioned API key authentication to be setup first. For details on configuring authentication, see [Authentication](documentation/score/installation/authentication). 
+ +For example: + +```yaml +auth: + jwt: + publicKeyUrl: "https://localhost:8443/oauth/token/public_key" +``` \ No newline at end of file diff --git a/markdown/documentation/score/installation/configuration/song.md b/markdown/documentation/score/installation/configuration/song.md new file mode 100644 index 00000000..d1bca89d --- /dev/null +++ b/markdown/documentation/score/installation/configuration/song.md @@ -0,0 +1,24 @@ +--- +title: Song Server Integration +--- + +As a data transfer management system, Score is focused on managing data upload and download, and does not handle the complexities of file metadata validation. To handle this, Score is built to interact with a required companion application, [Song](/documentation/song). Song is responsibe for validating file metadata, assigning unique global identifiers for data management, assigning permisssions for open (public) versus controlled (authentication required) file access, and so on. + +As such, a Song server must be setup for use with Score. See [here](/documentation/song/installation/installation/) for instructions on how to deploy a Song server. Once the Song server is setup, Score can be configured to connect to it using the `prod` profile in the `score-server-[version]/conf/application.properties` file. + +# Configuration Example + +To connect Score to your Song server, in the `score-server-[version]/conf/application.properties` file, make sure the `prod` profile exists and configure these settings: + +| Setting | Requirement | Description | +|---------|-------------|-------------| +| `url` | Required | URL to the Song server API that you have setup. When communicating with Song, Score will make requests via this API. | +| `ssl.enabled` | Optional | If using SSL encryption to securely communicate with Song, set this to `true`. 
Else if not using SSL, set this to `false`.
| + +## Configuring Storage Providers + +Score currently supports data transfer with several popular cloud-based storage providers: + +* [Amazon S3](https://aws.amazon.com/s3/) +* [Microsoft Azure Storage](https://azure.microsoft.com/en-ca/services/storage/) +* [Openstack](https://www.openstack.org/) with [Ceph](https://ceph.io/) +* [Minio](https://min.io/) + +To setup your object storage for Score to use: + +1. First register with the provider of your choice and follow their instructions for setting up and configuring their service. Follow the links provided above as a starting point; however note that each provider may have different setup requirements and steps, so follow their procedures as directed. + +2. Once you have an object storage created with your provider, you must create two data buckets for Score to use: + +* A bucket to store object data +* A bucket to store and maintain state information + +Create these buckets and remember both their IDs, as they will be required as inputs later during Score configuration. + +3. For certain storage services, a `/data` sub-folder must be created in advance in each bucket. Please check with your service provider's configuration requirements. For example, currently Openstack with Ceph requires this sub-folder when used by Score, while other services such as Amazon and MinIO do not. + +4. Make sure to note the **URL**, **access key**, and **secret key** used to access your storage service, as these are also required as input during Score configuration. **Keep these values safe and secure**. + +5. If specifically using Amazon S3, make sure to note the geographic **Region** where you have configured your buckets to be stored, as this is also required as an input during Score configuration. + +If assistance is required, you may need to contact support with your specific storage provider. 
+ +# Installation + +## Distribution + +Official Score releases can be found on [Github](https://github.com/overture-stack/SCORE/releases). Each release contains notes with a description of the bug fixes, new features or enhancements, breaking changes, and links to downloads and change logs. + +The latest distribution can be downloaded using this command: + +```bash + curl https://artifacts.oicr.on.ca/artifactory/dcc-release/bio/overture/score-server/[RELEASE]/score-server-[RELEASE]-dist.tar.gz -Ls -o score-server-dist.tar.gz +``` + +* Where `[RELEASE]` is the specific 3-digit release number you wish to download (e.g. `5.3.0`) + +The distribution contains the default configuration and jars for running the server. To unzip the distribution, run this command: + +```bash +tar zxvf score-server-dist.tar.gz +``` + +Note that once unzipped, the final directory will be suffixed with the latest release number of the distribution. + +## Feature Configuration + +There are several required components to configure for the Score server. These include: + +- [Run Profiles](/documentation/score/installation/configuration/profiles) +- [Song Server Integration](/documentation/score/installation/configuration/song) +- [Object Storage Integration](/documentation/score/installation/configuration/object-storage) +- [Other Bootstrap Properties](/documentation/score/installation/configuration/bootstrap) + +The details of each of these configurations are covered in the [Configuration](/documentation/score/installation/configuration/) section. + +## Authentication + +For an application to securely interact with Score, authentication and authorization must be provided. This ensures unauthorized users cannot access Score's API endpoints. To authorize properly with Score, either an authorized user's valid API key with appropriate scopes (permissions) must be supplied, or application-to-application authorization must be enabled following the [OAuth 2.0](https://oauth.net/2/) protocol. 
+ +Although configuring authentication and authorization is technically optional, it is **highly recommended**, especially for production environments. The details for configuring authentication and authorization are covered in the [Authentication](/documentation/score/installation/authentication) section. + +# Running as a Service + +Although the Score server distribution can be run as a standalone application, it must be manually started or stopped by the user. For a long-running server, sudden power loss or a hard reboot would mean the standalone application would need to be restarted manually. However, if the Score server distribution is run as a service, the operating system would be responsible for automatically restarting the service upon reboot. For this reason, the distribution should be configured as a service that is always started on boot. + +## Linux (SysV) + +Assuming the directory path of the distribution is `$SCORE_SERVER_HOME` and [pre-requisites](/documentation/score/installation/installation/#dependencies) are correctly setup, the following steps will register the Score server as a SysV service on any Linux host supporting SysV and configure it to start on boot: + +``` bash +# Register the Score service +sudo ln -s $SCORE_SERVER_HOME/bin/score-server /etc/init.d/score-server + +# Start on boot (defaults) +sudo update-rc.d score-server defaults +``` + +The Score server can also be manually managed using several commands: + +``` bash +# Start the service +sudo service score-server start + +# Stop the service +sudo service score-server stop + +# Restart the service +sudo service score-server restart +``` \ No newline at end of file diff --git a/markdown/documentation/score/user-guide/client-setup.md b/markdown/documentation/score/user-guide/client-setup.md new file mode 100644 index 00000000..37a06fdf --- /dev/null +++ b/markdown/documentation/score/user-guide/client-setup.md @@ -0,0 +1,216 @@ +--- +title: Setting Up the Score Client +--- + +# 
Indexing Methods + +Once Maestro is installed, configured, and running, it is now available to receive requests and index data into Elasticsearch. + +If Maestro is enabled to use event-driven indexing with Kafka, then it will listen for specific messages from the [configured Kafka topics](/documentation/maestro/installation/configuration#configuring-kafka-topics). Otherwise, without Kafka, Maestro can receive requests over the HTTP JSON API. + +Maestro has several ways to index data, described in the following sub-sections. + +## Maestro API + +Maestro provides an API based on the [OpenAPI specification](https://swagger.io/specification/) (formerly known as the Swagger specification) which allows users (manually) and applications (programmatically) to interact with its core functionality. + +One major benefit of Swagger-based APIs is that they also provide easy-to-use, interactive API documentation via a web interface. Users can manually interact with the API by issuing cURL commands via their terminal. Administrators with access to the Swagger UI can also interact with the API via the web interface. + +On a local deployment, the Maestro Swagger UI can be accessed at `http://localhost:11235/maestro/api-docs`: + +![Entity](../assets/swagger.png 'Swagger UI') + +The following are examples of how to index data at different entity levels using the API. Recall that Maestro can index data flexibly at either the repository, study, or individual analysis levels. + +## Indexing by Repository + +It is possible to index an entire Song repository at once. This operation will index all analyses in all studies in the specified repository. + +### Using cURL + +To index a repository with cURL, from your command line. execute the following: + +```shell + curl -X POST \ + http://localhost:11235/index/repository/`` \ + -H 'Content-Type: application/json' \ + -H 'cache-control: no-cache' +``` + +Where `repositoryCode` is the code representing the Song repository you want to index. 
+ +### Using Swagger UI + +To index a repository using the Swagger UI: + +1. Go to `http://localhost:11235/maestro/api-docs` + +2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}` endpoint. + +3. Click **Try it out**. + +4. In `repositoryCode`, enter the code of the Song repository you want to index. + +5. Click **Execute**. For example: + +![Entity](../assets/index-repo2.png 'Index Repo') + +### Verify Repository Indexed + +If successful, either the cURL command or the Swagger UI will return a successful response indicating the repository has been indexed. For example: + +``` +[ + { + "indexName": "file_centric_1", + "failureData": { + "failingIds": {} + }, + "successful": true + } +] +``` + +## Indexing by Study + +The most common way to index is usually by study. This operation will index all analyses in the specific study provided. + +### Using cURL + +To index a study with cURL, from your command line, execute the following: + +```shell + curl -X POST \ + http://localhost:11235/index/repository/``/study/`` \ + -H 'Content-Type: application/json' \ + -H 'cache-control: no-cache' \ +``` + +Where: +* `repositoryCode` is the code representing the Song repository that the study belongs to +* `studyId` is the ID of the study you want to index + +### Using Swagger UI + +To index a study using the Swagger UI: + +1. Go to `http://localhost:11235/maestro/api-docs` + +2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}/study/{studyId}` endpoint. + +3. Click **Try it out**. + +4. In `studyId`, enter the ID fo the study you want to index. + +5. In `repositoryCode`, enter the code of the Song repository that the study belongs to. + +6. Click **Execute**. For example: + +![Entity](../assets/index-study.png 'Index Study') + +### Verify Study Indexed + +If successful, either the cURL command or the Swagger UI will return a successful response indicating the study has been indexed. 
For example: + +``` +[ + { + "indexName": "file_centric_1", + "failureData": { + "failingIds": {} + }, + "successful": true + } +] +``` + +## Indexing by Analysis + +Lastly, you can also index data from an individual analysis within a study. + +### Using cURL + +To index an individual analysis with cURL, from your command line, execute the following: + +```shell + curl -X POST \ + http://localhost:11235/index/repository/``/study/``/analysis/`` \ + -H 'Content-Type: application/json' \ + -H 'cache-control: no-cache' \ +``` + +Where: +* `repositoryCode` is the code representing the Song repository that the study belongs to +* `studyId` is the ID of the study that the analysis belongs to +* `analysisId` is the ID of the analysis you want to index + +### Using Swagger UI + +To index a study using the Swagger UI: + +1. Go to `http://localhost:11235/maestro/api-docs` + +2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}/study/{studyId}/analysis/{analysisId}` endpoint. + +3. Click **Try it out**. + +4. In `analysisId`, enter the ID of the analysis you want to index. + +5. In `studyId`, enter the ID fo the study that the analysis belongs to. + +6. In `repositoryCode`, enter the code of the Song repository that the study belongs to. + +7. Click **Execute**. For example: + +![Entity](../assets/index-analysis.png 'Index Analysis') + +### Verify Analysis Indexed + +If successful, either the cURL command or the Swagger UI will return a successful response indicating the analysis has been indexed. For example: + +``` +[ + { + "indexName": "file_centric_1", + "failureData": { + "failingIds": {} + }, + "successful": true + } +] +``` + +# Handling Index Mapping Changes + +By default, Maestro builds the Elasticsearch index based on a default index mapping that is pre-defined. The name of this default mapping is set in the configuration file, `application.yml`. 
+ +While this mapping is not configurable in runtime, there may be use cases where an administrator will need to change the mapping to meet their business needs. + +For example, as mentioned earlier, Song supports [dynamic schemas](/documentation/song/user-guide/schema/), which can be used to extend the base scheme with additional useful fields. + +In such a scenario, it is the administrator's responsibility to modify the mapping that Maestro uses as input. This process requires a proper migration process to be followed. + +The guidelines for such a process are as follows: + +1. Originally, an Elasticsearch index has been created initially using the base mapping, either manually by Maestro itself. + + +2. Maestro runs initially and starts indexing analyses from Song based on the original mapping. + + +3. Via Song, the administrator introduces new analysis types with new fields (dynamic schema). + + +4. Maestro will continue operating and indexing these new documents, but the new fields will not yet be indexed. + + +5. The administrator updates the existing index mapping to account for the new analysis types and new fields. + + +6. The administrator must re-index the data based on the new mapping. This can be done by either triggering Maestro by supplying the updated mapping in the configuration input, OR, can be done directly to Elasticsearch by using the Elasticsearch API. + + +7. Make sure to switch your Elasticsearch aliases to point to the new or updated index instead of the old one. + + +8. Once all of this is complete, the data will be migrated and Maestro will continue indexing based on the new mapping. 
\ No newline at end of file diff --git a/markdown/documentation/score/user-guide/commands.md b/markdown/documentation/score/user-guide/commands.md new file mode 100644 index 00000000..ad42f3ae --- /dev/null +++ b/markdown/documentation/score/user-guide/commands.md @@ -0,0 +1,9 @@ +--- +title: Command Reference +--- + +Here is a reference table listing all the commands and command options currently supported by the Score client. + +| Command | Short Form | Description | Options | +| -----------------| ------------| -----------| --------| +| ??? | ??? | ??? | ??? | \ No newline at end of file diff --git a/markdown/documentation/score/user-guide/download.md b/markdown/documentation/score/user-guide/download.md new file mode 100644 index 00000000..57fff990 --- /dev/null +++ b/markdown/documentation/score/user-guide/download.md @@ -0,0 +1,216 @@ +--- +title: Downloading Data +--- + +# Indexing Methods + +Once Maestro is installed, configured, and running, it is now available to receive requests and index data into Elasticsearch. + +If Maestro is enabled to use event-driven indexing with Kafka, then it will listen for specific messages from the [configured Kafka topics](/documentation/maestro/installation/configuration#configuring-kafka-topics). Otherwise, without Kafka, Maestro can receive requests over the HTTP JSON API. + +Maestro has several ways to index data, described in the following sub-sections. + +## Maestro API + +Maestro provides an API based on the [OpenAPI specification](https://swagger.io/specification/) (formerly known as the Swagger specification) which allows users (manually) and applications (programmatically) to interact with its core functionality. + +One major benefit of Swagger-based APIs is that they also provide easy-to-use, interactive API documentation via a web interface. Users can manually interact with the API by issuing cURL commands via their terminal. 
Administrators with access to the Swagger UI can also interact with the API via the web interface. + +On a local deployment, the Maestro Swagger UI can be accessed at `http://localhost:11235/maestro/api-docs`: + +![Entity](../assets/swagger.png 'Swagger UI') + +The following are examples of how to index data at different entity levels using the API. Recall that Maestro can index data flexibly at either the repository, study, or individual analysis levels. + +## Indexing by Repository + +It is possible to index an entire Song repository at once. This operation will index all analyses in all studies in the specified repository. + +### Using cURL + +To index a repository with cURL, from your command line. execute the following: + +```shell + curl -X POST \ + http://localhost:11235/index/repository/`` \ + -H 'Content-Type: application/json' \ + -H 'cache-control: no-cache' +``` + +Where `repositoryCode` is the code representing the Song repository you want to index. + +### Using Swagger UI + +To index a repository using the Swagger UI: + +1. Go to `http://localhost:11235/maestro/api-docs` + +2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}` endpoint. + +3. Click **Try it out**. + +4. In `repositoryCode`, enter the code of the Song repository you want to index. + +5. Click **Execute**. For example: + +![Entity](../assets/index-repo2.png 'Index Repo') + +### Verify Repository Indexed + +If successful, either the cURL command or the Swagger UI will return a successful response indicating the repository has been indexed. For example: + +``` +[ + { + "indexName": "file_centric_1", + "failureData": { + "failingIds": {} + }, + "successful": true + } +] +``` + +## Indexing by Study + +The most common way to index is usually by study. This operation will index all analyses in the specific study provided. 
+ +### Using cURL + +To index a study with cURL, from your command line, execute the following: + +```shell + curl -X POST \ + http://localhost:11235/index/repository/``/study/`` \ + -H 'Content-Type: application/json' \ + -H 'cache-control: no-cache' \ +``` + +Where: +* `repositoryCode` is the code representing the Song repository that the study belongs to +* `studyId` is the ID of the study you want to index + +### Using Swagger UI + +To index a study using the Swagger UI: + +1. Go to `http://localhost:11235/maestro/api-docs` + +2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}/study/{studyId}` endpoint. + +3. Click **Try it out**. + +4. In `studyId`, enter the ID fo the study you want to index. + +5. In `repositoryCode`, enter the code of the Song repository that the study belongs to. + +6. Click **Execute**. For example: + +![Entity](../assets/index-study.png 'Index Study') + +### Verify Study Indexed + +If successful, either the cURL command or the Swagger UI will return a successful response indicating the study has been indexed. For example: + +``` +[ + { + "indexName": "file_centric_1", + "failureData": { + "failingIds": {} + }, + "successful": true + } +] +``` + +## Indexing by Analysis + +Lastly, you can also index data from an individual analysis within a study. + +### Using cURL + +To index an individual analysis with cURL, from your command line, execute the following: + +```shell + curl -X POST \ + http://localhost:11235/index/repository/``/study/``/analysis/`` \ + -H 'Content-Type: application/json' \ + -H 'cache-control: no-cache' \ +``` + +Where: +* `repositoryCode` is the code representing the Song repository that the study belongs to +* `studyId` is the ID of the study that the analysis belongs to +* `analysisId` is the ID of the analysis you want to index + +### Using Swagger UI + +To index a study using the Swagger UI: + +1. Go to `http://localhost:11235/maestro/api-docs` + +2. 
Under **management-controller**, click the `POST /index/repository/{repositoryCode}/study/{studyId}/analysis/{analysisId}` endpoint. + +3. Click **Try it out**. + +4. In `analysisId`, enter the ID of the analysis you want to index. + +5. In `studyId`, enter the ID fo the study that the analysis belongs to. + +6. In `repositoryCode`, enter the code of the Song repository that the study belongs to. + +7. Click **Execute**. For example: + +![Entity](../assets/index-analysis.png 'Index Analysis') + +### Verify Analysis Indexed + +If successful, either the cURL command or the Swagger UI will return a successful response indicating the analysis has been indexed. For example: + +``` +[ + { + "indexName": "file_centric_1", + "failureData": { + "failingIds": {} + }, + "successful": true + } +] +``` + +# Handling Index Mapping Changes + +By default, Maestro builds the Elasticsearch index based on a default index mapping that is pre-defined. The name of this default mapping is set in the configuration file, `application.yml`. + +While this mapping is not configurable in runtime, there may be use cases where an administrator will need to change the mapping to meet their business needs. + +For example, as mentioned earlier, Song supports [dynamic schemas](/documentation/song/user-guide/schema/), which can be used to extend the base scheme with additional useful fields. + +In such a scenario, it is the administrator's responsibility to modify the mapping that Maestro uses as input. This process requires a proper migration process to be followed. + +The guidelines for such a process are as follows: + +1. Originally, an Elasticsearch index has been created initially using the base mapping, either manually by Maestro itself. + + +2. Maestro runs initially and starts indexing analyses from Song based on the original mapping. + + +3. Via Song, the administrator introduces new analysis types with new fields (dynamic schema). + + +4. 
Maestro will continue operating and indexing these new documents, but the new fields will not yet be indexed. + + +5. The administrator updates the existing index mapping to account for the new analysis types and new fields. + + +6. The administrator must re-index the data based on the new mapping. This can be done by either triggering Maestro by supplying the updated mapping in the configuration input, OR, can be done directly to Elasticsearch by using the Elasticsearch API. + + +7. Make sure to switch your Elasticsearch aliases to point to the new or updated index instead of the old one. + + +8. Once all of this is complete, the data will be migrated and Maestro will continue indexing based on the new mapping. \ No newline at end of file diff --git a/markdown/documentation/score/user-guide/index.md b/markdown/documentation/score/user-guide/index.md new file mode 100644 index 00000000..85df507a --- /dev/null +++ b/markdown/documentation/score/user-guide/index.md @@ -0,0 +1,3 @@ +--- +title: User Guide +--- \ No newline at end of file diff --git a/markdown/documentation/score/user-guide/upload.md b/markdown/documentation/score/user-guide/upload.md new file mode 100644 index 00000000..8559a3a0 --- /dev/null +++ b/markdown/documentation/score/user-guide/upload.md @@ -0,0 +1,216 @@ +--- +title: Uploading Data +--- + +# Indexing Methods + +Once Maestro is installed, configured, and running, it is now available to receive requests and index data into Elasticsearch. + +If Maestro is enabled to use event-driven indexing with Kafka, then it will listen for specific messages from the [configured Kafka topics](/documentation/maestro/installation/configuration#configuring-kafka-topics). Otherwise, without Kafka, Maestro can receive requests over the HTTP JSON API. + +Maestro has several ways to index data, described in the following sub-sections. 
+ +## Maestro API + +Maestro provides an API based on the [OpenAPI specification](https://swagger.io/specification/) (formerly known as the Swagger specification) which allows users (manually) and applications (programmatically) to interact with its core functionality. + +One major benefit of Swagger-based APIs is that they also provide easy-to-use, interactive API documentation via a web interface. Users can manually interact with the API by issuing cURL commands via their terminal. Administrators with access to the Swagger UI can also interact with the API via the web interface. + +On a local deployment, the Maestro Swagger UI can be accessed at `http://localhost:11235/maestro/api-docs`: + +![Entity](../assets/swagger.png 'Swagger UI') + +The following are examples of how to index data at different entity levels using the API. Recall that Maestro can index data flexibly at either the repository, study, or individual analysis levels. + +## Indexing by Repository + +It is possible to index an entire Song repository at once. This operation will index all analyses in all studies in the specified repository. + +### Using cURL + +To index a repository with cURL, from your command line. execute the following: + +```shell + curl -X POST \ + http://localhost:11235/index/repository/`` \ + -H 'Content-Type: application/json' \ + -H 'cache-control: no-cache' +``` + +Where `repositoryCode` is the code representing the Song repository you want to index. + +### Using Swagger UI + +To index a repository using the Swagger UI: + +1. Go to `http://localhost:11235/maestro/api-docs` + +2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}` endpoint. + +3. Click **Try it out**. + +4. In `repositoryCode`, enter the code of the Song repository you want to index. + +5. Click **Execute**. 
For example: + +![Entity](../assets/index-repo2.png 'Index Repo') + +### Verify Repository Indexed + +If successful, either the cURL command or the Swagger UI will return a successful response indicating the repository has been indexed. For example: + +``` +[ + { + "indexName": "file_centric_1", + "failureData": { + "failingIds": {} + }, + "successful": true + } +] +``` + +## Indexing by Study + +The most common way to index is usually by study. This operation will index all analyses in the specific study provided. + +### Using cURL + +To index a study with cURL, from your command line, execute the following: + +```shell + curl -X POST \ + http://localhost:11235/index/repository/``/study/`` \ + -H 'Content-Type: application/json' \ + -H 'cache-control: no-cache' \ +``` + +Where: +* `repositoryCode` is the code representing the Song repository that the study belongs to +* `studyId` is the ID of the study you want to index + +### Using Swagger UI + +To index a study using the Swagger UI: + +1. Go to `http://localhost:11235/maestro/api-docs` + +2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}/study/{studyId}` endpoint. + +3. Click **Try it out**. + +4. In `studyId`, enter the ID fo the study you want to index. + +5. In `repositoryCode`, enter the code of the Song repository that the study belongs to. + +6. Click **Execute**. For example: + +![Entity](../assets/index-study.png 'Index Study') + +### Verify Study Indexed + +If successful, either the cURL command or the Swagger UI will return a successful response indicating the study has been indexed. For example: + +``` +[ + { + "indexName": "file_centric_1", + "failureData": { + "failingIds": {} + }, + "successful": true + } +] +``` + +## Indexing by Analysis + +Lastly, you can also index data from an individual analysis within a study. 
+ +### Using cURL + +To index an individual analysis with cURL, from your command line, execute the following: + +```shell + curl -X POST \ + http://localhost:11235/index/repository/``/study/``/analysis/`` \ + -H 'Content-Type: application/json' \ + -H 'cache-control: no-cache' \ +``` + +Where: +* `repositoryCode` is the code representing the Song repository that the study belongs to +* `studyId` is the ID of the study that the analysis belongs to +* `analysisId` is the ID of the analysis you want to index + +### Using Swagger UI + +To index a study using the Swagger UI: + +1. Go to `http://localhost:11235/maestro/api-docs` + +2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}/study/{studyId}/analysis/{analysisId}` endpoint. + +3. Click **Try it out**. + +4. In `analysisId`, enter the ID of the analysis you want to index. + +5. In `studyId`, enter the ID fo the study that the analysis belongs to. + +6. In `repositoryCode`, enter the code of the Song repository that the study belongs to. + +7. Click **Execute**. For example: + +![Entity](../assets/index-analysis.png 'Index Analysis') + +### Verify Analysis Indexed + +If successful, either the cURL command or the Swagger UI will return a successful response indicating the analysis has been indexed. For example: + +``` +[ + { + "indexName": "file_centric_1", + "failureData": { + "failingIds": {} + }, + "successful": true + } +] +``` + +# Handling Index Mapping Changes + +By default, Maestro builds the Elasticsearch index based on a default index mapping that is pre-defined. The name of this default mapping is set in the configuration file, `application.yml`. + +While this mapping is not configurable in runtime, there may be use cases where an administrator will need to change the mapping to meet their business needs. 
+ +For example, as mentioned earlier, Song supports [dynamic schemas](/documentation/song/user-guide/schema/), which can be used to extend the base scheme with additional useful fields. + +In such a scenario, it is the administrator's responsibility to modify the mapping that Maestro uses as input. This process requires a proper migration process to be followed. + +The guidelines for such a process are as follows: + +1. Originally, an Elasticsearch index has been created initially using the base mapping, either manually by Maestro itself. + + +2. Maestro runs initially and starts indexing analyses from Song based on the original mapping. + + +3. Via Song, the administrator introduces new analysis types with new fields (dynamic schema). + + +4. Maestro will continue operating and indexing these new documents, but the new fields will not yet be indexed. + + +5. The administrator updates the existing index mapping to account for the new analysis types and new fields. + + +6. The administrator must re-index the data based on the new mapping. This can be done by either triggering Maestro by supplying the updated mapping in the configuration input, OR, can be done directly to Elasticsearch by using the Elasticsearch API. + + +7. Make sure to switch your Elasticsearch aliases to point to the new or updated index instead of the old one. + + +8. Once all of this is complete, the data will be migrated and Maestro will continue indexing based on the new mapping. 
\ No newline at end of file diff --git a/markdown/documentation/song/installation/auth-config.md b/markdown/documentation/song/installation/auth-config.md index c583a986..e421ec67 100644 --- a/markdown/documentation/song/installation/auth-config.md +++ b/markdown/documentation/song/installation/auth-config.md @@ -1,10 +1,10 @@ --- -title: Configuration Authentication +title: Configuring Authentication --- # Application Authorization -For an application to interact with song, authentication and authorization must be provided. You can do this by using an authorized users API Key with the correct permissions, or enabling application-to-application authorization following the OAuth 2.0 protocol. +For an application to interact with song, authentication and authorization must be provided. You can do this by using an authorized user's API Key with the correct permissions, or enabling application-to-application authorization following the OAuth 2.0 protocol. Scope requirements are defined in the `auth` section, and the `secure` and `jwt` profiles are available to manage the desired configuration. diff --git a/markdown/documentation/song/installation/configuration/profiles.md b/markdown/documentation/song/installation/configuration/profiles.md index 3a9d1c80..91fa98c6 100644 --- a/markdown/documentation/song/installation/configuration/profiles.md +++ b/markdown/documentation/song/installation/configuration/profiles.md @@ -1,9 +1,10 @@ --- title: Run Profiles --- -Song uses [Spring Profiles](https://docs.spring.io/spring-boot/docs/1.2.0.M1/reference/html/boot-features-profiles.html) as a feature top manage the running of a Song server in different environments. Spring profiles allow different settings to be applied to different environments, for example keeping security strict in production but less strict in test deployments. 
+Song uses [Spring Profiles](https://docs.spring.io/spring-boot/docs/1.2.0.M1/reference/html/boot-features-profiles.html) as a feature to manage the running of a Song server in different environments. Spring profiles allow different settings to be applied to different environments, for example keeping security strict in production but less strict in test deployments. + +During configuration, you will need to enable the active profiles in the `song-server-[version]/conf/application.yml` file. The active profiles to use for a particular application can be specified using the `profiles` argument which should be added at the start of the `spring` block, for example: -During configuration, you will need to enable the active profiles. The active profiles to use for a particular application can be specified using the `profiles` argument which should be added at the start of the `spring` block, for example: ```yaml spring: profiles: diff --git a/markdown/documentation/song/installation/installation.md b/markdown/documentation/song/installation/installation.md index 825ae67f..f9077fc9 100644 --- a/markdown/documentation/song/installation/installation.md +++ b/markdown/documentation/song/installation/installation.md @@ -12,11 +12,11 @@ The following software dependencies are required in order to run the Song server # Installation ## Distribution -Official Song releases can be found [Github](https://github.com/overture-stack/SONG/releases). Each release contains notes with a description of the bug fixes, new features or enhancements and breaking changes, as well as links to downloads and change logs. +Official Song releases can be found on [Github](https://github.com/overture-stack/SONG/releases). Each release contains notes with a description of the bug fixes, new features or enhancements and breaking changes, as well as links to downloads and change logs. 
The latest distribution can be downloaded using the command: ```bash - curl "https://artifacts.oicr.on.ca/artifactory/dcc-release/bio/overture/song-server/[RELEASE]/song-server-[RELEASE]-dist.tar.gz" -Ls -o song-server-dist.tar.gz + curl https://artifacts.oicr.on.ca/artifactory/dcc-release/bio/overture/song-server/[RELEASE]/song-server-[RELEASE]-dist.tar.gz -Ls -o song-server-dist.tar.gz ``` This distribution contains the default configuration and jars for running the server. To unzip, run the command: From 907a02be70ff514e4c3719de891fc06cedcfadf9 Mon Sep 17 00:00:00 2001 From: Brandon Date: Sun, 9 May 2021 01:17:46 -0400 Subject: [PATCH 02/10] Interim updates --- .../dms/installation/test-upload/index.md | 4 +- .../score/user-guide/client-setup.md | 223 +++--------------- .../score/user-guide/commands.md | 13 +- .../score/user-guide/download.md | 213 +---------------- .../documentation/score/user-guide/upload.md | 213 +---------------- 5 files changed, 52 insertions(+), 614 deletions(-) diff --git a/markdown/documentation/dms/installation/test-upload/index.md b/markdown/documentation/dms/installation/test-upload/index.md index 5318d7e3..ebbc15f7 100644 --- a/markdown/documentation/dms/installation/test-upload/index.md +++ b/markdown/documentation/dms/installation/test-upload/index.md @@ -186,7 +186,7 @@ client: # Download and Configure Score Client -Next, you must download and configure the Score client. This command-line client is to upload and download data files to and from your configured object storage service. To understand how to use Score in more detail, see [here](../../../score). +Next, you must download and configure the Score client. This command-line client is used to upload and download data files to and from your configured object storage service. To understand how to use Score in more detail, see [here](../../../score). 1. 
Download and unzip the latest Score client from [here](https://artifacts.oicr.on.ca/artifactory/dcc-release/bio/overture/score-client/%5BRELEASE%5D/score-client-%5BRELEASE%5D-dist.tar.gz) or do so from your terminal command line, then switch to the unzipped directory: @@ -227,7 +227,7 @@ For example: accessToken=36099917-45b1-49f4-b91e-68a655eb6708 # The location of the metadata service (SONG) -metadata.url=http://locatlhost:80/song-api +metadata.url=http://localhost:80/song-api # The location of the object storage service (SCORE) storage.url=http://localhost:80/score-api diff --git a/markdown/documentation/score/user-guide/client-setup.md b/markdown/documentation/score/user-guide/client-setup.md index 37a06fdf..1ad52723 100644 --- a/markdown/documentation/score/user-guide/client-setup.md +++ b/markdown/documentation/score/user-guide/client-setup.md @@ -2,215 +2,64 @@ title: Setting Up the Score Client --- -# Indexing Methods +End users must use the Score command-line client to execute data transfers (uploads and downloads) to and from the configured object storage. Users must download and install the client and perform some minimal configuration. -Once Maestro is installed, configured, and running, it is now available to receive requests and index data into Elasticsearch. +# Installing the Score Client -If Maestro is enabled to use event-driven indexing with Kafka, then it will listen for specific messages from the [configured Kafka topics](/documentation/maestro/installation/configuration#configuring-kafka-topics). Otherwise, without Kafka, Maestro can receive requests over the HTTP JSON API. +The Score client can be run in different ways depending on your operating system or setup: -Maestro has several ways to index data, described in the following sub-sections. 
+* If you are on Windows, use the Score client Docker distribution +* If you are on a Unix system (IOS/Linux) you can also use the Docker distribution, or alternatively use the Score client directly -## Maestro API +## Using the Docker Distribution -Maestro provides an API based on the [OpenAPI specification](https://swagger.io/specification/) (formerly known as the Swagger specification) which allows users (manually) and applications (programmatically) to interact with its core functionality. - -One major benefit of Swagger-based APIs is that they also provide easy-to-use, interactive API documentation via a web interface. Users can manually interact with the API by issuing cURL commands via their terminal. Administrators with access to the Swagger UI can also interact with the API via the web interface. - -On a local deployment, the Maestro Swagger UI can be accessed at `http://localhost:11235/maestro/api-docs`: - -![Entity](../assets/swagger.png 'Swagger UI') - -The following are examples of how to index data at different entity levels using the API. Recall that Maestro can index data flexibly at either the repository, study, or individual analysis levels. - -## Indexing by Repository - -It is possible to index an entire Song repository at once. This operation will index all analyses in all studies in the specified repository. - -### Using cURL - -To index a repository with cURL, from your command line. execute the following: +To use the Docker distribution, from your command line, pull the latest version: ```shell - curl -X POST \ - http://localhost:11235/index/repository/`` \ - -H 'Content-Type: application/json' \ - -H 'cache-control: no-cache' +$ docker pull overture/score ``` -Where `repositoryCode` is the code representing the Song repository you want to index. - -### Using Swagger UI - -To index a repository using the Swagger UI: - -1. Go to `http://localhost:11235/maestro/api-docs` - -2. 
Under **management-controller**, click the `POST /index/repository/{repositoryCode}` endpoint. - -3. Click **Try it out**. - -4. In `repositoryCode`, enter the code of the Song repository you want to index. - -5. Click **Execute**. For example: - -![Entity](../assets/index-repo2.png 'Index Repo') - -### Verify Repository Indexed - -If successful, either the cURL command or the Swagger UI will return a successful response indicating the repository has been indexed. For example: +The Docker distribution does not have a specific configuration to setup beforehand. Configuration parameters are supplied -``` -[ - { - "indexName": "file_centric_1", - "failureData": { - "failingIds": {} - }, - "successful": true - } -] -``` - -## Indexing by Study +You can now run the Score client for uploads and downloads using a Docker run command and supplying the correct input parameters. Unlike the direct Score client, the Docker distribution does not have a configuration file that you need to setup in advance. Instead, certain config parameters are supplied in real-time when you execute the Docker run command. See [Uploading Data](/documentation/score/user-guide/upload) and [Downloading Data](/documentation/score/user-guide/download) for more details. -The most common way to index is usually by study. This operation will index all analyses in the specific study provided. +## Using the Score Client Directly with Configured Values -### Using cURL +To use the Score client directly without Docker: -To index a study with cURL, from your command line, execute the following: +1. 
Download and unzip the latest Score client from [here](https://artifacts.oicr.on.ca/artifactory/dcc-release/bio/overture/score-client/%5BRELEASE%5D/score-client-%5BRELEASE%5D-dist.tar.gz) or do so from your command line, then switch to the unzipped directory: ```shell - curl -X POST \ - http://localhost:11235/index/repository/``/study/`` \ - -H 'Content-Type: application/json' \ - -H 'cache-control: no-cache' \ -``` - -Where: -* `repositoryCode` is the code representing the Song repository that the study belongs to -* `studyId` is the ID of the study you want to index - -### Using Swagger UI - -To index a study using the Swagger UI: - -1. Go to `http://localhost:11235/maestro/api-docs` - -2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}/study/{studyId}` endpoint. - -3. Click **Try it out**. - -4. In `studyId`, enter the ID fo the study you want to index. - -5. In `repositoryCode`, enter the code of the Song repository that the study belongs to. - -6. Click **Execute**. For example: - -![Entity](../assets/index-study.png 'Index Study') - -### Verify Study Indexed - -If successful, either the cURL command or the Swagger UI will return a successful response indicating the study has been indexed. For example: - -``` -[ - { - "indexName": "file_centric_1", - "failureData": { - "failingIds": {} - }, - "successful": true - } -] +$ wget -O score-client.tar.gz https://artifacts.oicr.on.ca/artifactory/dcc-release/bio/overture/score-client/[RELEASE]/score-client-[RELEASE]-dist.tar.gz + +$ tar xvzf score-client.tar.gz + +$ cd score-client- ``` -## Indexing by Analysis +2. Open the `score-client//conf/application.properties` file and edit the `client` section as follows: -Lastly, you can also index data from an individual analysis within a study. 
+accessToken: your personal API Token +metadata.url: the file metadata Song server URL +storage.url: the object storage Score server URL -### Using cURL +* Set `accessToken` to your personal API key (API token). For example, if you are using Overture's [Ego](/documentation/ego) for authentication, then this would be your personal API key issued by Ego. +* Uncomment `metadata.url` and set it to the URL of the [Song](/documentation/song) server that you deployed as part of the Score [prerequisites](/documentation/score/installation#dependencies). +* Uncomment `storage.url` and set it to the URL of the [object storage](/documentation/score/installation#configuring-storage-providers) that you deployed as part of the Score [prerequisites](/documentation/score/installation#dependencies). -To index an individual analysis with cURL, from your command line, execute the following: +For example: ```shell - curl -X POST \ - http://localhost:11235/index/repository/``/study/``/analysis/`` \ - -H 'Content-Type: application/json' \ - -H 'cache-control: no-cache' \ +# The access token for authorized access to data +accessToken=36099917-45b1-49f4-b91e-68a655eb6708 + +# The location of the metadata service (SONG) +metadata.url=http://localhost:80/song-api + +# The location of the object storage service (SCORE) +storage.url=http://localhost:80/score-api ``` -Where: -* `repositoryCode` is the code representing the Song repository that the study belongs to -* `studyId` is the ID of the study that the analysis belongs to -* `analysisId` is the ID of the analysis you want to index - -### Using Swagger UI - -To index a study using the Swagger UI: - -1. Go to `http://localhost:11235/maestro/api-docs` - -2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}/study/{studyId}/analysis/{analysisId}` endpoint. - -3. Click **Try it out**. - -4. In `analysisId`, enter the ID of the analysis you want to index. - -5. 
In `studyId`, enter the ID fo the study that the analysis belongs to. - -6. In `repositoryCode`, enter the code of the Song repository that the study belongs to. - -7. Click **Execute**. For example: - -![Entity](../assets/index-analysis.png 'Index Analysis') - -### Verify Analysis Indexed - -If successful, either the cURL command or the Swagger UI will return a successful response indicating the analysis has been indexed. For example: - -``` -[ - { - "indexName": "file_centric_1", - "failureData": { - "failingIds": {} - }, - "successful": true - } -] -``` - -# Handling Index Mapping Changes - -By default, Maestro builds the Elasticsearch index based on a default index mapping that is pre-defined. The name of this default mapping is set in the configuration file, `application.yml`. - -While this mapping is not configurable in runtime, there may be use cases where an administrator will need to change the mapping to meet their business needs. - -For example, as mentioned earlier, Song supports [dynamic schemas](/documentation/song/user-guide/schema/), which can be used to extend the base scheme with additional useful fields. - -In such a scenario, it is the administrator's responsibility to modify the mapping that Maestro uses as input. This process requires a proper migration process to be followed. - -The guidelines for such a process are as follows: - -1. Originally, an Elasticsearch index has been created initially using the base mapping, either manually by Maestro itself. - - -2. Maestro runs initially and starts indexing analyses from Song based on the original mapping. - - -3. Via Song, the administrator introduces new analysis types with new fields (dynamic schema). - - -4. Maestro will continue operating and indexing these new documents, but the new fields will not yet be indexed. - - -5. The administrator updates the existing index mapping to account for the new analysis types and new fields. - - -6. 
The administrator must re-index the data based on the new mapping. This can be done by either triggering Maestro by supplying the updated mapping in the configuration input, OR, can be done directly to Elasticsearch by using the Elasticsearch API. - - -7. Make sure to switch your Elasticsearch aliases to point to the new or updated index instead of the old one. - +3. Save your changes. -8. Once all of this is complete, the data will be migrated and Maestro will continue indexing based on the new mapping. \ No newline at end of file +You can now run the Score client directly using various commands. For details on how to do data transfers, see [Uploading Data](/documentation/score/user-guide/upload) and [Downloading Data](/documentation/score/user-guide/download). For a full command reference, see [here](/documentation/score/user-guide/commands). \ No newline at end of file diff --git a/markdown/documentation/score/user-guide/commands.md b/markdown/documentation/score/user-guide/commands.md index ad42f3ae..19830bcf 100644 --- a/markdown/documentation/score/user-guide/commands.md +++ b/markdown/documentation/score/user-guide/commands.md @@ -6,4 +6,15 @@ Here is a reference table listing all the commands and command options currently | Command | Short Form | Description | Options | | -----------------| ------------| -----------| --------| -| ??? | ??? | ??? | ??? | \ No newline at end of file +| ??? | ??? | ??? | ??? | + + +| Command | Short Form | Description | Options | +| -----------------| ------------| -----------| --------| +| --help | -h | Displays the command reference and links to useful documentation. | None | +| --version | -V | Displays the current DMS version. | None | +| config build | co bu | Runs the interactive configuration questionnaire. 
| None | +| config get | co g| Displays the contents of the saved configuration file (`~/.dms/config.yaml`) | None | +| cluster start | cl start | Deploys the configuration in the `~/.dms/config.yaml` file to a single cluster. | None | +| cluster stop | cl stop | Stops the running cluster and all services deployed to it, **without** deleting the data volumes. | None | +| cluster destroy | co destroy | Destroys the cluster, all services deploy to it, and **ALL** the data volumes. Always asks for confirmation before executing, unless the `-f` option is supplied. | `-f` - Forcefully destroys volumes without asking for confirmation. | \ No newline at end of file diff --git a/markdown/documentation/score/user-guide/download.md b/markdown/documentation/score/user-guide/download.md index 57fff990..ef1dad2a 100644 --- a/markdown/documentation/score/user-guide/download.md +++ b/markdown/documentation/score/user-guide/download.md @@ -2,215 +2,4 @@ title: Downloading Data --- -# Indexing Methods - -Once Maestro is installed, configured, and running, it is now available to receive requests and index data into Elasticsearch. - -If Maestro is enabled to use event-driven indexing with Kafka, then it will listen for specific messages from the [configured Kafka topics](/documentation/maestro/installation/configuration#configuring-kafka-topics). Otherwise, without Kafka, Maestro can receive requests over the HTTP JSON API. - -Maestro has several ways to index data, described in the following sub-sections. - -## Maestro API - -Maestro provides an API based on the [OpenAPI specification](https://swagger.io/specification/) (formerly known as the Swagger specification) which allows users (manually) and applications (programmatically) to interact with its core functionality. - -One major benefit of Swagger-based APIs is that they also provide easy-to-use, interactive API documentation via a web interface. 
Users can manually interact with the API by issuing cURL commands via their terminal. Administrators with access to the Swagger UI can also interact with the API via the web interface. - -On a local deployment, the Maestro Swagger UI can be accessed at `http://localhost:11235/maestro/api-docs`: - -![Entity](../assets/swagger.png 'Swagger UI') - -The following are examples of how to index data at different entity levels using the API. Recall that Maestro can index data flexibly at either the repository, study, or individual analysis levels. - -## Indexing by Repository - -It is possible to index an entire Song repository at once. This operation will index all analyses in all studies in the specified repository. - -### Using cURL - -To index a repository with cURL, from your command line. execute the following: - -```shell - curl -X POST \ - http://localhost:11235/index/repository/`` \ - -H 'Content-Type: application/json' \ - -H 'cache-control: no-cache' -``` - -Where `repositoryCode` is the code representing the Song repository you want to index. - -### Using Swagger UI - -To index a repository using the Swagger UI: - -1. Go to `http://localhost:11235/maestro/api-docs` - -2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}` endpoint. - -3. Click **Try it out**. - -4. In `repositoryCode`, enter the code of the Song repository you want to index. - -5. Click **Execute**. For example: - -![Entity](../assets/index-repo2.png 'Index Repo') - -### Verify Repository Indexed - -If successful, either the cURL command or the Swagger UI will return a successful response indicating the repository has been indexed. For example: - -``` -[ - { - "indexName": "file_centric_1", - "failureData": { - "failingIds": {} - }, - "successful": true - } -] -``` - -## Indexing by Study - -The most common way to index is usually by study. This operation will index all analyses in the specific study provided. 
- -### Using cURL - -To index a study with cURL, from your command line, execute the following: - -```shell - curl -X POST \ - http://localhost:11235/index/repository/``/study/`` \ - -H 'Content-Type: application/json' \ - -H 'cache-control: no-cache' \ -``` - -Where: -* `repositoryCode` is the code representing the Song repository that the study belongs to -* `studyId` is the ID of the study you want to index - -### Using Swagger UI - -To index a study using the Swagger UI: - -1. Go to `http://localhost:11235/maestro/api-docs` - -2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}/study/{studyId}` endpoint. - -3. Click **Try it out**. - -4. In `studyId`, enter the ID fo the study you want to index. - -5. In `repositoryCode`, enter the code of the Song repository that the study belongs to. - -6. Click **Execute**. For example: - -![Entity](../assets/index-study.png 'Index Study') - -### Verify Study Indexed - -If successful, either the cURL command or the Swagger UI will return a successful response indicating the study has been indexed. For example: - -``` -[ - { - "indexName": "file_centric_1", - "failureData": { - "failingIds": {} - }, - "successful": true - } -] -``` - -## Indexing by Analysis - -Lastly, you can also index data from an individual analysis within a study. - -### Using cURL - -To index an individual analysis with cURL, from your command line, execute the following: - -```shell - curl -X POST \ - http://localhost:11235/index/repository/``/study/``/analysis/`` \ - -H 'Content-Type: application/json' \ - -H 'cache-control: no-cache' \ -``` - -Where: -* `repositoryCode` is the code representing the Song repository that the study belongs to -* `studyId` is the ID of the study that the analysis belongs to -* `analysisId` is the ID of the analysis you want to index - -### Using Swagger UI - -To index a study using the Swagger UI: - -1. Go to `http://localhost:11235/maestro/api-docs` - -2. 
Under **management-controller**, click the `POST /index/repository/{repositoryCode}/study/{studyId}/analysis/{analysisId}` endpoint. - -3. Click **Try it out**. - -4. In `analysisId`, enter the ID of the analysis you want to index. - -5. In `studyId`, enter the ID fo the study that the analysis belongs to. - -6. In `repositoryCode`, enter the code of the Song repository that the study belongs to. - -7. Click **Execute**. For example: - -![Entity](../assets/index-analysis.png 'Index Analysis') - -### Verify Analysis Indexed - -If successful, either the cURL command or the Swagger UI will return a successful response indicating the analysis has been indexed. For example: - -``` -[ - { - "indexName": "file_centric_1", - "failureData": { - "failingIds": {} - }, - "successful": true - } -] -``` - -# Handling Index Mapping Changes - -By default, Maestro builds the Elasticsearch index based on a default index mapping that is pre-defined. The name of this default mapping is set in the configuration file, `application.yml`. - -While this mapping is not configurable in runtime, there may be use cases where an administrator will need to change the mapping to meet their business needs. - -For example, as mentioned earlier, Song supports [dynamic schemas](/documentation/song/user-guide/schema/), which can be used to extend the base scheme with additional useful fields. - -In such a scenario, it is the administrator's responsibility to modify the mapping that Maestro uses as input. This process requires a proper migration process to be followed. - -The guidelines for such a process are as follows: - -1. Originally, an Elasticsearch index has been created initially using the base mapping, either manually by Maestro itself. - - -2. Maestro runs initially and starts indexing analyses from Song based on the original mapping. - - -3. Via Song, the administrator introduces new analysis types with new fields (dynamic schema). - - -4. 
Maestro will continue operating and indexing these new documents, but the new fields will not yet be indexed. - - -5. The administrator updates the existing index mapping to account for the new analysis types and new fields. - - -6. The administrator must re-index the data based on the new mapping. This can be done by either triggering Maestro by supplying the updated mapping in the configuration input, OR, can be done directly to Elasticsearch by using the Elasticsearch API. - - -7. Make sure to switch your Elasticsearch aliases to point to the new or updated index instead of the old one. - - -8. Once all of this is complete, the data will be migrated and Maestro will continue indexing based on the new mapping. \ No newline at end of file +??? \ No newline at end of file diff --git a/markdown/documentation/score/user-guide/upload.md b/markdown/documentation/score/user-guide/upload.md index 8559a3a0..41c99418 100644 --- a/markdown/documentation/score/user-guide/upload.md +++ b/markdown/documentation/score/user-guide/upload.md @@ -2,215 +2,4 @@ title: Uploading Data --- -# Indexing Methods - -Once Maestro is installed, configured, and running, it is now available to receive requests and index data into Elasticsearch. - -If Maestro is enabled to use event-driven indexing with Kafka, then it will listen for specific messages from the [configured Kafka topics](/documentation/maestro/installation/configuration#configuring-kafka-topics). Otherwise, without Kafka, Maestro can receive requests over the HTTP JSON API. - -Maestro has several ways to index data, described in the following sub-sections. - -## Maestro API - -Maestro provides an API based on the [OpenAPI specification](https://swagger.io/specification/) (formerly known as the Swagger specification) which allows users (manually) and applications (programmatically) to interact with its core functionality. 
- -One major benefit of Swagger-based APIs is that they also provide easy-to-use, interactive API documentation via a web interface. Users can manually interact with the API by issuing cURL commands via their terminal. Administrators with access to the Swagger UI can also interact with the API via the web interface. - -On a local deployment, the Maestro Swagger UI can be accessed at `http://localhost:11235/maestro/api-docs`: - -![Entity](../assets/swagger.png 'Swagger UI') - -The following are examples of how to index data at different entity levels using the API. Recall that Maestro can index data flexibly at either the repository, study, or individual analysis levels. - -## Indexing by Repository - -It is possible to index an entire Song repository at once. This operation will index all analyses in all studies in the specified repository. - -### Using cURL - -To index a repository with cURL, from your command line. execute the following: - -```shell - curl -X POST \ - http://localhost:11235/index/repository/`` \ - -H 'Content-Type: application/json' \ - -H 'cache-control: no-cache' -``` - -Where `repositoryCode` is the code representing the Song repository you want to index. - -### Using Swagger UI - -To index a repository using the Swagger UI: - -1. Go to `http://localhost:11235/maestro/api-docs` - -2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}` endpoint. - -3. Click **Try it out**. - -4. In `repositoryCode`, enter the code of the Song repository you want to index. - -5. Click **Execute**. For example: - -![Entity](../assets/index-repo2.png 'Index Repo') - -### Verify Repository Indexed - -If successful, either the cURL command or the Swagger UI will return a successful response indicating the repository has been indexed. 
For example: - -``` -[ - { - "indexName": "file_centric_1", - "failureData": { - "failingIds": {} - }, - "successful": true - } -] -``` - -## Indexing by Study - -The most common way to index is usually by study. This operation will index all analyses in the specific study provided. - -### Using cURL - -To index a study with cURL, from your command line, execute the following: - -```shell - curl -X POST \ - http://localhost:11235/index/repository/``/study/`` \ - -H 'Content-Type: application/json' \ - -H 'cache-control: no-cache' \ -``` - -Where: -* `repositoryCode` is the code representing the Song repository that the study belongs to -* `studyId` is the ID of the study you want to index - -### Using Swagger UI - -To index a study using the Swagger UI: - -1. Go to `http://localhost:11235/maestro/api-docs` - -2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}/study/{studyId}` endpoint. - -3. Click **Try it out**. - -4. In `studyId`, enter the ID fo the study you want to index. - -5. In `repositoryCode`, enter the code of the Song repository that the study belongs to. - -6. Click **Execute**. For example: - -![Entity](../assets/index-study.png 'Index Study') - -### Verify Study Indexed - -If successful, either the cURL command or the Swagger UI will return a successful response indicating the study has been indexed. For example: - -``` -[ - { - "indexName": "file_centric_1", - "failureData": { - "failingIds": {} - }, - "successful": true - } -] -``` - -## Indexing by Analysis - -Lastly, you can also index data from an individual analysis within a study. 
- -### Using cURL - -To index an individual analysis with cURL, from your command line, execute the following: - -```shell - curl -X POST \ - http://localhost:11235/index/repository/``/study/``/analysis/`` \ - -H 'Content-Type: application/json' \ - -H 'cache-control: no-cache' \ -``` - -Where: -* `repositoryCode` is the code representing the Song repository that the study belongs to -* `studyId` is the ID of the study that the analysis belongs to -* `analysisId` is the ID of the analysis you want to index - -### Using Swagger UI - -To index a study using the Swagger UI: - -1. Go to `http://localhost:11235/maestro/api-docs` - -2. Under **management-controller**, click the `POST /index/repository/{repositoryCode}/study/{studyId}/analysis/{analysisId}` endpoint. - -3. Click **Try it out**. - -4. In `analysisId`, enter the ID of the analysis you want to index. - -5. In `studyId`, enter the ID fo the study that the analysis belongs to. - -6. In `repositoryCode`, enter the code of the Song repository that the study belongs to. - -7. Click **Execute**. For example: - -![Entity](../assets/index-analysis.png 'Index Analysis') - -### Verify Analysis Indexed - -If successful, either the cURL command or the Swagger UI will return a successful response indicating the analysis has been indexed. For example: - -``` -[ - { - "indexName": "file_centric_1", - "failureData": { - "failingIds": {} - }, - "successful": true - } -] -``` - -# Handling Index Mapping Changes - -By default, Maestro builds the Elasticsearch index based on a default index mapping that is pre-defined. The name of this default mapping is set in the configuration file, `application.yml`. - -While this mapping is not configurable in runtime, there may be use cases where an administrator will need to change the mapping to meet their business needs. 
- -For example, as mentioned earlier, Song supports [dynamic schemas](/documentation/song/user-guide/schema/), which can be used to extend the base scheme with additional useful fields. - -In such a scenario, it is the administrator's responsibility to modify the mapping that Maestro uses as input. This process requires a proper migration process to be followed. - -The guidelines for such a process are as follows: - -1. Originally, an Elasticsearch index has been created initially using the base mapping, either manually by Maestro itself. - - -2. Maestro runs initially and starts indexing analyses from Song based on the original mapping. - - -3. Via Song, the administrator introduces new analysis types with new fields (dynamic schema). - - -4. Maestro will continue operating and indexing these new documents, but the new fields will not yet be indexed. - - -5. The administrator updates the existing index mapping to account for the new analysis types and new fields. - - -6. The administrator must re-index the data based on the new mapping. This can be done by either triggering Maestro by supplying the updated mapping in the configuration input, OR, can be done directly to Elasticsearch by using the Elasticsearch API. - - -7. Make sure to switch your Elasticsearch aliases to point to the new or updated index instead of the old one. - - -8. Once all of this is complete, the data will be migrated and Maestro will continue indexing based on the new mapping. \ No newline at end of file +??? 
\ No newline at end of file From ed91782e73e39906693bf18e67721be8b5c434c5 Mon Sep 17 00:00:00 2001 From: Brandon Date: Sun, 9 May 2021 13:48:20 -0400 Subject: [PATCH 03/10] interim updates --- .../score/installation/authentication.md | 24 +++---- .../score/user-guide/commands.md | 63 ++++++++++++++----- 2 files changed, 59 insertions(+), 28 deletions(-) diff --git a/markdown/documentation/score/installation/authentication.md b/markdown/documentation/score/installation/authentication.md index 15c39eda..bab48dd8 100644 --- a/markdown/documentation/score/installation/authentication.md +++ b/markdown/documentation/score/installation/authentication.md @@ -19,18 +19,18 @@ The `secure` profile is required if the [Overture](https://overture.bio) product To configure authentication and authorization via Ego, in the `score-server-[version]/conf/application.properties` file, make sure the `secure` profile exists and configure these settings in the `auth -> server` section: -| Setting | Requirement | Description | -|---------|-------------|-------------| -| `url` | Required | URL to the Ego API endpoint that is used to authenticate a user's API key (token). Specify the host and port where the endpoint is hosted. The endpoint to use is `/oauth/check_token`. See the example below for guidance. | -| `tokenName` | Required | Name used to identify a token. Typically you should leave this set to the default value, `token`. | -| `clientId` | Required | This is the client ID for the Score application as configured in Ego. | -| `clientSecret` | Required | This is the client secret for the Score application as configured in Ego. | -| `scope -> download -> system` | Required | Scope (permission) that a user's API key must have to enable system-level downloads from Score. Typically you should leave this set to the default value, `score.READ`. 
| -| `scope -> download -> study -> prefix` | Required | Prefix that must come before the Song study name when assigning study-level download scopes (permissions) for Score. Typically you should leave this set to the default value, `score.`. | -| `scope -> download -> study -> suffix` | Required | Suffix that must come after the Song study name when assigning study-level download scopes (permissions) for Score. Typically you should leave this set to the default value, `.READ`. | -| `scope -> upload -> system` | Required | Scope (permission) that a user's API key must have to enable system-level uploads to Score. Typically you should leave this set to the default value, `score.READ`. | -| `scope -> upload -> study -> prefix` | Required | Prefix that must come before the Song study name when assigning study-level upload scopes (permissions) for Score. Typically you should leave this set to the default value, `score.`. | -| `scope -> upload -> study -> suffix` | Required | Suffix that must come after the Song study name when assigning study-level upload scopes (permissions) for Score. Typically you should leave this set to the default value, `.READ`. | +| Section | Setting | Requirement | Description | +|---------|---------|-------------|-------------| +| `server` | `url` | Required | URL to the Ego API endpoint that is used to authenticate a user's API key (token). Specify the host and port where the endpoint is hosted. The endpoint to use is `/oauth/check_token`. See the example below for guidance. | +| | `tokenName` | Required | Name used to identify a token. Typically you should leave this set to the default value, `token`. | +| | `clientId` | Required | This is the client ID for the Score application as configured in Ego. | +| | `clientSecret` | Required | This is the client secret for the Score application as configured in Ego. 
| +| `server -> scope -> download` | `system` | Required | Scope (permission) that a user's API key must have to enable system-level downloads from Score. Typically you should leave this set to the default value, `score.READ`. | +| `server -> scope -> download -> study` | `prefix` | Required | Prefix that must come before the Song study name when assigning study-level download scopes (permissions) for Score. Typically you should leave this set to the default value, `score.`. | +| | `suffix` | Required | Suffix that must come after the Song study name when assigning study-level download scopes (permissions) for Score. Typically you should leave this set to the default value, `.READ`. | +| `server -> scope -> upload` | `system` | Required | Scope (permission) that a user's API key must have to enable system-level uploads to Score. Typically you should leave this set to the default value, `score.READ`. | +| `server -> scope -> upload -> study` | `prefix` | Required | Prefix that must come before the Song study name when assigning study-level upload scopes (permissions) for Score. Typically you should leave this set to the default value, `score.`. | +| | `suffix` | Required | Suffix that must come after the Song study name when assigning study-level upload scopes (permissions) for Score. Typically you should leave this set to the default value, `.READ`. | For example: diff --git a/markdown/documentation/score/user-guide/commands.md b/markdown/documentation/score/user-guide/commands.md index 19830bcf..a1808f71 100644 --- a/markdown/documentation/score/user-guide/commands.md +++ b/markdown/documentation/score/user-guide/commands.md @@ -2,19 +2,50 @@ title: Command Reference --- -Here is a reference table listing all the commands and command options currently supported by the Score client. - -| Command | Short Form | Description | Options | -| -----------------| ------------| -----------| --------| -| ??? | ??? | ??? | ??? 
| - - -| Command | Short Form | Description | Options | -| -----------------| ------------| -----------| --------| -| --help | -h | Displays the command reference and links to useful documentation. | None | -| --version | -V | Displays the current DMS version. | None | -| config build | co bu | Runs the interactive configuration questionnaire. | None | -| config get | co g| Displays the contents of the saved configuration file (`~/.dms/config.yaml`) | None | -| cluster start | cl start | Deploys the configuration in the `~/.dms/config.yaml` file to a single cluster. | None | -| cluster stop | cl stop | Stops the running cluster and all services deployed to it, **without** deleting the data volumes. | None | -| cluster destroy | co destroy | Destroys the cluster, all services deploy to it, and **ALL** the data volumes. Always asks for confirmation before executing, unless the `-f` option is supplied. | `-f` - Forcefully destroys volumes without asking for confirmation. | \ No newline at end of file +# Invoking a Command + +To invoke a command, run the `score-client` executable and append any options required after the command with the necessary input values. + +For example, invoking the `upload` command and using the `--manifest` option to provide a manifest file: + +```shell +$ ./score-client-/bin/score-client upload --manifest .//manifest.txt +``` + +# Commands + +The following sub-sections are a reference listing of all the commands and command options currently supported by the Score client. + +## Download + +This command retrieves file object(s) from the remote storage repository. + +| Option | Description | +| -------| ------------| +| `--analysis-id` | Download files for a specific [Song](/documentation/song) analysis ID. | +| `--force` | Force a re-download of the file if it already exists locally (overrides local file). | +| `--index` | If available, also download the file index. | +| `--length` | Limit the number of bytes to download to this value. 
By default, if this option is not specified, all of the file will be downloaded. | +| `--manifest` | Download files based on a manifest file ID, manifest file URL, or path to the manifest file. | +| `--object-id` | Download a specific file object ID. | +| `--offset` | Byte position in the source file to begin download from. By default, if this option is not specified, all of the file will be downloaded. | +| `--output-dir` | Path to the output directory where files will be downloaded to. | +| `--output-layout` | Layout of the output directory, one of: | +| | * `bundle` : Saved according to the filename under the Song bundle ID directory. | +| | * `filename` : Saved according to the filename in the output directory. | +| | * `id` : Saved according to the object ID in the output directory. | +| `--program-id` | Download files for a specific [Song](/documentation/song) program ID. | +| `--study-id` | Download files for a specific [Song](/documentation/song) study ID. | +| `--validate` | If available, perform validation on file MD5 checksum. | +| `--verify-connection` | First verify the connection to the object storage repository. | + +# Extra Options + +Here is a list of additional option flags that can be used when the `score-client` executable itself: + + +| Option | Description | +| -------| ------------| +| --profile | Define a specific environment profile used to resolve configuration properties. If not specified, the default profile is used. | +| --quiet | Run client in quiet mode, with a reduced, minimal set of info messages displayed during execution. | +| --silent | Run client in silent mode, without any additional info messages displayed during execution. 
| \ No newline at end of file From 78872c97e51b100b9f7390dcc329015577b940a0 Mon Sep 17 00:00:00 2001 From: Brandon Date: Sun, 9 May 2021 15:18:47 -0400 Subject: [PATCH 04/10] interim updates --- .../score/user-guide/commands.md | 73 ++++++++++++++++++- 1 file changed, 69 insertions(+), 4 deletions(-) diff --git a/markdown/documentation/score/user-guide/commands.md b/markdown/documentation/score/user-guide/commands.md index a1808f71..5a0478fb 100644 --- a/markdown/documentation/score/user-guide/commands.md +++ b/markdown/documentation/score/user-guide/commands.md @@ -18,7 +18,7 @@ The following sub-sections are a reference listing of all the commands and comma ## Download -This command retrieves file object(s) from the remote storage repository. +The `download` command downloads file object(s) from the remote storage repository. | Option | Description | | -------| ------------| @@ -39,6 +39,71 @@ This command retrieves file object(s) from the remote storage repository. | `--validate` | If available, perform validation on file MD5 checksum. | | `--verify-connection` | First verify the connection to the object storage repository. | +## Help + +The `help` command displays help information for the Score client commands and options. + +## Info + +The `info` command displays the active configuration information for the Score client. + +| Option | Description | +| -------| ------------| +| `--verbose` | Displays an exhaustive list of all Score client configuration properties. Without this option, only a reduced, minimal set of properties is displayed. | + +## Manifest + +The `manifest` command displays the contents of a specific Score manifest file. + +| Option | Description | +| -------| ------------| +| `--manifest` | Manifest file ID, manifest file URL, or path to the manifest file that you want to display the contents for. 
| + +## Mount + +The `mount` command mounts a read-only [FUSE](https://github.com/libfuse/) file system view of the object storage repository that Score is using. + +| Option | Description | +| -------| ------------| +| `--cache-metadata` | To make load times faster, you can optionally cache metadata on the local disk and use the cache if available. | +| `--daemonize` | Optionally detach the mount point and run it in the background instead. | +| `--layout` | Layout of the mount point directory, one of: | +| | * `bundle` : Nests files in the bundle directory. | +| | * `object-id` : Uses a flat list of files named by their associated object ID. | +| `--manifest` | Manifest file ID, manifest file URL, or path to the manifest file that you want to specifically mount contents for. | +| `--mount-point` | The mount point of the FUSE file system. For this command to work, the mount point must exist, be empty and be executable by the current user. | +| `--options` | Additional mount options of the file system that you want to use. This may vary depending on the file system. E.g. `user_allow_other`, `allow_other`, `fsname=debug`, etc. | +| `--verify-connection` | First verify the connection to the object storage repository. | + +## Upload + +The `upload` command uploads file object(s) to the remote storage repository. + +| Option | Description | +| -------| ------------| +| `--analysis-id` | Download files for a specific [Song](/documentation/song) analysis ID. | +| `--force` | Force a re-download of the file if it already exists locally (overrides local file). | +| `--index` | If available, also download the file index. | +| `--length` | Limit the number of bytes to download to this value. By default, if this option is not specified, all of the file will be downloaded. | +| `--manifest` | Download files based on a manifest file ID, manifest file URL, or path to the manifest file. | +| `--object-id` | Download a specific file object ID. 
| +| `--offset` | Byte position in the source file to begin download from. By default, if this option is not specified, all of the file will be downloaded. | +| `--output-dir` | Path to the output directory where files will be downloaded to. | +| `--output-layout` | Layout of the output directory, one of: | +| | * `bundle` : Saved according to the filename under the Song bundle ID directory. | +| | * `filename` : Saved according to the filename in the output directory. | +| | * `id` : Saved according to the object ID in the output directory. | +| `--program-id` | Download files for a specific [Song](/documentation/song) program ID. | +| `--study-id` | Download files for a specific [Song](/documentation/song) study ID. | +| `--validate` | If available, perform validation on file MD5 checksum. | +| `--verify-connection` | First verify the connection to the object storage repository. | + +## Url + +## Version + +## View + # Extra Options Here is a list of additional option flags that can be used when the `score-client` executable itself: @@ -46,6 +111,6 @@ Here is a list of additional option flags that can be used when the `score-clien | Option | Description | | -------| ------------| -| --profile | Define a specific environment profile used to resolve configuration properties. If not specified, the default profile is used. | -| --quiet | Run client in quiet mode, with a reduced, minimal set of info messages displayed during execution. | -| --silent | Run client in silent mode, without any additional info messages displayed during execution. | \ No newline at end of file +| `--profile` | Define a specific environment profile used to resolve configuration properties. If not specified, the default profile is used. | +| `--quiet` | Run client in quiet mode, with a reduced, minimal set of info messages displayed during execution. | +| `--silent` | Run client in silent mode, without any additional info messages displayed during execution. 
| \ No newline at end of file From 2dd13e96e13adc6e37334ed2fcc6749a5b1f8e0d Mon Sep 17 00:00:00 2001 From: Brandon Date: Sun, 9 May 2021 17:20:51 -0400 Subject: [PATCH 05/10] interim updates --- .../score/user-guide/commands.md | 51 +++++++++++++------ 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/markdown/documentation/score/user-guide/commands.md b/markdown/documentation/score/user-guide/commands.md index 5a0478fb..aa3bc556 100644 --- a/markdown/documentation/score/user-guide/commands.md +++ b/markdown/documentation/score/user-guide/commands.md @@ -26,7 +26,7 @@ The `download` command downloads file object(s) from the remote storage reposito | `--force` | Force a re-download of the file if it already exists locally (overrides local file). | | `--index` | If available, also download the file index. | | `--length` | Limit the number of bytes to download to this value. By default, if this option is not specified, all of the file will be downloaded. | -| `--manifest` | Download files based on a manifest file ID, manifest file URL, or path to the manifest file. | +| `--manifest` | Download specific files based on a manifest file ID, manifest file URL, or path to the manifest file. | | `--object-id` | Download a specific file object ID. | | `--offset` | Byte position in the source file to begin download from. By default, if this option is not specified, all of the file will be downloaded. | | `--output-dir` | Path to the output directory where files will be downloaded to. | @@ -81,29 +81,50 @@ The `upload` command uploads file object(s) to the remote storage repository. | Option | Description | | -------| ------------| -| `--analysis-id` | Download files for a specific [Song](/documentation/song) analysis ID. | -| `--force` | Force a re-download of the file if it already exists locally (overrides local file). | -| `--index` | If available, also download the file index. | -| `--length` | Limit the number of bytes to download to this value. 
By default, if this option is not specified, all of the file will be downloaded. | -| `--manifest` | Download files based on a manifest file ID, manifest file URL, or path to the manifest file. | -| `--object-id` | Download a specific file object ID. | -| `--offset` | Byte position in the source file to begin download from. By default, if this option is not specified, all of the file will be downloaded. | -| `--output-dir` | Path to the output directory where files will be downloaded to. | -| `--output-layout` | Layout of the output directory, one of: | -| | * `bundle` : Saved according to the filename under the Song bundle ID directory. | -| | * `filename` : Saved according to the filename in the output directory. | -| | * `id` : Saved according to the object ID in the output directory. | -| `--program-id` | Download files for a specific [Song](/documentation/song) program ID. | -| `--study-id` | Download files for a specific [Song](/documentation/song) study ID. | +| `--file` | Upload a specific file based on a path to that file. | +| `--force` | Force a re-upload of the file if it already exists in the object storage (overrides file in the repository). | +| `--manifest` | Upload specific files based on a manifest file ID, manifest file URL, or path to the manifest file. | +| `--md5` | MD5 checksum value of the file to upload. | +| `--object-id` | Upload a specific file based on its object ID. | | `--validate` | If available, perform validation on file MD5 checksum. | | `--verify-connection` | First verify the connection to the object storage repository. | ## Url +The `url` command displays the URL of a specific file object in the object storage repository. + +| Option | Description | +| -------| ------------| +| `--object-id` | Object ID of the specific file you want to display the URL for. | + ## Version +The `version` command displays the Score client's version information. 
+ ## View +The `view` command locally stores and displays some or all contents of a [SAM or BAM](https://samtools.github.io/hts-specs/SAMv1.pdf) file. + +| Option | Description | +| -------| ------------| +| `--bed-query` | You can optionally specify a file in [BED](https://m.ensembl.org/info/website/upload/bed.html) format containing specific ranges to query. This option overrides the `--query` option. | +| `--contained` | Only output sequence alignments completely contained in a specific region. If this option is not used, then by default any alignment that intersects with a specified region will be returned. | +| `--header-only` | Only output the header of the SAM or BAM file. | +| `--input-file` | Local path to the BAM file being queried. This option supersedes the `--object-id` option. | +| `--input-file-index` | Local path to index file. This requires the `--input-file` option to also be provided. | +| `--manifest` | Manifest file ID, manifest file URL, or path to the manifest file containing object IDs and ranges that you want to query for. | +| `--object-id` | Specific object ID inside a BAM file from which to download a slice. This option supersedes the `--manifest` option. | +| `--output-file` | Name of the file to write output to. If not specified, then the metadata filename or the original input filename will be used by default. | +| `--output-format` | File format being queried and written to output, either `SAM` or `BAM`. | +| `--output-dir` | Path to the output directory where the output file will be stored. Only used with the `--manifest` option. | +| `--output-index` | Indicates whether to write index files to output. Only used with the `--manifest` option. | +| `--output-original-header` | Output the original header in its entirety. | +| `--output-type` | Structure of the output file containing query results. One of: `CROSS`, `MERGED`, or `TRIMMED`. Only used with the `--manifest` option. 
| +| `--query` | Query used to define what contents to extract from a BAM file. You must use coordinate format (`sequence:start-end`). Note that multiple ranges must be separated by a space. | +| `--reference-file` | Local path to the [FASTA](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=BlastHelp) file that a [CRAM](https://samtools.github.io/hts-specs/CRAMv3.pdf) file was encoded with. | +| `--stdout` | Indicates whether to send output to `stdout` instead of a file. Only used with the `--object-id` option and output will always be forced to SAM format. | +| `--verify-connection` | First verify the connection to the object storage repository. | + # Extra Options Here is a list of additional option flags that can be used when the `score-client` executable itself: From 14f5cc0cba4195a50db1ba5e404cdb3831baf9d7 Mon Sep 17 00:00:00 2001 From: Brandon Date: Sun, 9 May 2021 17:39:33 -0400 Subject: [PATCH 06/10] interim updates --- .../score/user-guide/commands.md | 6 +-- .../documentation/score/user-guide/upload.md | 40 ++++++++++++++++++- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/markdown/documentation/score/user-guide/commands.md b/markdown/documentation/score/user-guide/commands.md index aa3bc556..a3140d34 100644 --- a/markdown/documentation/score/user-guide/commands.md +++ b/markdown/documentation/score/user-guide/commands.md @@ -81,11 +81,11 @@ The `upload` command uploads file object(s) to the remote storage repository. | Option | Description | | -------| ------------| -| `--file` | Upload a specific file based on a path to that file. | -| `--force` | Force a re-upload of the file if it already exists in the object storage (overrides file in the repository). | -| `--manifest` | Upload specific files based on a manifest file ID, manifest file URL, or path to the manifest file. 
| +| `--manifest` | Upload specific files using a manifest by providing the manifest file ID, manifest file URL, or path to the manifest file. | | `--md5` | MD5 checksum value of the file to upload. | -| `--object-id` | Upload a specific file based on its object ID. | +| `--object-id` | Upload a specific file by providing its object ID. | | `--validate` | If available, perform validation on file MD5 checksum. | | `--verify-connection` | First verify the connection to the object storage repository. | diff --git a/markdown/documentation/score/user-guide/upload.md b/markdown/documentation/score/user-guide/upload.md index 41c99418..7a6fe7e9 100644 --- a/markdown/documentation/score/user-guide/upload.md +++ b/markdown/documentation/score/user-guide/upload.md @@ -2,4 +2,42 @@ title: Uploading Data --- -??? \ No newline at end of file +To upload files to your configured object storage with the Score client, use the [`upload` command](/documentation/score/user-guide/commands#upload). + +The command provides different methods to upload files: + +* `--file` option : Upload a specific file by providing the path to that file. +* `--manifest` option: Upload specific files using a manifest by providing the manifest file ID, manifest file URL, or path to the manifest file. For example, using a manifest file generated from the [Song](/documentation/song) client. +* `--object-id` option: Upload a specific file by providing its object ID. + +The command has additional options that can be used, see [here](/documentation/score/user-guide/commands#upload) for details. + +# Upload Example + +Here is an example of uploading files using a previously-generated manifest file from Song: + +1. Switch to your home directory and from there, initiate an upload by executing the Score client: + +```shell +$ ./score-client-/bin/score-client upload --manifest .//manifest.txt +``` +Where `` is the location of the previously-generated manifest file. + +2. 
If successful, each file in the manifest will be 100% uploaded, and the Score client will indicate the upload has completed: + +```shell +Uploading object: '/home/ubuntu/songdata/input-files/example.vcf.gz.idx' using the object id e98daf88-fdf8-5a89-9803-9ebafb41de94 +100% [##################################################] Parts: 1/1, Checksum: 100%, Write/sec: 1000B/s, Read/sec: 0B/s +Finalizing... +Total execution time: 3.141 s +Total bytes read : 0 +Total bytes written : 24 +Upload completed +Uploading object: '/home/ubuntu/songdata/input-files/example.vcf.gz' using the object id 440f4559-e905-55ec-bdeb-9518f823e287 +100% [##################################################] Parts: 1/1, Checksum: 100%, Write/sec: 7.8K/s, Read/sec: 0B/s +Finalizing... +Total execution time: 3.105 s +Total bytes read : 0 +Total bytes written : 52 +Upload completed +``` \ No newline at end of file From d484192e0c2cbe1bdb69a35e4c3e7b04cf850d69 Mon Sep 17 00:00:00 2001 From: Brandon Date: Sun, 9 May 2021 19:52:57 -0400 Subject: [PATCH 07/10] interim updates --- .../score/user-guide/download.md | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/markdown/documentation/score/user-guide/download.md b/markdown/documentation/score/user-guide/download.md index ef1dad2a..861221ea 100644 --- a/markdown/documentation/score/user-guide/download.md +++ b/markdown/documentation/score/user-guide/download.md @@ -2,4 +2,30 @@ title: Downloading Data --- -??? \ No newline at end of file +To download files from your configured object storage with the Score client, use the [`download` command](/documentation/score/user-guide/commands#download). + +The command provides different methods to download files: + +* `--analysis-id` option : Download files for a specific [Song](/documentation/song) analysis ID. +* `--manifest` option: Download specific files based on a manifest file ID, manifest file URL, or path to the manifest file. 
+* `--object-id` option: Download a specific file object ID. +* `--program-id` option: Download files for a specific [Song](/documentation/song) program ID. +* `--study-id` option: Download files for a specific [Song](/documentation/song) study ID. + +The command has additional options that can be used, see [here](/documentation/score/user-guide/commands#download) for details. + +# Download Example + +Here is an example of downloading files using a previously-generated manifest file from Song. + +Switch to your home directory and from there, initiate a download by executing the Score client: + +```shell +$ ./score-client-/bin/score-client download --manifest .//manifest.txt --output-dir ./ +``` +Where: + +* `` is the location of the previously-generated manifest file +* `` is the location where you want the files to be downloaded + +If successful, each file in the manifest will be 100% downloaded, and the Score client will indicate the download has completed. \ No newline at end of file From 3664b96eec8cb3f258deedc407e57fc04d016475 Mon Sep 17 00:00:00 2001 From: Brandon Date: Tue, 11 May 2021 20:09:46 -0400 Subject: [PATCH 08/10] interim updates --- .../installation/configuration/profiles.md | 17 +++++++++++++---- .../score/installation/installation.md | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/markdown/documentation/score/installation/configuration/profiles.md b/markdown/documentation/score/installation/configuration/profiles.md index fbeb17e4..89bfa87f 100644 --- a/markdown/documentation/score/installation/configuration/profiles.md +++ b/markdown/documentation/score/installation/configuration/profiles.md @@ -47,6 +47,8 @@ upload: connection.timeout: 60000 clean.cron: “0 0 0 * * ?” clean.enabled: true + +object.sentinel: heliograph # Score requires a sample object/file to exist in the object storage for `ping` operations; default is `heliograph` ``` # Azure @@ -58,14 +60,21 @@ For example: ```yaml azure: endpointProtocol: https - 
accountName: abc123 - accountKey: abc123 + accountName: + accountKey: bucket: - name.object: test_object_bucket + name.object: # Name of the bucket or container that will store the object data + policy.upload: # Name of the access policy to use for write/add/modify operations + policy.downolad: # Name of the access policy for the read/list operations upload: - partsize: 1048576 + partsize: 104587 + +download: + partsize: 250000000 # Safe default part size for downloads + +object.sentinel: heliograph # Score requires a sample object/file to exist in the object storage for `ping` operations; default is `heliograph` ``` # Prod diff --git a/markdown/documentation/score/installation/installation.md b/markdown/documentation/score/installation/installation.md index 1e4b8569..56f78374 100644 --- a/markdown/documentation/score/installation/installation.md +++ b/markdown/documentation/score/installation/installation.md @@ -36,7 +36,7 @@ Create these buckets and remember both their IDs, as they will be required as in 4. Make sure to note the **URL**, **access key**, and **secret key** used to access your storage service, as these are also required as input during Score configuration. **Keep these values safe and secure**. -5. If specifically using Amazon S3, makes ure to note the geographic **Region** where you have configured your buckets to be stored, as this is also required as an input during Score configuration. +5. If specifically using Amazon S3, make sure to note the geographic **Region** where you have configured your buckets to be stored, as this is also required as an input during Score configuration. If assistance is required, you may need to contact support with your specific storage provider. 
From 88b35bdee1d76c8605b16e24d754c923b6dfed26 Mon Sep 17 00:00:00 2001 From: Brandon Date: Thu, 13 May 2021 02:39:27 -0400 Subject: [PATCH 09/10] review updates --- .../score/installation/authentication.md | 57 ++++----- .../configuration/object-storage.md | 58 +++++---- .../installation/configuration/profiles.md | 113 ++++++++---------- .../score/installation/configuration/song.md | 11 +- 4 files changed, 109 insertions(+), 130 deletions(-) diff --git a/markdown/documentation/score/installation/authentication.md b/markdown/documentation/score/installation/authentication.md index bab48dd8..90597ad1 100644 --- a/markdown/documentation/score/installation/authentication.md +++ b/markdown/documentation/score/installation/authentication.md @@ -21,37 +21,30 @@ To configure authentication and authorization via Ego, in the `score-server-[ver | Section | Setting | Requirement | Description | |---------|---------|-------------|-------------| -| `server` | `url` | Required | URL to the Ego API endpoint that is used to authenticate a user's API key (token). Specify the host and port where the endpoint is hosted. The endpoint to use is `/oauth/check_token`. See the example below for guidance. | -| | `tokenName` | Required | Name used to identify a token. Typically you should leave this set to the default value, `token`. | -| | `clientId` | Required | This is the client ID for the Score application as configured in Ego. | -| | `clientSecret` | Required | This is the client secret for the Score application as configured in Ego. | -| `server -> scope -> download` | `system` | Required | Scope (permission) that a user's API key must have to enable system-level downloads from Score. Typically you should leave this set to the default value, `score.READ`. | -| `server -> scope -> download -> study` | `prefix` | Required | Prefix that must come before the Song study name when assigning study-level download scopes (permissions) for Score. 
Typically you should leave this set to the default value, `score.`. | -| | `suffix` | Required | Suffix that must come after the Song study name when assigning study-level download scopes (permissions) for Score. Typically you should leave this set to the default value, `.READ`. | -| `server -> scope -> upload` | `system` | Required | Scope (permission) that a user's API key must have to enable system-level uploads to Score. Typically you should leave this set to the default value, `score.READ`. | -| `server -> scope -> upload -> study` | `prefix` | Required | Prefix that must come before the Song study name when assigning study-level upload scopes (permissions) for Score. Typically you should leave this set to the default value, `score.`. | -| | `suffix` | Required | Suffix that must come after the Song study name when assigning study-level upload scopes (permissions) for Score. Typically you should leave this set to the default value, `.READ`. | +| `auth.server.url` | Required | URL to the Ego API endpoint that is used to authenticate a user's API key (token). Specify the host and port where the endpoint is hosted. The endpoint to use is `/oauth/check_token`. See the example below for guidance. | +| `auth.server.tokenName` | Required | Name used to identify a token. Typically you should leave this set to the default value, `token`. | +| `auth.server.clientId` | Required | This is the client ID for the Score application as configured in Ego. | +| `auth.server.clientSecret` | Required | This is the client secret for the Score application as configured in Ego. | +| `auth.server.scope.download.system` | Required | Scope (permission) that a user's API key must have to enable system-level downloads from Score. Typically you should leave this set to the default value, `score.READ`. | +| `auth.server.scope.download.study.prefix` | Required | Prefix that must come before the Song study name when assigning study-level download scopes (permissions) for Score. 
Typically you should leave this set to the default value, `score.`. | +| `auth.server.scope.download.study.suffix` | Required | Suffix that must come after the Song study name when assigning study-level download scopes (permissions) for Score. Typically you should leave this set to the default value, `.READ`. | +| `auth.server.scope.upload.system` | Required | Scope (permission) that a user's API key must have to enable system-level uploads to Score. Typically you should leave this set to the default value, `score.READ`. | +| `auth.server.scope.upload.study.prefix` | Required | Prefix that must come before the Song study name when assigning study-level upload scopes (permissions) for Score. Typically you should leave this set to the default value, `score.`. | +| `auth.server.scope.upload.study.suffix` | Required | Suffix that must come after the Song study name when assigning study-level upload scopes (permissions) for Score. Typically you should leave this set to the default value, `.READ`. | For example: -``` yaml -auth: - server: - url: "https://:/oauth/check_token" - tokenName: "token" - clientId: - clientSecret: - scope: - download: - system: "score.READ" - study: - prefix: "score." - suffix: ".READ" - upload: - system: "score.WRITE" - study: - prefix: "score." - suffix: ".WRITE" +```shell +auth.server.url: "https://localhost:8081/oauth/check_token" +auth.server.tokenName: "token" +auth.server.clientId: "" +auth.server.clientSecret: "" +auth.server.scope.download.system: "score.READ:" +auth.server.scope.download.study.prefix: "score." +auth.server.scope.download.study.suffix: ".READ" +auth.server.scope.upload.system: "score.WRITE" +auth.server.scope.upload.study.prefix: "score." 
+auth.server.scope.upload.study.suffix: ".WRITE" ``` # JWT Profile Example @@ -62,12 +55,10 @@ To make use of JWT authentication, in the `score-server-[version]/conf/applicati | Setting | Requirement | Description | |---------|-------------|-------------| -| `publicKeyUrl` | Required | URL to the Ego API endpoint that is used to retrieve a user's public key . Specify the host and port where the endpoint is hosted. The endpoint to use is `/oauth/token/public_key`. See the example below for guidance. | +| `auth.jwt.publicKeyUrl` | Required | URL to the Ego API endpoint that is used to retrieve a user's public key . Specify the host and port where the endpoint is hosted. The endpoint to use is `/oauth/token/public_key`. See the example below for guidance. | For example: -```yaml -auth: - jwt: - publicKeyUrl: "https://:/oauth/token/public_key" +```shell +auth.jwt.publicKeyUrl: "https://:/oauth/token/public_key" ``` \ No newline at end of file diff --git a/markdown/documentation/score/installation/configuration/object-storage.md b/markdown/documentation/score/installation/configuration/object-storage.md index 863fb937..5ac63d84 100644 --- a/markdown/documentation/score/installation/configuration/object-storage.md +++ b/markdown/documentation/score/installation/configuration/object-storage.md @@ -17,17 +17,18 @@ To connect Score to AWS, Ceph, or Minio storage, in the `score-server-[version]/ | Setting | Requirement | Description | |---------|-------------|-------------| -| `s3 -> endpoint` | Required | URL of the storage service's API endpoint. Score will send requests to this URL when interacting with the service's API. | -| `s3 -> accessKey` | Required | Access key required to access the buckets in your object storage. You should have recorded this as part of your prequisite setup. | -| `s3 -> secretKey` | Required | Secret key required to access the buckets in your object storage. You should have recorded this as part of your prequisite setup. 
| -| `s3 -> sigV4Enabled` | Required | If your storage service uses the AWS S3 [Signature Version 4](https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html) signing process for authentication, set this to `true`. Else set this to `false`. | -| `bucket -> name.object` | Required | ID of the bucket used to store object data for Score. You should have recorded this as part of your prequisite setup. | -| `bucket -> name.state` | Required | ID of the bucket used to store and maintain state information for Score. You should have recorded this as part of your prequisite setup. | -| `upload -> partsize` | Required | Size, expressing in bytes, of each part or chunk to upload at once to the object storage. You can use this parameter to adjust for your desired speed and performance. | -| `upload -> retry.limit` | Required | Number of times that Score will retry failed upload requests to the object storage before aborting. | -| `upload -> connectionltimeout` | Required | Number of milliseconds that Score will wait on a stale or idle connection to the object storage before timing out. | -| `upload -> clean.cron` | Optional | If `clean.enabled` = `true`, use this parameter to configure the schedule on which the cleanup cron job runs. The cleanup job cleans up old upload jobs, between Score and the object storage. | -| `upload -> clean.enabled` | Optional | Set to `true` if you want to run a cron job that cleans up old upload jobs between Score and the object storage. If `true`, the cron schedule can be set with the `clean.cron` parameter. Else set this value `false` if you do not want any cleanup. | +| `s3.endpoint` | Required | URL of the storage service's API endpoint. Score will send requests to this URL when interacting with the service's API. | +| `s3.accessKey` | Required | Access key required to access the buckets in your object storage. You should have recorded this as part of your prequisite setup. 
|
+| `s3.secretKey` | Required | Secret key required to access the buckets in your object storage. You should have recorded this as part of your prequisite setup. |
+| `s3.sigV4Enabled` | Required | If your storage service uses the AWS S3 [Signature Version 4](https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html) signing process for authentication, set this to `true`. Else set this to `false`. |
+| `bucket.name.object` | Required | ID of the bucket used to store object data for Score. You should have recorded this as part of your prequisite setup. |
+| `bucket.name.state` | Required | ID of the bucket used to store and maintain state information for Score. You should have recorded this as part of your prequisite setup. |
+| `upload.partsize` | Required | Size, expressed in bytes, of each part or chunk to upload at once to the object storage. You can use this parameter to adjust for your desired speed and performance. |
+| `upload.retry.limit` | Required | Number of times that Score will retry failed upload requests to the object storage before aborting. |
+| `upload.connection.timeout` | Required | Number of milliseconds that Score will wait on a stale or idle connection to the object storage before timing out. |
+| `upload.clean.cron` | Optional | If `clean.enabled` = `true`, use this parameter to configure the schedule on which the cleanup cron job runs. The cleanup job cleans up old upload jobs, between Score and the object storage. |
+| `upload.clean.enabled` | Optional | Set to `true` if you want to run a cron job that cleans up old upload jobs between Score and the object storage. If `true`, the cron schedule can be set with the `clean.cron` parameter. Else set this value `false` if you do not want any cleanup. |
+| `object.sentinel` | Required | Name of the sample object/file that must exist in object storage for Score to perform `ping` operations. 
Default is `heliograph.` | For example: @@ -58,23 +59,30 @@ To connect Score to Microsoft Azure storage, in the `score-server-[version]/conf | Setting | Requirement | Description | |---------|-------------|-------------| -| `azure -> endpointProtocol` | Required | Indicates the communication protocol used by the Azure storage service's API endpoint. For example, `https`. | -| `azure -> accountName` | Required | Account name required to access your Azure objectd storage. You should have recorded this as part of your prequisite setup. | -| `azure -> accountKey` | Required | Account key required to access your Azure objectd storage. You should have recorded this as part of your prequisite setup. | -| `bucket -> name.object` | Required | ID of the bucket used to store object data for Score. You should have recorded this as part of your prequisite setup. | -| `upload -> partsize` | Required | Size, expressing in bytes, of each part or chunk to upload at once to the object storage. You can use this parameter to adjust for your desired speed and performance. | +| `azure.endpointProtocol` | Required | Indicates the communication protocol used by the Azure storage service's API endpoint. For example, `https`. | +| `azure.accountName` | Required | Account name required to access your Azure object storage. You should have recorded this as part of your prequisite setup. | +| `azure.accountKey` | Required | Account key required to access your Azure object storage. You should have recorded this as part of your prequisite setup. | +| `bucket.name.object` | Required | ID of the bucket used to store object data for Score. You should have recorded this as part of your prequisite setup. | +| `bucket.policy.upload` | Required | Name of the access policy to use for write/add/modify operations. | +| `bucket.policy.download` | Required | Name of the access policy for the read/list operations. 
| +| `upload.partsize` | Required | Size, expressed in bytes, of each part or chunk to upload at once to the object storage. You can use this parameter to adjust for your desired speed and performance. | +| `download.partsize` | Required | Size, expressed in bytes, of each part or chunk to download at once from the object storage. You can use this parameter to adjust for your desired speed and performance. | +| `object.sentinel` | Required | Name of the sample object/file that must exist in object storage for Score to perform `ping` operations. Default is `heliograph.` | For example: -```yaml -azure: - endpointProtocol: https - accountName: abc123 - accountKey: abc123 +```shell +azure.endpointProtocol: "https" +azure.accountName: "" +azure.accountKey: "" -bucket: - name.object: test_object_bucket +bucket.name.object: "" # Name of the bucket or container that will store the object data +bucket.policy.upload: "" # Name of the access policy to use for write/add/modify operations +bucket.policy.downolad: "" # Name of the access policy for the read/list operations -upload: - partsize: 1048576 +upload.partsize: 104587 + +download.partsize: 250000000 # Safe default part size for downloads + +object.sentinel: "heliograph" # Score requires a sample object/file to exist in the object storage for `ping` operations; default is `heliograph` ``` \ No newline at end of file diff --git a/markdown/documentation/score/installation/configuration/profiles.md b/markdown/documentation/score/installation/configuration/profiles.md index 89bfa87f..5d33cd73 100644 --- a/markdown/documentation/score/installation/configuration/profiles.md +++ b/markdown/documentation/score/installation/configuration/profiles.md @@ -4,12 +4,10 @@ title: Run Profiles Score uses [Spring Profiles](https://docs.spring.io/spring-boot/docs/1.2.0.M1/reference/html/boot-features-profiles.html) as a feature to manage the running of a Score server in different environments or when integrating with different services. 
For example, spring profiles allows different configuration settings to be applied depending on the type of object storage service being used. -During configuration, you will need to enable the active profiles in the `score-server-[version]/conf/application.properties` file. The active profiles to use for a particular configuration can be specified using the `profiles` argument which should be added at the start of the `spring` block, for example: +During configuration, you will need to enable the active profiles in the `score-server-[version]/conf/application.properties` file. The active profiles to use for a particular configuration can be specified using the `spring.profiles.active` property which should be added at the start of the properties file, for example: -```yaml -spring: - profiles: - active: "default,prod,secure,jwt" +```shell +spring.profiles.active: "default,prod,secure,jwt" ``` Descriptions of the profiles available to Score are provided below. Depending on the type of configuration, some profiles are required to run and some are optional. 
@@ -28,27 +26,24 @@ The `default` profile is required if using AWS, Ceph, or Minio as your object st For example: -```yaml -s3: - endpoint: "http://localhost:9000" - accessKey: abc123 - secretKey: abc123 - sigV4Enabled: true - -bucket: - name.object: test_object_bucket - name.state: test_state_bucket - size.pool: 0 - size.key: 2 - -upload: - partsize: 1048576 - retry.limit: 10 - connection.timeout: 60000 - clean.cron: “0 0 0 * * ?” - clean.enabled: true - -object.sentinel: heliograph # Score requires a sample object/file to exist in the object storage for `ping` operations; default is `heliograph` +```shell +s3.endpoint: "http://localhost:9000" +s3.accessKey: "abc123" +s3.secretKey: "abc123" +s3.sigV4Enabled: true + +bucket.name.object: "test_object_bucket" +bucket.name.state: "test_state_bucket" +bucket.size.pool: 0 +bucket.size.key: 2 + +upload.partsize: 1048576 +upload.retry.limit: 10 +upload.connection.timeout: 60000 +upload.clean.cron: "0 0 0 * * ?" +upload.clean.enabled: true + +object.sentinel: "heliograph" # Score requires a sample object/file to exist in the object storage for `ping` operations; default is `heliograph` ``` # Azure @@ -57,24 +52,20 @@ The `azure` profile is required if using Microsoft Azure storage as your object For example: -```yaml -azure: - endpointProtocol: https - accountName: - accountKey: +```shell +azure.endpointProtocol: "https" +azure.accountName: "" +azure.accountKey: "" -bucket: - name.object: # Name of the bucket or container that will store the object data - policy.upload: # Name of the access policy to use for write/add/modify operations - policy.downolad: # Name of the access policy for the read/list operations +bucket.name.object: "" # Name of the bucket or container that will store the object data +bucket.policy.upload: "" # Name of the access policy to use for write/add/modify operations +bucket.policy.downolad: "" # Name of the access policy for the read/list operations -upload: - partsize: 104587 +upload.partsize: 
104587 -download: - partsize: 250000000 # Safe default part size for downloads +download.partsize: 250000000 # Safe default part size for downloads -object.sentinel: heliograph # Score requires a sample object/file to exist in the object storage for `ping` operations; default is `heliograph` +object.sentinel: "heliograph" # Score requires a sample object/file to exist in the object storage for `ping` operations; default is `heliograph` ``` # Prod @@ -83,10 +74,9 @@ The `prod` profile is used to enable production deployments and most importantly For example: -```yaml -metadata: - url: "http://localhost:8089/" - ssl.enabled: false +```shell +metadata.url: "http://localhost:8089/" +metadata.ssl.enabled: false ``` # Secure @@ -95,24 +85,17 @@ The `secure` profile is required if the [Overture](https://overture.bio) product For example: -``` yaml -auth: - server: - url: https://localhost:8081/oauth/check_token - tokenName: token - clientId: score - clientSecret: scoresecret - scope: - download: - system: score.READ - study: - prefix: score. - suffix: .READ - upload: - system: score.WRITE - study: - prefix: score. - suffix: .WRITE +```shell +auth.server.url: "https://localhost:8081/oauth/check_token" +auth.server.tokenName: "token" +auth.server.clientId: "score" +auth.server.clientSecret: "scoresecret" +auth.server.scope.download.system: "score.READ:" +auth.server.scope.download.study.prefix: "score." +auth.server.scope.download.study.suffix: ".READ" +auth.server.scope.upload.system: "score.WRITE" +auth.server.scope.upload.study.prefix: "score." 
+auth.server.scope.upload.study.suffix: ".WRITE" ``` # JWT @@ -121,8 +104,6 @@ The `jwt` profile can be optionally used if you want to support both JWT and API For example: -```yaml -auth: - jwt: - publicKeyUrl: "https://localhost:8443/oauth/token/public_key" +```shell +auth.jwt.publicKeyUrl: "https://localhost:8443/oauth/token/public_key" ``` \ No newline at end of file diff --git a/markdown/documentation/score/installation/configuration/song.md b/markdown/documentation/score/installation/configuration/song.md index d1bca89d..602d56e8 100644 --- a/markdown/documentation/score/installation/configuration/song.md +++ b/markdown/documentation/score/installation/configuration/song.md @@ -12,13 +12,12 @@ To connect Score to your Song server, in the `score-server-[version]/conf/applic | Setting | Requirement | Description | |---------|-------------|-------------| -| `url` | Required | URL to the Song server API that you have setup. When communicating with Song, Score will make requests via this API. | -| `ssl.enabled` | Optional | If using SSL encryption to securely communicate with Song, set this to `true`. Else if not using SSL, set this to `false.` | +| `metadata.url` | Required | URL to the Song server API that you have setup. When communicating with Song, Score will make requests via this API. | +| `metadata.ssl.enabled` | Optional | If using SSL encryption to securely communicate with Song, set this to `true`. 
Else if not using SSL, set this to `false.` | For example: -```yaml -metadata: - url: "http://localhost:8089/" - ssl.enabled: false +```shell +metadata.url: "http://localhost:8089/" +metadata.ssl.enabled: false ``` \ No newline at end of file From 42a91f42ef9e1e2fdb207981cb702c5488cb094c Mon Sep 17 00:00:00 2001 From: Brandon Date: Thu, 13 May 2021 12:28:35 -0400 Subject: [PATCH 10/10] final review updates --- .../score/installation/authentication.md | 22 ++--- .../installation/configuration/bootstrap.md | 4 +- .../configuration/object-storage.md | 53 ++++++------ .../installation/configuration/profiles.md | 80 +++++++++---------- .../score/installation/configuration/song.md | 4 +- 5 files changed, 80 insertions(+), 83 deletions(-) diff --git a/markdown/documentation/score/installation/authentication.md b/markdown/documentation/score/installation/authentication.md index 90597ad1..cb66f2e1 100644 --- a/markdown/documentation/score/installation/authentication.md +++ b/markdown/documentation/score/installation/authentication.md @@ -35,16 +35,16 @@ To configure authentication and authorization via Ego, in the `score-server-[ver For example: ```shell -auth.server.url: "https://localhost:8081/oauth/check_token" -auth.server.tokenName: "token" -auth.server.clientId: "" -auth.server.clientSecret: "" -auth.server.scope.download.system: "score.READ:" -auth.server.scope.download.study.prefix: "score." -auth.server.scope.download.study.suffix: ".READ" -auth.server.scope.upload.system: "score.WRITE" -auth.server.scope.upload.study.prefix: "score." -auth.server.scope.upload.study.suffix: ".WRITE" +auth.server.url="https://localhost:8081/oauth/check_token" +auth.server.tokenName="token" +auth.server.clientId="" +auth.server.clientSecret="" +auth.server.scope.download.system="score.READ:" +auth.server.scope.download.study.prefix="score." +auth.server.scope.download.study.suffix=".READ" +auth.server.scope.upload.system="score.WRITE" +auth.server.scope.upload.study.prefix="score." 
+auth.server.scope.upload.study.suffix=".WRITE" ``` # JWT Profile Example @@ -60,5 +60,5 @@ To make use of JWT authentication, in the `score-server-[version]/conf/applicati For example: ```shell -auth.jwt.publicKeyUrl: "https://:/oauth/token/public_key" +auth.jwt.publicKeyUrl="https://:/oauth/token/public_key" ``` \ No newline at end of file diff --git a/markdown/documentation/score/installation/configuration/bootstrap.md b/markdown/documentation/score/installation/configuration/bootstrap.md index 4d818607..d2d9ba49 100644 --- a/markdown/documentation/score/installation/configuration/bootstrap.md +++ b/markdown/documentation/score/installation/configuration/bootstrap.md @@ -20,8 +20,8 @@ $ cd $SCORE_SERVER_HOME/conf For example: -```yaml -spring.cloud.vault.enabled=false +```shell +spring.cloud.vault.enabled="false" ``` 3. Save the file. \ No newline at end of file diff --git a/markdown/documentation/score/installation/configuration/object-storage.md b/markdown/documentation/score/installation/configuration/object-storage.md index 5ac63d84..088ae3d5 100644 --- a/markdown/documentation/score/installation/configuration/object-storage.md +++ b/markdown/documentation/score/installation/configuration/object-storage.md @@ -32,25 +32,22 @@ To connect Score to AWS, Ceph, or Minio storage, in the `score-server-[version]/ For example: -```yaml -s3: - endpoint: "http://localhost:9000" - accessKey: abc123 - secretKey: abc123 - sigV4Enabled: true - -bucket: - name.object: test_object_bucket - name.state: test_state_bucket - size.pool: 0 - size.key: 2 - -upload: - partsize: 1048576 - retry.limit: 10 - connection.timeout: 60000 - clean.cron: “0 0 0 * * ?” - clean.enabled: true +```shell +s3.endpoint="http://localhost:9000" +s3.accessKey="abc123" +s3.secretKey="abc123" +s3.sigV4Enabled="true" + +bucket.name.object="test_object_bucket" +bucket.name.state="test_state_bucket": +bucket.size.pool=0 +bucket.size.key=2 + +upload.partsize=1048576 +upload.retry.limit=10 
+upload.connection.timeout=60000 +upload.clean.cron="0 0 0 * * ?" +upload.clean.enabled="true" ``` # Azure Profile Example @@ -72,17 +69,17 @@ To connect Score to Microsoft Azure storage, in the `score-server-[version]/conf For example: ```shell -azure.endpointProtocol: "https" -azure.accountName: "" -azure.accountKey: "" +azure.endpointProtocol="https" +azure.accountName="" +azure.accountKey="" -bucket.name.object: "" # Name of the bucket or container that will store the object data -bucket.policy.upload: "" # Name of the access policy to use for write/add/modify operations -bucket.policy.downolad: "" # Name of the access policy for the read/list operations +bucket.name.object="" # Name of the bucket or container that will store the object data +bucket.policy.upload="" # Name of the access policy to use for write/add/modify operations +bucket.policy.downolad="" # Name of the access policy for the read/list operations -upload.partsize: 104587 +upload.partsize=104587 -download.partsize: 250000000 # Safe default part size for downloads +download.partsize=250000000 # Safe default part size for downloads -object.sentinel: "heliograph" # Score requires a sample object/file to exist in the object storage for `ping` operations; default is `heliograph` +object.sentinel="heliograph" # Score requires a sample object/file to exist in the object storage for `ping` operations; default is `heliograph` ``` \ No newline at end of file diff --git a/markdown/documentation/score/installation/configuration/profiles.md b/markdown/documentation/score/installation/configuration/profiles.md index 5d33cd73..f9537730 100644 --- a/markdown/documentation/score/installation/configuration/profiles.md +++ b/markdown/documentation/score/installation/configuration/profiles.md @@ -7,7 +7,7 @@ Score uses [Spring Profiles](https://docs.spring.io/spring-boot/docs/1.2.0.M1/re During configuration, you will need to enable the active profiles in the `score-server-[version]/conf/application.properties` 
file. The active profiles to use for a particular configuration can be specified using the `spring.profiles.active` property which should be added at the start of the properties file, for example: ```shell -spring.profiles.active: "default,prod,secure,jwt" +spring.profiles.active="default,prod,secure,jwt" ``` Descriptions of the profiles available to Score are provided below. Depending on the type of configuration, some profiles are required to run and some are optional. @@ -27,23 +27,23 @@ The `default` profile is required if using AWS, Ceph, or Minio as your object st For example: ```shell -s3.endpoint: "http://localhost:9000" -s3.accessKey: "abc123" -s3.secretKey: "abc123" -s3.sigV4Enabled: true - -bucket.name.object: "test_object_bucket" -bucket.name.state: "test_state_bucket" -bucket.size.pool: 0 -bucket.size.key: 2 - -upload.partsize: 1048576 -upload.retry.limit: 10 -upload.connection.timeout: 60000 -upload.clean.cron: "0 0 0 * * ?" -upload.clean.enabled: true - -object.sentinel: "heliograph" # Score requires a sample object/file to exist in the object storage for `ping` operations; default is `heliograph` +s3.endpoint="http://localhost:9000" +s3.accessKey="abc123" +s3.secretKey="abc123" +s3.sigV4Enabled="true" + +bucket.name.object="test_object_bucket" +bucket.name.state="test_state_bucket" +bucket.size.pool=0 +bucket.size.key=2 + +upload.partsize=1048576 +upload.retry.limit=10 +upload.connection.timeout=60000 +upload.clean.cron="0 0 0 * * ?" 
+upload.clean.enabled="true" + +object.sentinel="heliograph" # Score requires a sample object/file to exist in the object storage for `ping` operations; default is `heliograph` ``` # Azure @@ -53,19 +53,19 @@ The `azure` profile is required if using Microsoft Azure storage as your object For example: ```shell -azure.endpointProtocol: "https" -azure.accountName: "" -azure.accountKey: "" +azure.endpointProtocol="https" +azure.accountName="" +azure.accountKey="" -bucket.name.object: "" # Name of the bucket or container that will store the object data -bucket.policy.upload: "" # Name of the access policy to use for write/add/modify operations -bucket.policy.downolad: "" # Name of the access policy for the read/list operations +bucket.name.object="" # Name of the bucket or container that will store the object data +bucket.policy.upload="" # Name of the access policy to use for write/add/modify operations +bucket.policy.downolad="" # Name of the access policy for the read/list operations -upload.partsize: 104587 +upload.partsize=104587 -download.partsize: 250000000 # Safe default part size for downloads +download.partsize=250000000 # Safe default part size for downloads -object.sentinel: "heliograph" # Score requires a sample object/file to exist in the object storage for `ping` operations; default is `heliograph` +object.sentinel="heliograph" # Score requires a sample object/file to exist in the object storage for `ping` operations; default is `heliograph` ``` # Prod @@ -75,8 +75,8 @@ The `prod` profile is used to enable production deployments and most importantly For example: ```shell -metadata.url: "http://localhost:8089/" -metadata.ssl.enabled: false +metadata.url="http://localhost:8089/" +metadata.ssl.enabled="false" ``` # Secure @@ -86,16 +86,16 @@ The `secure` profile is required if the [Overture](https://overture.bio) product For example: ```shell -auth.server.url: "https://localhost:8081/oauth/check_token" -auth.server.tokenName: "token" -auth.server.clientId: 
"score" -auth.server.clientSecret: "scoresecret" -auth.server.scope.download.system: "score.READ:" -auth.server.scope.download.study.prefix: "score." -auth.server.scope.download.study.suffix: ".READ" -auth.server.scope.upload.system: "score.WRITE" -auth.server.scope.upload.study.prefix: "score." -auth.server.scope.upload.study.suffix: ".WRITE" +auth.server.url="https://localhost:8081/oauth/check_token" +auth.server.tokenName="token" +auth.server.clientId="score" +auth.server.clientSecret="scoresecret" +auth.server.scope.download.system="score.READ:" +auth.server.scope.download.study.prefix="score." +auth.server.scope.download.study.suffix=".READ" +auth.server.scope.upload.system="score.WRITE" +auth.server.scope.upload.study.prefix="score." +auth.server.scope.upload.study.suffix=".WRITE" ``` # JWT @@ -105,5 +105,5 @@ The `jwt` profile can be optionally used if you want to support both JWT and API For example: ```shell -auth.jwt.publicKeyUrl: "https://localhost:8443/oauth/token/public_key" +auth.jwt.publicKeyUrl="https://localhost:8443/oauth/token/public_key" ``` \ No newline at end of file diff --git a/markdown/documentation/score/installation/configuration/song.md b/markdown/documentation/score/installation/configuration/song.md index 602d56e8..9ade0b1f 100644 --- a/markdown/documentation/score/installation/configuration/song.md +++ b/markdown/documentation/score/installation/configuration/song.md @@ -18,6 +18,6 @@ To connect Score to your Song server, in the `score-server-[version]/conf/applic For example: ```shell -metadata.url: "http://localhost:8089/" -metadata.ssl.enabled: false +metadata.url="http://localhost:8089/" +metadata.ssl.enabled="false" ``` \ No newline at end of file