From 5ff1fde189cc138e7e3daa36c5a69c2df236e7e6 Mon Sep 17 00:00:00 2001 From: Jon Eubank Date: Thu, 29 Aug 2024 11:17:29 -0400 Subject: [PATCH 1/9] Strictly define package exported functions, types, and files --- packages/client/package.json | 4 +- .../src/changeAnalysis/changeAnalyzer.ts | 10 +- packages/client/src/index.ts | 75 ++++++++++- packages/client/src/processing/index.ts | 20 ++- packages/client/src/rest/index.ts | 127 +++++------------- packages/client/test/changeAnalyzer.spec.ts | 7 +- packages/dictionary/package.json | 3 + packages/validation/package.json | 3 + 8 files changed, 132 insertions(+), 117 deletions(-) diff --git a/packages/client/package.json b/packages/client/package.json index 8a248406..4cd8b71f 100644 --- a/packages/client/package.json +++ b/packages/client/package.json @@ -1,11 +1,11 @@ { "name": "@overture-stack/lectern-client", - "version": "1.5.0", + "version": "0.1.0-beta.1", "files": [ "dist/" ], "main": "./dist/index.js", - "typings": "./dist/index.d.ts", + "types": "./dist/index.d.ts", "description": "TypeScript client to interact with Lectern servers and perform data validation versus Lectern dictionaries.", "scripts": { "build": "rimraf dist && tsc -p tsconfig.build.json", diff --git a/packages/client/src/changeAnalysis/changeAnalyzer.ts b/packages/client/src/changeAnalysis/changeAnalyzer.ts index 55f50cae..c65a4f9f 100644 --- a/packages/client/src/changeAnalysis/changeAnalyzer.ts +++ b/packages/client/src/changeAnalysis/changeAnalyzer.ts @@ -17,14 +17,8 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -import { - DictionaryDiff, - FieldChanges, - RestrictionRange, - SchemaField, - ValueChange, -} from '@overture-stack/lectern-dictionary'; -import { restClient } from '../rest'; +import { DictionaryDiff, FieldChanges, SchemaField, ValueChange } from '@overture-stack/lectern-dictionary'; +import * as restClient from '../rest'; import { ChangeAnalysis, RestrictionChanges } from './changeAnalysisTypes'; const isValueChange = (input: FieldChanges): input is ValueChange => ValueChange.safeParse(input).success; diff --git a/packages/client/src/index.ts b/packages/client/src/index.ts index 0013e312..9cd53360 100644 --- a/packages/client/src/index.ts +++ b/packages/client/src/index.ts @@ -17,6 +17,75 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -export * as analyzer from './changeAnalysis'; -export * as functions from './processing'; -export { restClient } from './rest'; +// Exporting zod Schemas and types for the basic dictionary components and data types +export { + DataRecord, + DataRecordValue, + UnprocessedDataRecord, + ArrayDataValue, + SingleDataValue, + Dictionary, + Schema, + SchemaField, + Result, +} from '@overture-stack/lectern-dictionary'; +import * as _validation from '@overture-stack/lectern-validation'; + +// Exporting all result types used in the parsing and validation functions +export type { + TestResult, + TestResultValid, + TestResultInvalid, + SchemaRecordError, + DictionaryValidationError, + FieldValidationError, + ParseDictionaryData, + ParseDictionaryFailure, + ParseDictionaryResult, + ParseSchemaError, + ParseSchemaFailureData, + ParseSchemaResult, + ParseRecordResult, +} from '@overture-stack/lectern-validation'; + +import * as _processing from './processing'; +export * as rest from './rest'; + +/** + * Processing functions will perform both data parsing and validation and then return an + * object with the parsed data and a list of any errors encountered. 
+ * + * The available processing functions are concerned with data at different scales: + * - processRecord: will process a single data record using a schema definition + * - processSchema: will process a collection of data records using a single schema definition + * - processDictionary: will process multiple collections of data records, each vs a different + * schema definition that is found in a dictionary definition. + */ +export const processing = _processing; + +/** + * Validation functions will perform all restriction tests on data objects. The result will indicate + * if all tests passed or if there were some failures, and return a list of the failures that occurred. + */ +export const validation = { + validateField: _validation.validateField, + validateRecord: _validation.validateRecord, + validateSchema: _validation.validateSchema, + validateDictionary: _validation.validateDictionary, +}; + +/** + * Parsing functions will convert an object with string values into a new object with all values properly typed + * to match the data types from a schema definition. This parsing process will convert values to numbers, booleans, + * and arrays, as required. String values may also be cleaned up to trim whitespace and match the casing of codeList + * values. + * + * The parsing functions will return a Result object that will indicate if parsing was successful or if there were + * string values that could not be converted to the required data types. 
+ */ +export const parse = { + parseFieldValue: _validation.parseFieldValue, + parseRecordValues: _validation.parseRecordValues, + parseSchemaValues: _validation.parseSchemaValues, + parseDictionaryValues: _validation.parseDictionaryValues, +}; diff --git a/packages/client/src/processing/index.ts b/packages/client/src/processing/index.ts index 28f6894b..015f184b 100644 --- a/packages/client/src/processing/index.ts +++ b/packages/client/src/processing/index.ts @@ -28,6 +28,18 @@ import { const L = loggerFor(__filename); +/** + * Process data from multiple schemas for a dictionary. + * + * Parse and then validate collections of data records, with each collection belonging to a different schema. + * The data argument is an object where each key is a schema name and each element an array of data records + * that belong to that schema type. If there are errors found during conversion, + * those errors will be returned and validation will be skipped. The final result will indicate if the + * data processing attempt was successful, or failed due to errors during parsing or validation. + * @param data + * @param dictionary + * @returns + */ export const processDictionary = ( data: Record, dictionary: Dictionary, @@ -68,7 +80,9 @@ export const processDictionary = ( /** * Process a list of records for a single schema. * - * Parse and then validate each record in the list. + * Parse and then validate each record in the list. If there are errors found during conversion, + * those errors will be returned and validation will be skipped. The final result will indicate if the + * data processing attempt was successful, or failed due to errors during parsing or validation. * @param dictionary * @param definition * @param records @@ -106,9 +120,9 @@ export const processSchema = (records: UnprocessedDataRecord[], schema: Schema): * * Parse and then validate a data record. If there are errors found during conversion, * those errors will be returned and validation will be skipped. 
The final result will indicate if the - * data processing attempt was successful, or failed due to errors in conversion or validation. + * data processing attempt was successful, or failed due to errors during parsing or validation. */ -export const processRecord = (schema: Schema, data: UnprocessedDataRecord): RecordProcessingResult => { +export const processRecord = (data: UnprocessedDataRecord, schema: Schema): RecordProcessingResult => { const parseResult = validation.parseRecordValues(data, schema); if (!parseResult.success) { diff --git a/packages/client/src/rest/index.ts b/packages/client/src/rest/index.ts index 0e49d188..ea71a6ae 100644 --- a/packages/client/src/rest/index.ts +++ b/packages/client/src/rest/index.ts @@ -27,57 +27,8 @@ import { import fetch from 'node-fetch'; import promiseTools from 'promise-tools'; import { loggerFor } from '../logger'; -const L = loggerFor(__filename); - -export interface SchemaServiceRestClient { - fetchSchema(schemaSvcUrl: string, name: string, version: string): Promise; - fetchDiff(schemaSvcUrl: string, name: string, fromVersion: string, toVersion: string): Promise; -} - -export const restClient: SchemaServiceRestClient = { - fetchSchema: async (schemaSvcUrl: string, name: string, version: string): Promise => { - // for testing where we need to work against stub schema - if (schemaSvcUrl.startsWith('file://')) { - return await loadSchemaFromFile(version, schemaSvcUrl, name); - } - if (!schemaSvcUrl) { - throw new Error('please configure a valid url to get schema from'); - } - const url = `${schemaSvcUrl}/dictionaries?name=${name}&version=${version}`; - try { - L.debug(`in fetch live schema ${version}`); - const schemaDictionary = await doRequest(url); - // todo validate response and map it to a schema - return schemaDictionary[0] as Dictionary; - } catch (error: unknown) { - L.error(`failed to fetch schema at url: ${url} - ${unknownToString(error)}`); - throw error; - } - }, - fetchDiff: async ( - schemaSvcBaseUrl: 
string, - name: string, - fromVersion: string, - toVersion: string, - ): Promise => { - // TODO: Error handling (return result?) - const url = `${schemaSvcBaseUrl}/diff?name=${name}&left=${fromVersion}&right=${toVersion}`; - const diffResponse = await doRequest(url); - - const diffArray = DictionaryDiffArray.parse(diffResponse); - - const result: DictionaryDiff = new Map(); - for (const entry of diffArray) { - const fieldName = entry[0]; - if (entry[1]) { - const fieldDiff: FieldDiff = entry[1]; - result.set(fieldName, fieldDiff); - } - } - return result; - }, -}; +const L = loggerFor(__filename); const doRequest = async (url: string) => { let response: any; @@ -94,55 +45,37 @@ const doRequest = async (url: string) => { } }; -async function loadSchemaFromFile(version: string, schemaSvcUrl: string, name: string) { - L.debug(`in fetch stub schema ${version}`); - const result = delay(1000); - const dictionary = await result(() => { - const dictionaries: Dictionary[] = require(schemaSvcUrl.substring(7, schemaSvcUrl.length)) - .dictionaries as Dictionary[]; - if (!dictionaries) { - throw new Error('your mock json is not structured correctly, see sampleFiles/sample-schema.json'); - } - const dic = dictionaries.find((d: any) => d.version === version && d.name === name); - if (!dic) { - return undefined; - } - return dic; - }); - if (dictionary === undefined) { - throw new Error("couldn't load stub dictionary with the criteria specified"); +export const fetchSchema = async (schemaSvcUrl: string, name: string, version: string): Promise => { + const url = `${schemaSvcUrl}/dictionaries?name=${name}&version=${version}`; + try { + L.debug(`in fetch live schema ${version}`); + const schemaDictionary = await doRequest(url); + // todo validate response and map it to a schema + return schemaDictionary[0] as Dictionary; + } catch (error: unknown) { + L.error(`failed to fetch schema at url: ${url} - ${unknownToString(error)}`); + throw error; } - L.debug(`schema found 
${dictionary.version}`); - return dictionary; -} +}; +export const fetchDiff = async ( + schemaSvcBaseUrl: string, + name: string, + fromVersion: string, + toVersion: string, +): Promise => { + // TODO: Error handling (return result?) + const url = `${schemaSvcBaseUrl}/diff?name=${name}&left=${fromVersion}&right=${toVersion}`; + const diffResponse = await doRequest(url); -async function loadDiffFromFile(schemaSvcBaseUrl: string, name: string, fromVersion: string, toVersion: string) { - L.debug(`in fetch stub diffs ${name} ${fromVersion} ${toVersion}`); - const result = delay(1000); - const diff = await result(() => { - const diffResponse = require(schemaSvcBaseUrl.substring(7, schemaSvcBaseUrl.length)).diffs as any[]; - if (!diffResponse) { - throw new Error('your mock json is not structured correctly, see sampleFiles/sample-schema.json'); - } + const diffArray = DictionaryDiffArray.parse(diffResponse); - const diff = diffResponse.find( - (d) => d.fromVersion === fromVersion && d.toVersion === toVersion && d.name === name, - ); - if (!diff) { - return undefined; + const result: DictionaryDiff = new Map(); + for (const entry of diffArray) { + const fieldName = entry[0]; + if (entry[1]) { + const fieldDiff: FieldDiff = entry[1]; + result.set(fieldName, fieldDiff); } - return diff; - }); - if (diff === undefined) { - throw new Error("couldn't load stub diff with the criteria specified, check your stub file"); } - return diff.data; -} - -function delay(milliseconds: number) { - return async (result: () => T | undefined) => { - return new Promise((resolve, reject) => { - setTimeout(() => resolve(result()), milliseconds); - }); - }; -} + return result; +}; diff --git a/packages/client/test/changeAnalyzer.spec.ts b/packages/client/test/changeAnalyzer.spec.ts index 426cd588..ee7d283b 100644 --- a/packages/client/test/changeAnalyzer.spec.ts +++ b/packages/client/test/changeAnalyzer.spec.ts @@ -17,10 +17,9 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE. */ -import chai from 'chai'; import { DiffUtils } from '@overture-stack/lectern-dictionary'; -import { analyzer } from '../src'; -import { ChangeAnalysis } from '../src/changeAnalysis'; +import chai from 'chai'; +import { analyzeChanges, ChangeAnalysis } from '../src/changeAnalysis'; import diffResponse from './fixtures/diffResponse'; chai.should(); @@ -107,7 +106,7 @@ const expectedResult: ChangeAnalysis = { describe('changeAnalyzer', () => { it('categorize changes correctly', () => { - const result = analyzer.analyzeChanges(diffFixture); + const result = analyzeChanges(diffFixture); result.should.deep.eq(expectedResult); }); }); diff --git a/packages/dictionary/package.json b/packages/dictionary/package.json index 735c3212..d5ced1ca 100644 --- a/packages/dictionary/package.json +++ b/packages/dictionary/package.json @@ -2,6 +2,9 @@ "name": "@overture-stack/lectern-dictionary", "version": "0.1.0-beta.1", "description": "", + "files": [ + "dist/" + ], "main": "dist/index.js", "scripts": { "build": "pnpm build:clean && tsc", diff --git a/packages/validation/package.json b/packages/validation/package.json index 05723425..87bd0463 100644 --- a/packages/validation/package.json +++ b/packages/validation/package.json @@ -2,6 +2,9 @@ "name": "@overture-stack/lectern-validation", "version": "0.1.0-beta.1", "description": "Logic for validating data using a Lectern dictionary", + "files": [ + "dist/" + ], "main": "dist/index.js", "scripts": { "build": "pnpm build:clean && tsc -p ./tsconfig.build.json", From 53ab75e4b18baaa2dd93128e5dae139d52d28c63 Mon Sep 17 00:00:00 2001 From: Jon Eubank Date: Thu, 29 Aug 2024 12:01:21 -0400 Subject: [PATCH 2/9] Standardize package versions to `2.0.0-beta.1` --- apps/server/package.json | 2 +- packages/client/package.json | 2 +- packages/dictionary/package.json | 2 +- packages/validation/package.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/server/package.json 
b/apps/server/package.json index b7c29473..50a0e09d 100644 --- a/apps/server/package.json +++ b/apps/server/package.json @@ -1,7 +1,7 @@ { "name": "@overture-stack/lectern-server", "private": true, - "version": "2.0.0-next.0", + "version": "2.0.0-beta.1", "description": "Overture Data Dictionary Management", "scripts": { "build": "tsc -p tsconfig.build.json", diff --git a/packages/client/package.json b/packages/client/package.json index 4cd8b71f..0c0a0fc2 100644 --- a/packages/client/package.json +++ b/packages/client/package.json @@ -1,6 +1,6 @@ { "name": "@overture-stack/lectern-client", - "version": "0.1.0-beta.1", + "version": "2.0.0-beta.1", "files": [ "dist/" ], diff --git a/packages/dictionary/package.json b/packages/dictionary/package.json index d5ced1ca..183116d4 100644 --- a/packages/dictionary/package.json +++ b/packages/dictionary/package.json @@ -1,6 +1,6 @@ { "name": "@overture-stack/lectern-dictionary", - "version": "0.1.0-beta.1", + "version": "2.0.0-beta.1", "description": "", "files": [ "dist/" diff --git a/packages/validation/package.json b/packages/validation/package.json index 87bd0463..39702720 100644 --- a/packages/validation/package.json +++ b/packages/validation/package.json @@ -1,6 +1,6 @@ { "name": "@overture-stack/lectern-validation", - "version": "0.1.0-beta.1", + "version": "2.0.0-beta.1", "description": "Logic for validating data using a Lectern dictionary", "files": [ "dist/" From beb20fb43990b75779b38f0ce72e26204628d2dc Mon Sep 17 00:00:00 2001 From: Jon Eubank Date: Thu, 29 Aug 2024 17:47:49 -0400 Subject: [PATCH 3/9] READMEs cleaned up for NPM publishing --- README.md | 16 +-- apps/server/README.md | 11 +- packages/client/README.md | 30 +++-- packages/dictionary/README.md | 14 ++- packages/validation/README.md | 43 ++++++- .../validation/docs/validation-reports.md | 107 ++++++++++++++++++ 6 files changed, 194 insertions(+), 27 deletions(-) create mode 100644 packages/validation/docs/validation-reports.md diff --git a/README.md 
b/README.md index eb4d01c5..c5d282c3 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Lectern - Data Dictionary Management and Validation -[](http://slack.overture.bio) +[](http://slack.overture.bio) [](https://github.com/overture-stack/lectern/blob/develop/LICENSE) -[](CODE_OF_CONDUCT.md) +[](CODE_OF_CONDUCT.md) Lectern is Overture's Data Dictionary Schema Manager, providing a system for defining Schemas that will validate the structured data collected by an application. The core of Lectern is a web-server application that handles storage and version management of data dictionaries. Lectern data dictionaries are collections of schemas that define the structure of tabular data files (like TSV). This application provides functionality to validate the structure of data dictionaries, maintain a list of dictionary versions, and to compute the difference between dictionary versions. @@ -36,12 +36,12 @@ The modules in the monorepo are organized into two categories: * __packages/__ - Reusable packages shared between applications and other packages. Packages are published to [NPM](https://npmjs.com). * __scripts__ - Utility scripts for use within this repo. 
-| Component | Package Name | Path | Published Location | Description | -| --------------------------------------------------- | ---------------------------------- | ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [Lectern Server](apps/server/README.md) | @overture-stack/lectern-server | apps/server/ | [![Lectern GHCR Packages](https://img.shields.io/badge/GHCR-lectern-brightgreen?style=for-the-badge&logo=github)](https://github.com/overture-stack/lectern/pkgs/container/lectern) | Lectern Server web application. | -| [Lectern Client](packages/client/README.md) | @overture-stack/lectern-client | packages/client | [![Lectern Client NPM Package](https://img.shields.io/npm/v/@overture-stack/lectern-client?color=%23cb3837&style=for-the-badge&logo=npm)](https://www.npmjs.com/package/@overture-stack/lectern-client) | TypeScript Client to interact with Lectern Server and Lectern data dictionaries. This library provides a REST client to assist in fetching data from the Lectern server. It also exposes the functionality from the Lectern Validation library to use a Lectern data dictionary to validate data. 
| -| [Lectern Dictionary](packages/dictionary/README.md) | | @overture-stack/lectern-dictionary | packages/dictionary/ | [![Lectern Client NPM Package](https://img.shields.io/npm/v/@overture-stack/lectern-dictionary?color=%23cb3837&style=for-the-badge&logo=npm)](https://www.npmjs.com/package/@overture-stack/lectern-dictionary) | Dictionary meta-schema definition, includes TS types, and Zod schemas. This also exports all utilities for getting the diff of two dictionaries. | -| [Lectern Validation](packages/validation/README.md) | @overture-stack/lectern-validation | packages/validation/ | [![Lectern Validation NPM Package](https://img.shields.io/npm/v/@overture-stack/lectern-client?color=%23cb3837&style=for-the-badge&logo=npm)](https://www.npmjs.com/package/@overture-stack/lectern-client) | Validate data using Lectern Dictionaries. | +| Component | Package Name | Path | Published Location | Description | +| --------------------------------------------------- | ---------------------------------- | ---------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [Lectern Server](apps/server/README.md) | @overture-stack/lectern-server | apps/server/ | [![Lectern GHCR Packages](https://img.shields.io/badge/GHCR-lectern-brightgreen?style=for-the-badge&logo=github)](https://github.com/overture-stack/lectern/pkgs/container/lectern) | Lectern Server web application. 
| +| [Lectern Client](packages/client/README.md) | @overture-stack/lectern-client | packages/client | [![Lectern Client NPM Package](https://img.shields.io/npm/v/@overture-stack/lectern-client?color=%23cb3837&style=for-the-badge&logo=npm)](https://www.npmjs.com/package/@overture-stack/lectern-client) | TypeScript Client to interact with Lectern Server and Lectern data dictionaries. This library provides a REST client to assist in fetching data from the Lectern server. It also exposes the functionality from the Lectern Validation library to use a Lectern data dictionary to validate data. | +| [Lectern Dictionary](packages/dictionary/README.md) | @overture-stack/lectern-dictionary | packages/dictionary/ | [![Lectern Dictionary NPM Package](https://img.shields.io/npm/v/@overture-stack/lectern-dictionary?color=%23cb3837&style=for-the-badge&logo=npm)](https://www.npmjs.com/package/@overture-stack/lectern-dictionary) | Dictionary meta-schema definition, includes TS types, and Zod schemas. This also exports all utilities for getting the diff of two dictionaries. | +| [Lectern Validation](packages/validation/README.md) | @overture-stack/lectern-validation | packages/validation/ | [![Lectern Validation NPM Package](https://img.shields.io/npm/v/@overture-stack/lectern-validation?color=%23cb3837&style=for-the-badge&logo=npm)](https://www.npmjs.com/package/@overture-stack/lectern-validation) | Validate data using Lectern Dictionaries. | ## Developer Instructions diff --git a/apps/server/README.md b/apps/server/README.md index c39a5b8b..9907c4eb 100644 --- a/apps/server/README.md +++ b/apps/server/README.md @@ -1,16 +1,19 @@ # Lectern Server -[Container Registry Badge](https://github.com/overture-stack/lectern/pkgs/container/lectern) +[Container Registry Badge](https://github.com/overture-stack/lectern/pkgs/container/lectern) +[](http://slack.overture.bio) +[](https://github.com/overture-stack/lectern/blob/develop/LICENSE) -Lectern Server is the standalone web service for Lectern. 
It provides an API to create, manage, and share Data Dictionary schemas. +Lectern Server is a standalone web service for Lectern. It provides an API to create, manage, and share Lectern Dictionary schemas. ## Technology -Lectern is a NodeJS service written in TypeScript. It is published as a container on ghcr.io: [Lectern Container Registry](https://github.com/overture-stack/lectern/pkgs/container/lectern) +Lectern is a NodeJS service written in TypeScript. -To run the application from source, follow the [Development](#development) instructions below. +It is published as a container on ghcr.io: [Lectern Container Registry](https://github.com/overture-stack/lectern/pkgs/container/lectern) ## Development + ### PNPM Monorepo Package Manager This project uses `pnpm` instead of `npm` to facilitate a monorepo workspace. diff --git a/packages/client/README.md b/packages/client/README.md index ba0390be..bb310ef6 100644 --- a/packages/client/README.md +++ b/packages/client/README.md @@ -1,21 +1,27 @@ # Lectern Client -[![NPM Version](https://img.shields.io/npm/v/@overture-stack/lectern-client?color=%23cb3837&style=for-the-badge&logo=npm)](https://www.npmjs.com/package/@overture-stack/lectern-client) +![Typescript](https://img.shields.io/badge/TypeScript-007ACC?style=for-the-badge&logo=typescript&logoColor=white) +[](http://slack.overture.bio) +[](https://github.com/overture-stack/lectern/blob/develop/LICENSE) -The Lectern client provides developers the mechanism to interact with Lectern servers and to use Lectern Dictionaries. The client provides all the validation logic to check that submitted data is valid based on the structure and restrictions of a Lectern dictionary. It also provides a REST client to fetch Lectern Dictionary data from a Lectern server. +The Lectern Client provides developers TypeScript code tools to interact with Lectern servers and [Lectern Dictionaries](https://github.com/overture-stack/lectern). 
This package provides data processing functions that will parse submitted data and validate that it adheres to the structure defined by the Dictionary. It also provides a REST client to fetch Lectern Dictionary data from a Lectern server. ## Features -- Interact with lectern servers: +- REST client to interact with Lectern servers: - Fetch dictionary by name and version - Fetch difference summaries between dictionary versions -- Process data using a Lectern Dictionary: +- [Process data](#data-processing) using a Lectern Dictionary: - Convert raw string inputs into properly typed values. - Check the structure of input data is valid. - Apply all restrictions, both across schemas and on individual fields, to validate input data. - Report all validation errors found in the input data. +- Expose [Lectern Validation](https://www.npmjs.com/package/@overture-stack/lectern-validation) library functionality: + - Parsing functions to check and convert data types from string values + - Validation functions to confirm the structure and content of records match Lectern schemas + - This functionality is combined in the Processing functions - -## Data Fetching Example +## Developer Examples +### Data Fetching ```ts import * as lectern from '@overture-stack/lectern-client'; @@ -29,9 +35,7 @@ const dictionary = lectern.restClient.fetchSchema(lecternUrl, dictionaryName, cu const versionUpdates = lectern.restClient.fetchDiff(lecternUrl, dictionaryName, currentVersion, previousVersion); ``` -## Data Processing Usage - -### Process Data for a Single Schema +### Data Processing The following example shows how to process data using the Lectern Client. The input `donorData` is presented as hardcoded, but in a typical scenario this would be submitted to the application through an uploaded TSV, form entry, or similar user submission system. 
@@ -63,3 +67,11 @@ switch (schemaProcessingResult.status) { // records were parsed successfully, so this returns all parsed records } ``` + +## Lectern Dependencies +The Lectern Client is a wrapper around Lectern submodules that allow functionality to be shared between client and server implementations. If you do not need the REST client, or the combined processing functions, you can consider including submodules directly to access the specific pieces of functionality you require. + +| Package | Description | +| ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [lectern-dictionary](https://www.npmjs.com/package/@overture-stack/lectern-dictionary) | Definition of the Lectern Dictionary structure. Includes TS types and schemas for validating the content of Lectern dictionary. Also includes functionality for comapring mulitple Lectern Dictionary versions and computing the difference between them. | +| [lectern-validation](https://www.npmjs.com/package/@overture-stack/lectern-validation) | Provides functionality for validating data against a Lectern dictionary. 
| \ No newline at end of file diff --git a/packages/dictionary/README.md b/packages/dictionary/README.md index f6a26cde..43e20df6 100644 --- a/packages/dictionary/README.md +++ b/packages/dictionary/README.md @@ -1,6 +1,16 @@ -# Lectern Dictionary Meta-Schema and Utilities +# Lectern Dictionary -[![NPM Version](https://img.shields.io/npm/v/@overture-stack/lectern-dictionary?color=%23cb3837&style=for-the-badge&logo=npm)](https://www.npmjs.com/package/@overture-stack/lectern-dictionary) +![Typescript](https://img.shields.io/badge/TypeScript-007ACC?style=for-the-badge&logo=typescript&logoColor=white) +[](http://slack.overture.bio) +[](https://github.com/overture-stack/lectern/blob/develop/LICENSE) + +> **Note** +> +> This may not be the module you are looking to import. +> +> This is a sub-module used as a dependency by both the [Lectern Client](https://www.npmjs.com/package/@overture-stack/lectern-client) and [Lectern Server](https://github.com/overture-stack/lectern/blob/develop/apps/server/README.md). +> +> If you are building an application that will interact with a Lectern Server over HTTP, or wants to validate data using a Lectern Dictionary, you likely want to import the [Lectern Client](https://www.npmjs.com/package/@overture-stack/lectern-client). This package defines the structure of Lectern Dictionaries, including providing the TypeScript type definitions to use the dictionary in code and the schemas to validate that a given JSON object is a valid Lectern Dictionary. 
diff --git a/packages/validation/README.md b/packages/validation/README.md index 710b04df..afe99c0b 100644 --- a/packages/validation/README.md +++ b/packages/validation/README.md @@ -1,9 +1,44 @@ # Lectern Validation -[![NPM Version](https://img.shields.io/npm/v/@overture-stack/lectern-validation?color=%23cb3837&style=for-the-badge&logo=npm)](https://www.npmjs.com/package/@overture-stack/lectern-validation) +![Typescript](https://img.shields.io/badge/TypeScript-007ACC?style=for-the-badge&logo=typescript&logoColor=white) +[](http://slack.overture.bio) +[](https://github.com/overture-stack/lectern/blob/develop/LICENSE) -Standalone library to validate data using a Lectern Dictionary. -> **Note**: -> This may not be the module you want to import, it produced to be used as a shared dependency inside the Lectern monorepo. If you are building an application that will interact with a Lectern Server consider using the [Lectern Client](https://www.npmjs.com/package/@overture-stack/lectern-client) which includes this libary as a dependency while also providing REST Client functionality. +> **Note** +> +> This may not be the module you are looking to import. +> +> This is a sub-module used as a dependency by both the [Lectern Client](https://www.npmjs.com/package/@overture-stack/lectern-client) and [Lectern Server](https://github.com/overture-stack/lectern/blob/develop/apps/server/README.md). +> +> If you are building an application that will interact with a Lectern Server over HTTP, or wants to validate data using a Lectern Dictionary, you likely want to import the [Lectern Client](https://www.npmjs.com/package/@overture-stack/lectern-client). + +This package provides tools to parse and validate data based on the schemas in Lectern Dictionaries. + +## Parsing Data +Parsing data involves reading string values for fields defined in a Lectern Schema and converting the that value into properly typed data. 
For example, if a field has `"dataType": "number"` and the provided value is `"123"`, this will be converted from the string value into the numeric `123`. A more complicated example would take a comma-separated array value and convert each element and return the final array. If any values cannot be properly parsed and converted based on the schema's rules, an error is returned instead. + +There are four separate parsing functions exported, mapping to different collections of data to be processed together: + +- `parseFieldValue`: Parse a string value for an individual field. +- `parseRecordValues`: Parse all fields in an [UnprocessedDataRecord](https://github.com/overture-stack/lectern/blob/develop/docs/important-concepts.md#datarecord-and-unprocesseddatarecord) based on a schema definition. Applies `parseFieldValue` to each field. +- `parseSchemaValues`: Parse all records in a collection belonging to an individual [schema](https://github.com/overture-stack/lectern/blob/develop/docs/important-concepts.md#schema). Applies `parseRecordValues` to each record. +- `parseDictionaryValues`: Parse all records for multiple schemas in a [dictionary](https://github.com/overture-stack/lectern/blob/develop/docs/important-concepts.md#dictionary). Applies `parseSchemaValues` to each array of records provided. + +Each parsing function will return a Result object that indicates if the parsing completed successfully. When parsing completes without any errors the response will include the parsed data with all fields converted to the correct type. + +If the parsing failed, part of the response will be an array of errors indicating which record and which fields had parsing errors. The response will also include the partially parsed data record(s); fields that were successfully parsed will have been updated to their correct data types, but fields that failed to parse will still contain their original string values.
+ +## Validating Data + +Validation functions are provided to test parsed DataRecords with all restrictions defined in a Lectern dictionary. These functions will identify all restrictions that must be tested from the provided schema and apply these to the given data, including resolving conditional restrictions. Different restrictions are applied depending on which data collection is provided; validating an individual field will only test field level restrictions, while validating a whole schema will also validate `unique` and `uniqueKey` constraints. + +There are four main validation functions, provided to validate data at the field, record, schema, and dictionary levels: + +- `validateField`: Validate the field value, testing all field level restrictions such as `regex` and `codeList`. +- `validateRecord`: Validate all fields in a record, testing that all fields belong to that schema, that all required fields are present, and applying `validateField` to all fields in the record. +- `validateSchema`: Validate a collection of records from a single schema, testing all `unique` and `uniqueKey` requirements for that schema and then applying `validateRecord` to all records. +- `validateDictionary`: Validate multiple collections of records each belonging to schemas from a dictionary. This checks that each schema specified is a member of the given dictionary, and tests `foreignKey` restrictions on each schema, in addition to applying `validateSchema` to each collection. + +All validation functions return a [`TestResult`](https://github.com/overture-stack/lectern/blob/develop/docs/important-concepts.md#testresult) object that will indicate if the validation passed or failed. If the validation is successful then this result will simply indicate that the data is valid. If there were any errors then the response will include the error information.
diff --git a/packages/validation/docs/validation-reports.md b/packages/validation/docs/validation-reports.md new file mode 100644 index 00000000..5cbacccb --- /dev/null +++ b/packages/validation/docs/validation-reports.md @@ -0,0 +1,107 @@ +# Validation Reports + +## Restriction Levels +Restrictions apply to multiple different levels, differentiated by what data is needed to check their rules. + + +#### Dictionary +Considers all records for all schemas of the dictionary. + +- foreignKey +- schemaNames (look at schemas provided in a data set, flag any that have an invalid name) + +#### Schema +Considers all records for a given schema. + +- unique +- uniqueKey + +#### Record +Considers all fields within a single data record. + +- fieldName +- compare + +Note: It is at this level that conditional restrictions are calculated. + +#### Field +Considers only the value of the given field +- codeList +- range +- required +- empty +- count + +## Handling Singular and Array Fields + +Field level validations have a complication where sometimes the field has an array of values. When this is the case, we need to still report the validation error using the same object, but we will need to provide additional details to indicate which element(s) in the array caused the failure. + +Let's use a `codeList` restriction as an example: `"codeList": ["good", "ok", "fine"]` + +Consider a sample schema: + +```json +{ + "name": "array-vs-single-example", + "description": "Includes two fields both restricted to a list of values.
One field is an array.", + "fields": [ + { + "name": "single_value_field", + "valueType": "string", + "restrictions": { + "codeList": ["good", "ok", "fine"] + } + }, + { + "name": "array_value_field", + "valueType": "string", + "isArray": true, + "restrictions": { + "codeList": ["good", "ok", "fine"] + } + } + ] +} +``` + +Let's look at what the errors would look like for the following record: + +```json +{ + "single_value_field": "bad", + "array_value_field": ["good", "wrong", "fine"] +} +``` + +When this record is validated we should get back the following report: + +```json +[ + { + "field": "single_value_field", + "value": "bad", + "errors": [{ + "message": "The value for this field must match an option from the list.", + "restriction": { + "type": "codeList", + "rule": ["good", "ok", "fine"] + } + }] }, { + "field": "array_value_field", + "value": ["good", "wrong", "fine"], + "errors": [{ + "message": "All values for this field must match an option from the list.", + "restriction": { + "type": "codeList", + "rule": ["good", "ok", "fine"] + }, + "invalidItems": [ + { + "position": 1, + "value": "wrong" + } + ] + }] + } +] +``` \ No newline at end of file From 7123f284e1a0bed5170f42ce54c12c99bddc0702 Mon Sep 17 00:00:00 2001 From: Jon Eubank Date: Thu, 29 Aug 2024 17:54:27 -0400 Subject: [PATCH 4/9] Rename exported function variables to have same grammatic case --- packages/client/README.md | 6 +++--- packages/client/src/index.ts | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/client/README.md b/packages/client/README.md index bb310ef6..4490f3ab 100644 --- a/packages/client/README.md +++ b/packages/client/README.md @@ -31,8 +31,8 @@ const dictionaryName = 'dictionary-name'; const currentVersion = "2.3"; const previousVersion = "2.1"; -const dictionary = lectern.restClient.fetchSchema(lecternUrl, dictionaryName, currentVersion); -const versionUpdates = lectern.restClient.fetchDiff(lecternUrl, dictionaryName, currentVersion,
previousVersion); +const dictionary = lectern.rest.fetchSchema(lecternUrl, dictionaryName, currentVersion); +const versionUpdates = lectern.rest.fetchDiff(lecternUrl, dictionaryName, currentVersion, previousVersion); ``` ### Data Processing @@ -49,7 +49,7 @@ const dictionary = await getLecternDictionary(); const donorData = [{submitter_donor_id: "abc123", gender: "Male", age: "28"}, {submitter_donor_id: "def456", gender: "Female", age: "37"}] -const schemaProcessingResult = lectern.functions.processSchema(dictionary, "donors", donorData); +const schemaProcessingResult = lectern.process.processSchema(dictionary, "donors", donorData); switch (schemaProcessingResult.status) { case 'SUCCESS': { diff --git a/packages/client/src/index.ts b/packages/client/src/index.ts index 9cd53360..5f13e527 100644 --- a/packages/client/src/index.ts +++ b/packages/client/src/index.ts @@ -61,13 +61,13 @@ export * as rest from './rest'; * - processDicitonary: will process multiple collecitons of data records, each vs a different * schema definition that is found in a dictionary definition. */ -export const processing = _processing; +export const process = _processing; /** * Validation functions will perform all restriction tests on data objects. The result will indicate * if all tests passed or if there were some failures, and return a list of the failures that occurred. 
*/ -export const validation = { +export const validate = { validateField: _validation.validateField, validateRecord: _validation.validateRecord, validateSchema: _validation.validateSchema, From 06936d1e02a8ef3e362e0824bc025b19de202727 Mon Sep 17 00:00:00 2001 From: Jon Eubank Date: Thu, 29 Aug 2024 19:58:53 -0400 Subject: [PATCH 5/9] Remove redundancy in lectern-server description --- apps/server/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/server/README.md b/apps/server/README.md index 9907c4eb..9b7e3ec7 100644 --- a/apps/server/README.md +++ b/apps/server/README.md @@ -4,11 +4,11 @@ [](http://slack.overture.bio) [](https://github.com/overture-stack/lectern/blob/develop/LICENSE) -Lectern Server is a standalone web service for Lectern. It provides an API to create, manage, and share Lectern Dictionary schemas. +Lectern Server is a standalone web service that provides a REST API to manage and share Data Dictionary schemas. ## Technology -Lectern is a NodeJS service written in TypeScript. +Lectern Server is a NodeJS service written in TypeScript.
It is published as a container on ghcr.io: [Lectern Container Registry](https://github.com/overture-stack/lectern/pkgs/container/lectern) From 37a7607dbd4dc144c109305aab2a547d62893df4 Mon Sep 17 00:00:00 2001 From: Jon Eubank Date: Thu, 29 Aug 2024 20:03:03 -0400 Subject: [PATCH 6/9] README typos and clarifications --- packages/client/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/client/README.md b/packages/client/README.md index 4490f3ab..cc6acf42 100644 --- a/packages/client/README.md +++ b/packages/client/README.md @@ -1,10 +1,10 @@ # Lectern Client -![Typescript](https://img.shields.io/badge/TypeScript-007ACC?style=for-the-badge&logo=typescript&logoColor=white) +![TypeScript](https://img.shields.io/badge/TypeScript-007ACC?style=for-the-badge&logo=typescript&logoColor=white) [](http://slack.overture.bio) [](https://github.com/overture-stack/lectern/blob/develop/LICENSE) -The Lectern Client provides developers TypeScript code tools to interact with Lectern servers and [Lectern Dictionaries](https://github.com/overture-stack/lectern). This package provides data processing functions that will parse submitted data and validatet that it adheres to the structure defined by the Dictionary. It also provides a REST client to fetch Lectern Dictionary data from a Lectern server. +The Lectern Client provides developers TypeScript code tools to interact with Lectern servers and [Lectern Dictionaries](https://github.com/overture-stack/lectern). This package provides data processing functions that will parse and validate submitted data, ensuring that it adheres to the structure defined by the Dictionary. It also provides a REST client to fetch Lectern Dictionary data from a Lectern Server. 
## Features - REST client to interact with Lectern servers: @@ -73,5 +73,5 @@ The Lectern Client is a wrapper around Lectern submodules that allow functionali | Package | Description | | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [lectern-dictionary](https://www.npmjs.com/package/@overture-stack/lectern-dictionary) | Definition of the Lectern Dictionary structure. Includes TS types and schemas for validating the content of Lectern dictionary. Also includes functionality for comapring mulitple Lectern Dictionary versions and computing the difference between them. | +| [lectern-dictionary](https://www.npmjs.com/package/@overture-stack/lectern-dictionary) | Definition of the Lectern Dictionary structure. Includes TS types and schemas for validating the content of Lectern dictionary. Also includes functionality to comparing multiple Lectern Dictionary versions, and to analyze the differences between them. | | [lectern-validation](https://www.npmjs.com/package/@overture-stack/lectern-validation) | Provides functionality for validating data against a Lectern dictionary. 
| \ No newline at end of file From d4e6a352a0873462bae6c74c73a385f9da293c06 Mon Sep 17 00:00:00 2001 From: Anders Richardsson <2107110+justincorrigible@users.noreply.github.com> Date: Thu, 29 Aug 2024 20:57:33 -0400 Subject: [PATCH 7/9] correct TypeScript casing typos --- packages/dictionary/README.md | 2 +- packages/validation/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/dictionary/README.md b/packages/dictionary/README.md index 43e20df6..ec53976d 100644 --- a/packages/dictionary/README.md +++ b/packages/dictionary/README.md @@ -1,6 +1,6 @@ # Lectern Dictionary -![Typescript](https://img.shields.io/badge/TypeScript-007ACC?style=for-the-badge&logo=typescript&logoColor=white) +![TypeScript](https://img.shields.io/badge/TypeScript-007ACC?style=for-the-badge&logo=typescript&logoColor=white) [](http://slack.overture.bio) [](https://github.com/overture-stack/lectern/blob/develop/LICENSE) diff --git a/packages/validation/README.md b/packages/validation/README.md index afe99c0b..71bde928 100644 --- a/packages/validation/README.md +++ b/packages/validation/README.md @@ -1,6 +1,6 @@ # Lectern Validation -![Typescript](https://img.shields.io/badge/TypeScript-007ACC?style=for-the-badge&logo=typescript&logoColor=white) +![TypeScript](https://img.shields.io/badge/TypeScript-007ACC?style=for-the-badge&logo=typescript&logoColor=white) [](http://slack.overture.bio) [](https://github.com/overture-stack/lectern/blob/develop/LICENSE) From 039af0051352a05e9b0496a973be5ff2a40e478b Mon Sep 17 00:00:00 2001 From: Jon Eubank Date: Fri, 30 Aug 2024 15:14:22 -0400 Subject: [PATCH 8/9] Remove unused imports, clean TSDoc comment blocks --- packages/client/src/processing/index.ts | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/packages/client/src/processing/index.ts b/packages/client/src/processing/index.ts index 015f184b..1598fec0 100644 --- a/packages/client/src/processing/index.ts +++ 
b/packages/client/src/processing/index.ts @@ -17,17 +17,14 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -import * as validation from '@overture-stack/lectern-validation'; import { DataRecord, Dictionary, Schema, UnprocessedDataRecord } from '@overture-stack/lectern-dictionary'; -import { loggerFor } from '../logger'; +import * as validation from '@overture-stack/lectern-validation'; import { SchemaProcessingResult, type DictionaryProcessingResult, type RecordProcessingResult, } from './processingResultTypes'; -const L = loggerFor(__filename); - /** * Process data from multiple schemas for a dictionary. * @@ -36,9 +33,6 @@ const L = loggerFor(__filename); * that belong to that schema type. If there are errors found during conversion, * those errors will be returned and validation will be skipped. The final result will indicate if the * data processing attempt was successful, or failed due to errors during parsing or validation. - * @param data - * @param dictionary - * @returns */ export const processDictionary = ( data: Record, @@ -83,10 +77,6 @@ export const processDictionary = ( * Parse and then validate each record in the list. If there are errors found during conversion, * those errors will be returned and validation will be skipped. The final result will indicate if the * data processing attempt was successful, or failed due to errors during parsing or validation. 
- * @param dictionary - * @param definition - * @param records - * @returns */ export const processSchema = (records: UnprocessedDataRecord[], schema: Schema): SchemaProcessingResult => { const parseResult = validation.parseSchemaValues(records, schema); From 691ebbcd7f00fe863a20e9a7e2fb5095f542b29e Mon Sep 17 00:00:00 2001 From: Jon Eubank Date: Fri, 30 Aug 2024 15:21:53 -0400 Subject: [PATCH 9/9] Correcting typos and grammatical errors in documentation --- README.md | 6 +++--- apps/server/README.md | 2 +- docs/important-concepts.md | 21 ++++++++++++++++++- packages/client/src/index.ts | 2 +- packages/validation/README.md | 2 +- .../validation/docs/validation-reports.md | 2 +- 6 files changed, 27 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index c5d282c3..a091ee33 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ The modules in the monorepo are organized into two categories: | [Lectern Server](apps/server/README.md) | @overture-stack/lectern-server | apps/server/ | [![Lectern GHCR Packages](https://img.shields.io/badge/GHCR-lectern-brightgreen?style=for-the-badge&logo=github)](https://github.com/overture-stack/lectern/pkgs/container/lectern) | Lectern Server web application. | | [Lectern Client](packages/client/README.md) | @overture-stack/lectern-client | packages/client | [![Lectern Client NPM Package](https://img.shields.io/npm/v/@overture-stack/lectern-client?color=%23cb3837&style=for-the-badge&logo=npm)](https://www.npmjs.com/package/@overture-stack/lectern-client) | TypeScript Client to interact with Lectern Server and Lectern data dictionaries. This library provides a REST client to assist in fetching data from the Lectern server. It also exposes the functionality from the Lectern Validation library to use a Lectern data dictionary to validate data. 
| | [Lectern Dictionary](packages/dictionary/README.md) | | @overture-stack/lectern-dictionary | [![Lectern Client NPM Package](https://img.shields.io/npm/v/@overture-stack/lectern-dictionary?color=%23cb3837&style=for-the-badge&logo=npm)](https://www.npmjs.com/package/@overture-stack/lectern-dictionary) | Dictionary meta-schema definition, includes TS types, and Zod schemas. This also exports all utilities for getting the diff of two dictionaries. | -| [Lectern Validation](packages/validation/README.md) | @overture-stack/lectern-validation | packages/validation/ | [![Lectern Validation NPM Package](https://img.shields.io/npm/v/@overture-stack/lectern-client?color=%23cb3837&style=for-the-badge&logo=npm)](https://www.npmjs.com/package/@overture-stack/lectern-client) | Validate data using Lectern Dictionaries. | +| [Lectern Validation](packages/validation/README.md) | @overture-stack/lectern-validation | packages/validation/ | [![Lectern Validation NPM Package](https://img.shields.io/npm/v/@overture-stack/lectern-validation?color=%23cb3837&style=for-the-badge&logo=npm)](https://www.npmjs.com/package/@overture-stack/lectern-client) | Validate data using Lectern Dictionaries. | ## Developer Instructions @@ -49,9 +49,9 @@ You can install all dependencies for the entire repo from the root (as defined `pnpm install` -Using `nx` will ensure all local dependencies are built, in the correct sequence, when building, running, or testing any of the applications and packages in the repo. To run a package.json script from any module - after installing dependencies - use a command of the form `pnpm nx