From 83d55eac520a57309da5f76c6c84b73981ab763e Mon Sep 17 00:00:00 2001 From: Matt Condon Date: Thu, 2 Oct 2025 19:18:56 +0100 Subject: [PATCH 1/3] fix(ipa): fix import issue with parquet-wasm package --- .../metrics/scripts/parquet-wasm-wrapper.cjs | 35 +++++++++++ .../metrics/scripts/runMetricCollection.js | 62 ++++++++++++++----- 2 files changed, 81 insertions(+), 16 deletions(-) create mode 100644 tools/spectral/ipa/metrics/scripts/parquet-wasm-wrapper.cjs diff --git a/tools/spectral/ipa/metrics/scripts/parquet-wasm-wrapper.cjs b/tools/spectral/ipa/metrics/scripts/parquet-wasm-wrapper.cjs new file mode 100644 index 0000000000..16781a1b26 --- /dev/null +++ b/tools/spectral/ipa/metrics/scripts/parquet-wasm-wrapper.cjs @@ -0,0 +1,35 @@ +/** + * CommonJS wrapper for parquet-wasm + * + * This wrapper allows us to use parquet-wasm 0.7.0 from ES modules in Node.js. + * + * Background: parquet-wasm 0.7.0 has package.json with "type": "module" which makes + * Node.js treat .js files as ES modules. However, the node build uses CommonJS syntax. + * + * Solution: Since this file has a .cjs extension, Node.js always treats it as CommonJS, + * allowing us to use require(). We then use createRequire() in our ES module to load this wrapper. 
+ * + * See: https://github.com/kylebarron/parquet-wasm/issues/798 + */ + +const path = require('path'); +const fs = require('fs'); + +// Read and eval the parquet-wasm node build +// We use eval because require() won't work due to the "type": "module" in package.json +const parquetWasmPath = path.resolve( + __dirname, + '../../../../../node_modules/parquet-wasm/node/parquet_wasm.js', +); + +const code = fs.readFileSync(parquetWasmPath, 'utf8'); +const moduleExports = {}; +const moduleObj = { exports: moduleExports }; + +// Execute the code in a function scope with CommonJS globals +const fn = new Function('exports', 'require', 'module', '__filename', '__dirname', code); +fn(moduleExports, require, moduleObj, parquetWasmPath, path.dirname(parquetWasmPath)); + +// Re-export everything +module.exports = moduleObj.exports; + diff --git a/tools/spectral/ipa/metrics/scripts/runMetricCollection.js b/tools/spectral/ipa/metrics/scripts/runMetricCollection.js index ec6a92ebc8..af02e8c9ed 100644 --- a/tools/spectral/ipa/metrics/scripts/runMetricCollection.js +++ b/tools/spectral/ipa/metrics/scripts/runMetricCollection.js @@ -2,12 +2,19 @@ import fs from 'node:fs'; import path from 'path'; import { spawnSync } from 'child_process'; import spectral from '@stoplight/spectral-core'; -import { Table, writeParquet, WriterPropertiesBuilder } from 'parquet-wasm/esm/parquet_wasm.js'; import { tableFromJSON, tableToIPC } from 'apache-arrow'; import config from '../config.js'; import { runMetricCollectionJob } from '../metricCollection.js'; +import { createRequire } from 'module'; const { Spectral } = spectral; + +// Use createRequire to load our CommonJS wrapper for parquet-wasm +// This allows us to use parquet-wasm 0.7.0 from Node.js ESM +// See parquet-wasm-wrapper.cjs for details +const require = createRequire(import.meta.url); +const { Compression, Table, writeParquet, WriterPropertiesBuilder } = require('./parquet-wasm-wrapper.cjs'); + const args = process.argv.slice(2); const 
oasFilePath = args[0]; @@ -53,19 +60,42 @@ runMetricCollectionJob( new Spectral() ) .then((results) => { - console.log('Writing results'); - const table = tableFromJSON(results.metrics); - const wasmTable = Table.fromIPCStream(tableToIPC(table, 'stream')); - const parquetUint8Array = writeParquet( - wasmTable, - new WriterPropertiesBuilder().setCompression(2).build() // 2 = GZIP compression - ); - fs.writeFileSync(config.defaultMetricCollectionResultsFilePath, parquetUint8Array); - fs.writeFileSync(path.join(config.defaultOutputsDir, 'warning-count.txt'), results.warnings.count.toString()); - - fs.writeFileSync( - path.join(config.defaultOutputsDir, 'warning-violations.json'), - JSON.stringify(results.warnings.violations, null, 2) - ); + console.log('Writing results to parquet file...'); + + try { + console.log('Converting metrics to Arrow table...'); + const table = tableFromJSON(results.metrics); + + console.log('Converting Arrow table to WASM table...'); + const wasmTable = Table.fromIPCStream(tableToIPC(table, 'stream')); + + console.log('Writing parquet file with GZIP compression...'); + const parquetUint8Array = writeParquet( + wasmTable, + new WriterPropertiesBuilder().setCompression(Compression.GZIP).build() + ); + + console.log(`Saving parquet file to: ${config.defaultMetricCollectionResultsFilePath}`); + fs.writeFileSync(config.defaultMetricCollectionResultsFilePath, parquetUint8Array); + + console.log('Writing warning count...'); + fs.writeFileSync(path.join(config.defaultOutputsDir, 'warning-count.txt'), results.warnings.count.toString()); + + console.log('Writing warning violations...'); + fs.writeFileSync( + path.join(config.defaultOutputsDir, 'warning-violations.json'), + JSON.stringify(results.warnings.violations, null, 2) + ); + + console.log('Metric collection completed successfully!'); + } catch (error) { + console.error('Error writing results:', error.message); + console.error('Stack trace:', error.stack); + process.exit(1); + } }) - 
.catch((error) => console.error(error.message)); + .catch((error) => { + console.error('Error during metric collection:', error.message); + console.error('Stack trace:', error.stack); + process.exit(1); + }); From 6ec1e90c7968d6ebfe4f5d7a3ae56d4cc48e542b Mon Sep 17 00:00:00 2001 From: Matt Condon Date: Thu, 2 Oct 2025 19:22:16 +0100 Subject: [PATCH 2/3] chore: update readme with local run info --- tools/spectral/ipa/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/spectral/ipa/README.md b/tools/spectral/ipa/README.md index a06edb3f2e..a048c3ecb1 100644 --- a/tools/spectral/ipa/README.md +++ b/tools/spectral/ipa/README.md @@ -24,13 +24,13 @@ The IPA validation uses [Spectral](https://docs.stoplight.io/docs/spectral/9ffa0 ### Run Validation -To run the IPA validation locally, install necessary dependencies with `npm install` if you haven't already. Then, simply run: +To run the IPA validation locally, install necessary dependencies with `npm install` if you haven't already. Then (from `/openapi/tools/spectral/ipa/metrics/scripts/`), simply run: ``` npm run ipa-validation ``` -This command will run Spectral CLI for the ruleset [ipa-spectral.yaml](https://github.com/mongodb/openapi/blob/main/tools/spectral/ipa/ipa-spectral.yaml) on the raw [v2.yaml](https://github.com/mongodb/openapi/blob/main/openapi/.raw/v2.yaml) OpenAPI spec. +This command will run Spectral CLI for the ruleset [ipa-spectral.yaml](https://github.com/mongodb/openapi/blob/main/tools/spectral/ipa/ipa-spectral.yaml) on the raw [v2.yaml](https://github.com/mongodb/openapi/blob/main/openapi/.raw/v2.yaml) OpenAPI spec. 
## Integrating IPA Validations From 9ab18bb2f9b8e89823d664eceb7f1a88d889cde2 Mon Sep 17 00:00:00 2001 From: Matt Condon Date: Thu, 2 Oct 2025 19:25:02 +0100 Subject: [PATCH 3/3] chore: prettier --- tools/spectral/ipa/metrics/scripts/parquet-wasm-wrapper.cjs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/spectral/ipa/metrics/scripts/parquet-wasm-wrapper.cjs b/tools/spectral/ipa/metrics/scripts/parquet-wasm-wrapper.cjs index 16781a1b26..2d21f33cb2 100644 --- a/tools/spectral/ipa/metrics/scripts/parquet-wasm-wrapper.cjs +++ b/tools/spectral/ipa/metrics/scripts/parquet-wasm-wrapper.cjs @@ -17,10 +17,7 @@ const fs = require('fs'); // Read and eval the parquet-wasm node build // We use eval because require() won't work due to the "type": "module" in package.json -const parquetWasmPath = path.resolve( - __dirname, - '../../../../../node_modules/parquet-wasm/node/parquet_wasm.js', -); +const parquetWasmPath = path.resolve(__dirname, '../../../../../node_modules/parquet-wasm/node/parquet_wasm.js'); const code = fs.readFileSync(parquetWasmPath, 'utf8'); const moduleExports = {}; @@ -32,4 +29,3 @@ fn(moduleExports, require, moduleObj, parquetWasmPath, path.dirname(parquetWasmP // Re-export everything module.exports = moduleObj.exports; -