From ba0d60a30373a8292d6aafbdcc8668d86073ac4a Mon Sep 17 00:00:00 2001
From: Varun Gandhi
Date: Mon, 1 Sep 2025 19:49:39 +0800
Subject: [PATCH] fix: Gracefully handle large number of paths returned by pip show

execSync in Node.js takes a maxBuffer argument; it does not grow the
output buffer dynamically. When running `pip show`, we could hit an
ENOBUFS error if the buffer was too small to hold the file path listing.

This means there are two options:

1. Use exec (or spawn): this requires CPS-ing the code all the way up
   to main.
2. Use a bigger buffer.

We go with option 2 because it's not worth changing everything to
callbacks just for this; all the other I/O uses sync operations anyway.

For the bigger buffer case, we add retry logic because it is essentially
free. It doesn't make sense to provide a CLI flag for customizing the
buffer size: all a user would do is look at the error and increase the
value, so we just do that for them.
---
 .../src/virtualenv/environment.ts | 57 +++++++++++++++++--
 1 file changed, 51 insertions(+), 6 deletions(-)

diff --git a/packages/pyright-scip/src/virtualenv/environment.ts b/packages/pyright-scip/src/virtualenv/environment.ts
index 802ba9f8d..60931214e 100644
--- a/packages/pyright-scip/src/virtualenv/environment.ts
+++ b/packages/pyright-scip/src/virtualenv/environment.ts
@@ -1,5 +1,6 @@
 import * as fs from 'fs';
 import * as child_process from 'child_process';
+import * as os from 'os';
 import PythonPackage from './PythonPackage';
 import PythonEnvironment from './PythonEnvironment';
 import { withStatus } from 'src/status';
@@ -28,16 +29,60 @@ let getPipCommand = () => {
     return pipCommand;
 };
 
+function spawnSyncWithRetry(command: string, args: string[]): child_process.SpawnSyncReturns<string> {
+    let maxBuffer = 1 * 1024 * 1024; // Start with 1MB (the default used by the old execSync call)
+    const maxMemory = os.totalmem() * 0.1; // Don't use more than 10% of total system memory
+
+    while (true) {
+        const result = child_process.spawnSync(command, args, {
+            encoding: 'utf8',
+            maxBuffer: maxBuffer,
+        });
+
+        const error = result.error as NodeJS.ErrnoException | undefined;
+        if (error && error.code === 'ENOBUFS') {
+            const nextBuffer = maxBuffer * 10;
+            if (nextBuffer > maxMemory) {
+                throw new Error(
+                    `Command output too large: last attempted maxBuffer was ${(maxBuffer / 1024 / 1024).toFixed(
+                        1
+                    )}MB (memory cap: ${(maxMemory / 1024 / 1024).toFixed(1)}MB)`
+                );
+            }
+            maxBuffer = nextBuffer;
+            continue; // Retry with a larger buffer
+        }
+
+        return result;
+    }
+}
+
 function pipList(): PipInformation[] {
-    return JSON.parse(child_process.execSync(`${getPipCommand()} list --format=json`).toString()) as PipInformation[];
+    const result = spawnSyncWithRetry(getPipCommand(), ['list', '--format=json']);
+
+    if (result.status !== 0) {
+        throw new Error(`pip list failed with code ${result.status}: ${result.stderr}`);
+    }
+
+    return JSON.parse(result.stdout) as PipInformation[];
 }
 
+// pipBulkShow returns the results of 'pip show', one entry per package.
+//
+// It doesn't cross-check that the number of entries matches the number of names passed in.
 function pipBulkShow(names: string[]): string[] {
-    // TODO: This probably breaks with enough names. Should batch them into 512 or whatever the max for bash would be
-    return child_process
-        .execSync(`${getPipCommand()} show -f ${names.join(' ')}`)
-        .toString()
-        .split('\n---');
+    // FIXME: The performance of this scales with the number of packages that
+    // are installed in the Python distribution, not just the number of packages
+    // that are requested. If 10K packages are installed, this can take several
+    // minutes. However, it's not obvious whether there is a more performant
+    // way to do this without hand-rolling the functionality ourselves.
+    const result = spawnSyncWithRetry(getPipCommand(), ['show', '-f', ...names]);
+
+    if (result.status !== 0) {
+        throw new Error(`pip show failed with code ${result.status}: ${result.stderr}`);
+    }
+
+    return result.stdout.split('\n---').filter((pkg) => pkg.trim());
 }
 
 export default function getEnvironment(
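Illustrative sketch (not part of the patch): a minimal TypeScript snippet showing the retry schedule that spawnSyncWithRetry walks through, assuming the 1MB starting buffer, the 10x growth per ENOBUFS retry, and the 10%-of-RAM cap used in the diff above. It only computes the attempted buffer sizes; it does not spawn any processes.

import * as os from 'os';

// Mirror the constants from spawnSyncWithRetry in the diff above.
const cap = os.totalmem() * 0.1; // cap: 10% of total system memory
let maxBuffer = 1 * 1024 * 1024; // first attempt: 1MB

// The first attempt always happens; each ENOBUFS retry grows the buffer 10x
// until the next size would exceed the cap, at which point the helper throws.
const attemptsMB: number[] = [maxBuffer / 1024 / 1024];
while (maxBuffer * 10 <= cap) {
    maxBuffer *= 10;
    attemptsMB.push(maxBuffer / 1024 / 1024);
}

// On a 16GB machine the cap is ~1638MB, so the attempts are 1, 10, 100, 1000 MB.
console.log(attemptsMB, `cap: ${(cap / 1024 / 1024).toFixed(1)}MB`);

In practice only a handful of retries can happen before either success or the hard cap, which is why the commit opts for automatic retries rather than a user-facing flag.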