Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ Project is primarily an addition to Pyright. At this time, there are no substant

## Pre-requisites

scip-python only supports Python 3.10+.

```
$ # Install scip-python
$ npm install -g @sourcegraph/scip-python
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
- `npm run check-snapshots` - Check snapshot tests
- `npm run update-snapshots` - Update snapshot tests

After making changes to the codebase, run tests with:
1. `npm run build-agent` - Build the development version
2. `npm run check-snapshots` - Run all tests including unit tests

### Building

- `npm run webpack` - Development build
Expand Down
7 changes: 7 additions & 0 deletions packages/pyright-scip/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# scip-python CHANGELOG

## v0.6.6

- Changes package listing to use Python's `importlib.metadata` instead of
`pip show` by default. With pip 24.0 and older, `pip show`
is much slower. This also replaces parsing of pip's unstructured
text output with parsing of JSON.

## v0.6.5

- Fixes a crash when `pip show` returns more than 1MB of data. (#151)
Expand Down
4 changes: 2 additions & 2 deletions packages/pyright-scip/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion packages/pyright-scip/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@sourcegraph/scip-python",
"version": "0.6.5",
"version": "0.6.6",
"description": "SCIP indexer for Python",
"main": "index.js",
"scripts": {
Expand Down
163 changes: 151 additions & 12 deletions packages/pyright-scip/src/virtualenv/environment.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as fs from 'fs';
import * as child_process from 'child_process';
import * as os from 'os';
import * as path from 'path';
import PythonPackage from './PythonPackage';
import PythonEnvironment from './PythonEnvironment';
import { withStatus } from 'src/status';
Expand All @@ -14,6 +15,11 @@ interface PipInformation {
version: string;
}

/**
 * Outcome of a bulk `pip show` invocation.
 *
 * `success` discriminates between the happy path and the failure paths;
 * on failure, `error` further discriminates a timeout from any other failure.
 */
type PipBulkShowResult =
    // One raw `pip show` section per package.
    | { data: string[]; success: true }
    // The command did not finish within its time budget.
    | { message: string; error: 'timeout'; success: false }
    // Any other failure; `code` carries the exit status when one is available.
    | { message: string; code?: number; error: 'other'; success: false };

let pipCommand: string | undefined;
let getPipCommand = () => {
if (pipCommand === undefined) {
Expand All @@ -22,21 +28,37 @@ let getPipCommand = () => {
} else if (commandExistsSync('pip')) {
pipCommand = 'pip';
} else {
throw new Error('Could not find valid pip command');
throw new Error(`Could not find valid pip command. Searched PATH: ${process.env.PATH}`);
}
}

return pipCommand;
};

function spawnSyncWithRetry(command: string, args: string[]): child_process.SpawnSyncReturns<string> {
// Memoised name of the Python interpreter found on PATH (undefined until first lookup).
let pythonCommand: string | undefined;

// Resolves the Python interpreter to invoke, preferring `python3` over `python`.
// The lookup is performed once; later calls return the remembered command.
// Throws if neither command is available.
let getPythonCommand = () => {
    if (pythonCommand !== undefined) {
        return pythonCommand;
    }

    // `find` stops at the first hit, so `python` is only probed when `python3` is absent.
    const located = ['python3', 'python'].find((candidate) => commandExistsSync(candidate));
    if (located === undefined) {
        throw new Error(`Could not find valid python command. Searched PATH: ${process.env.PATH}`);
    }

    pythonCommand = located;
    return located;
};

function spawnSyncWithRetry(command: string, args: string[], timeout?: number): child_process.SpawnSyncReturns<string> {
let maxBuffer = 1 * 1024 * 1024; // Start with 1MB (original default)
const maxMemory = os.totalmem() * 0.1; // Don't use more than 10% of total system memory

while (true) {
const result = child_process.spawnSync(command, args, {
encoding: 'utf8',
maxBuffer: maxBuffer,
timeout: timeout, // Will be undefined if not provided, which is fine
});

const error = result.error as NodeJS.ErrnoException | null;
Expand All @@ -57,6 +79,67 @@ function spawnSyncWithRetry(command: string, args: string[]): child_process.Spaw
}
}

// Best-effort removal of a temporary directory and everything beneath it.
// A failure to delete is reported as a warning and never propagated.
function cleanupTempDirectory(tempDir: string): void {
    // `force` makes an already-missing directory a no-op instead of an error.
    const removalOptions = { recursive: true, force: true };

    try {
        fs.rmSync(tempDir, removalOptions);
    } catch (cause) {
        console.warn(`Warning: Failed to cleanup temp directory ${tempDir}: ${cause}`);
    }
}

// Helper function to validate and warn about missing packages.
//
// Compares the names of the packages that were resolved against the names
// that were requested and emits a single warning listing every requested
// name that has no matching result. The results are returned unchanged so
// the call can be used inline in a `return`.
function validatePackageResults(results: PythonPackage[], requestedNames: string[]): PythonPackage[] {
    const foundNames = new Set(results.map((pkg) => pkg.name));
    const missingNames = requestedNames.filter((name) => !foundNames.has(name));

    // Decide on the missing set itself rather than on a length comparison:
    // duplicate or extra results make the lengths differ with nothing missing,
    // and can make the lengths match while a package is in fact missing.
    if (missingNames.length > 0) {
        console.warn(`Warning: Could not find package information for: ${missingNames.join(', ')}`);
    }

    return results;
}

// Returns the source of a small Python program that prints, as a JSON array
// on stdout, one `{name, version, files}` object for every installed
// distribution whose name was passed on the command line.
//
// Only `.py`/`.pyi` files are listed; entries that escape the install root
// (paths starting with `..`) or live under `__pycache__` are skipped.
//
// The Python source is indentation-sensitive, so the template literal below
// must keep its lines starting at column 0 with their own 4-space nesting.
function generatePackageInfoScript(): string {
    return `#!/usr/bin/env python3
import sys
import json
import importlib.metadata

def get_package_info(package_names):
    results = []
    package_set = set(package_names) # Use set for faster lookup

    for dist in importlib.metadata.distributions():
        if dist.name in package_set:
            files = []

            # Get files for this package
            if dist.files:
                for file_path in dist.files:
                    file_str = str(file_path)

                    # Skip cached or out-of-project files
                    if file_str.startswith('..') or '__pycache__' in file_str:
                        continue

                    # Only include .py and .pyi files
                    if file_str.endswith(('.py', '.pyi')):
                        files.append(file_str)

            results.append({
                'name': dist.name,
                'version': dist.version,
                'files': files
            })

    return results

if __name__ == '__main__':
    package_names = set(sys.argv[1:])
    package_info = get_package_info(package_names)
    json.dump(package_info, sys.stdout)
`;
}

function pipList(): PipInformation[] {
const result = spawnSyncWithRetry(getPipCommand(), ['list', '--format=json']);

Expand All @@ -70,19 +153,75 @@ function pipList(): PipInformation[] {
// pipBulkShow returns the results of 'pip show', one for each package.
//
// Failures are reported through the returned PipBulkShowResult instead of
// being thrown: `error: 'timeout'` when the command exceeded its time budget,
// `error: 'other'` for every other failure (with the exit code when known).
//
// It doesn't cross-check if the length of the output matches that of the input.
function pipBulkShow(names: string[]): PipBulkShowResult {
    // `pip show` with no package names exits with an error; nothing requested
    // simply means there is nothing to show.
    if (names.length === 0) {
        return { success: true, data: [] };
    }

    // FIXME: The performance of this scales with the number of packages that
    // are installed in the Python distribution, not just the number of packages
    // that are requested. If 10K packages are installed, this can take several
    // minutes. However, it's not super obvious if there is a more performant
    // way to do this without hand-rolling the functionality ourselves.
    const timeoutMs = 60000; // 1 minute timeout
    const result = spawnSyncWithRetry(getPipCommand(), ['show', '-f', ...names], timeoutMs);

    if (result.status !== 0) {
        const error = result.error as NodeJS.ErrnoException | null;

        // On timeout Node kills the child with SIGTERM (the default kill
        // signal) and reports ETIMEDOUT on `result.error`.
        if (result.signal === 'SIGTERM' || (error && error.code === 'ETIMEDOUT')) {
            return {
                success: false,
                error: 'timeout',
                message: 'pip show timed out after 1 minute.',
            };
        }

        // When the process could not be run at all there is no stderr to
        // report, so surface the spawn error instead.
        const detail = error ? error.message : result.stderr;
        return {
            success: false,
            error: 'other',
            message: `pip show failed: ${detail}`,
            code: result.status ?? undefined,
        };
    }

    // Individual package sections in the output are separated by a `---` line.
    return {
        success: true,
        data: result.stdout.split('\n---').filter((pkg) => pkg.trim()),
    };
}

// Get package information by running a short Python script.
// If we fail to run that, attempt to use `pip show`.
//
// Returns one PythonPackage per package that could be resolved. Requested
// packages without a result are reported with a warning. If both approaches
// fail, a warning is printed and an empty list is returned.
function gatherPackageData(packageNames: string[]): PythonPackage[] {
    // Nothing requested: avoid spawning any process.
    if (packageNames.length === 0) {
        return [];
    }

    // First try the new importlib.metadata approach
    let tempDir: string | undefined;
    try {
        // Created inside the `try` so that a failure to create the temp
        // directory also falls back to `pip show` instead of aborting.
        tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'scip-python-'));
        const scriptPath = path.join(tempDir, 'get_packages.py');

        fs.writeFileSync(scriptPath, generatePackageInfoScript(), { mode: 0o755 });

        const result = spawnSyncWithRetry(getPythonCommand(), [scriptPath, ...packageNames]);

        if (result.status === 0) {
            const packageData: unknown = JSON.parse(result.stdout);
            if (!Array.isArray(packageData)) {
                throw new Error('expected a JSON array of packages');
            }
            const packages = packageData.map(
                (pkg: { name: string; version: string; files: string[] }) =>
                    new PythonPackage(pkg.name, pkg.version, pkg.files)
            );
            return validatePackageResults(packages, packageNames);
        }

        console.warn(`Python script failed with code ${result.status}: ${result.stderr}`);
        console.warn('Falling back to pip show approach');
    } catch (error) {
        console.warn(`Failed to use importlib.metadata approach: ${error}`);
        console.warn('Falling back to pip show approach');
    } finally {
        if (tempDir !== undefined) {
            cleanupTempDirectory(tempDir);
        }
    }

    // Fallback to original pip show approach
    const bulkResult = pipBulkShow(packageNames);
    if (!bulkResult.success) {
        console.warn(`Warning: Package discovery failed - ${bulkResult.message}`);
        console.warn('Navigation to external packages may not work correctly.');
        return [];
    }

    const pipResults = bulkResult.data.map((shown) => PythonPackage.fromPipShow(shown));
    return validatePackageResults(pipResults, packageNames);
}

export default function getEnvironment(
Expand All @@ -101,13 +240,13 @@ export default function getEnvironment(
return withStatus('Evaluating python environment dependencies', (progress) => {
const listed = pipList();

progress.message('Gathering environment information from `pip`');
const bulk = pipBulkShow(listed.map((item) => item.name));
progress.message('Gathering environment information');
const packageNames = listed.map((item) => item.name);
const info = gatherPackageData(packageNames);

progress.message('Analyzing dependencies');
const info = bulk.map((shown) => {
return PythonPackage.fromPipShow(shown);
});
return new PythonEnvironment(projectFiles, projectVersion, info);
});
}

// Export for testing purposes
export { gatherPackageData };
Loading