Skip to content

Commit 6c8e874

Browse files
committed
feat: Improves file discovery and ignore patterns
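Extends the universal ignore patterns to cover more common directories (framework build output, caches, editor folders, coverage reports, temp and vendor directories, and Rust/Zig/Python artifacts), improving the accuracy and performance of the linter. When globbing outside the project, the full universal ignore list is now used, and the same ignore list is applied in the fast directory scan and in the post-discovery filter. Adds diagnostic logging when `PICKIER_DIAGNOSTICS=1` is set to help debug file discovery and globbing issues. Implements safeguards to prevent out-of-memory errors by limiting the number of files processed: a warning is logged when more than 10,000 files are discovered or more than 5,000 remain after filtering, and the run aborts (returning 1) when more than 100,000 files are discovered or more than 50,000 remain after filtering.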
1 parent 5109545 commit 6c8e874

File tree

1 file changed

+156
-9
lines changed

1 file changed

+156
-9
lines changed

packages/pickier/src/linter.ts

Lines changed: 156 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,42 @@ export async function runLintProgrammatic(
9292
return !absBase.startsWith(process.cwd())
9393
})
9494

95-
const universalIgnores = ['**/node_modules/**', '**/dist/**', '**/build/**', '**/.git/**']
95+
// Universal ignore patterns that should apply everywhere
96+
const universalIgnores = [
97+
'**/node_modules/**',
98+
'**/dist/**',
99+
'**/build/**',
100+
'**/.git/**',
101+
'**/.next/**',
102+
'**/.nuxt/**',
103+
'**/.output/**',
104+
'**/.vercel/**',
105+
'**/.netlify/**',
106+
'**/.cache/**',
107+
'**/.turbo/**',
108+
'**/.vscode/**',
109+
'**/.idea/**',
110+
'**/coverage/**',
111+
'**/.nyc_output/**',
112+
'**/tmp/**',
113+
'**/temp/**',
114+
'**/.tmp/**',
115+
'**/.temp/**',
116+
'**/vendor/**',
117+
'**/target/**', // Rust
118+
'**/zig-cache/**', // Zig
119+
'**/zig-out/**', // Zig
120+
'**/.zig-cache/**', // Zig
121+
'**/__pycache__/**', // Python
122+
'**/.pytest_cache/**', // Python
123+
'**/venv/**', // Python
124+
'**/.venv/**', // Python
125+
'**/out/**',
126+
'**/.DS_Store',
127+
'**/Thumbs.db',
128+
]
96129
const globIgnores = isGlobbingOutsideProject
97-
? cfg.ignores.filter(pattern => universalIgnores.includes(pattern))
130+
? universalIgnores // Use ALL universal ignores when outside project
98131
: cfg.ignores
99132

100133
let entries: string[] = []
@@ -127,7 +160,7 @@ export async function runLintProgrammatic(
127160
for (const it of items) {
128161
const full = join(dir, it)
129162
const st = statSync(full)
130-
if (shouldIgnorePath(full, cfg.ignores))
163+
if (shouldIgnorePath(full, globIgnores))
131164
continue
132165
if (st.isDirectory())
133166
stack.push(full)
@@ -169,7 +202,7 @@ export async function runLintProgrammatic(
169202
cntNodeModules++
170203
continue
171204
}
172-
if (shouldIgnorePath(f, cfg.ignores)) {
205+
if (shouldIgnorePath(f, globIgnores)) {
173206
cntIgnored++
174207
continue
175208
}
@@ -1203,20 +1236,31 @@ export function scanContent(filePath: string, content: string, cfg: PickierConfi
12031236

12041237
export async function runLint(globs: string[], options: LintOptions): Promise<number> {
12051238
trace('runLint:start', { globs, options })
1239+
const enableDiagnostics = process.env.PICKIER_DIAGNOSTICS === '1'
1240+
if (enableDiagnostics)
1241+
logger.info('[pickier:diagnostics] Starting lint process...')
12061242
try {
1243+
if (enableDiagnostics)
1244+
logger.info('[pickier:diagnostics] Loading config...')
12071245
const cfg = await loadConfigFromPath(options.config)
12081246
trace('config:loaded', { reporter: cfg.lint.reporter, ext: cfg.lint.extensions.join(',') })
12091247

12101248
const raw = globs.length ? globs : ['.']
12111249
const patterns = expandPatterns(raw)
12121250
trace('patterns', patterns)
1251+
if (enableDiagnostics)
1252+
logger.info(`[pickier:diagnostics] Patterns to search: ${patterns.join(', ')}`)
12131253
const extCsv = options.ext || cfg.lint.extensions.join(',')
12141254
const extSet = new Set<string>(extCsv.split(',').map((s: string) => {
12151255
const t = s.trim()
12161256
return t.startsWith('.') ? t : `.${t}`
12171257
}))
1258+
if (enableDiagnostics)
1259+
logger.info(`[pickier:diagnostics] File extensions: ${Array.from(extSet).join(', ')}`)
12181260

12191261
const timeoutMs = Number(process.env.PICKIER_TIMEOUT_MS || '8000')
1262+
if (enableDiagnostics)
1263+
logger.info(`[pickier:diagnostics] Glob timeout: ${timeoutMs}ms`)
12201264

12211265
// Filter ignore patterns based on whether we're globbing inside or outside the project
12221266
// Universal ignores (like **/node_modules/**, **/dist/**) always apply
@@ -1228,15 +1272,54 @@ export async function runLint(globs: string[], options: LintOptions): Promise<nu
12281272
})
12291273

12301274
// Universal ignore patterns that should apply everywhere
1231-
const universalIgnores = ['**/node_modules/**', '**/dist/**', '**/build/**', '**/.git/**']
1275+
const universalIgnores = [
1276+
'**/node_modules/**',
1277+
'**/dist/**',
1278+
'**/build/**',
1279+
'**/.git/**',
1280+
'**/.next/**',
1281+
'**/.nuxt/**',
1282+
'**/.output/**',
1283+
'**/.vercel/**',
1284+
'**/.netlify/**',
1285+
'**/.cache/**',
1286+
'**/.turbo/**',
1287+
'**/.vscode/**',
1288+
'**/.idea/**',
1289+
'**/coverage/**',
1290+
'**/.nyc_output/**',
1291+
'**/tmp/**',
1292+
'**/temp/**',
1293+
'**/.tmp/**',
1294+
'**/.temp/**',
1295+
'**/vendor/**',
1296+
'**/target/**', // Rust
1297+
'**/zig-cache/**', // Zig
1298+
'**/zig-out/**', // Zig
1299+
'**/.zig-cache/**', // Zig
1300+
'**/__pycache__/**', // Python
1301+
'**/.pytest_cache/**', // Python
1302+
'**/venv/**', // Python
1303+
'**/.venv/**', // Python
1304+
'**/out/**',
1305+
'**/.DS_Store',
1306+
'**/Thumbs.db',
1307+
]
12321308
const globIgnores = isGlobbingOutsideProject
1233-
? cfg.ignores.filter(pattern => universalIgnores.includes(pattern))
1309+
? universalIgnores // Use ALL universal ignores when outside project
12341310
: cfg.ignores
1311+
if (enableDiagnostics) {
1312+
logger.info(`[pickier:diagnostics] Globbing outside project: ${isGlobbingOutsideProject}, ignore patterns: ${globIgnores.length}`)
1313+
if (isGlobbingOutsideProject)
1314+
logger.info(`[pickier:diagnostics] Using universal ignores: ${universalIgnores.slice(0, 5).join(', ')}... (${universalIgnores.length} total)`)
1315+
}
12351316

12361317
// Fallbacks to avoid globby hangs: handle explicit file paths and simple directory scans
12371318
let entries: string[] = []
12381319
// Fast path: if a single concrete file (no glob magic) is provided, just use it directly
12391320
const nonGlobSingle = patterns.length === 1 && !/[*?[\]{}()!]/.test(patterns[0])
1321+
if (enableDiagnostics)
1322+
logger.info(`[pickier:diagnostics] Starting file discovery... (nonGlobSingle: ${nonGlobSingle})`)
12401323
if (nonGlobSingle) {
12411324
try {
12421325
const { statSync } = await import('node:fs')
@@ -1254,27 +1337,37 @@ export async function runLint(globs: string[], options: LintOptions): Promise<nu
12541337
const simpleDirPattern = patterns.length === 1 && /\*\*\/*\*$/.test(patterns[0])
12551338
if (!entries.length && simpleDirPattern) {
12561339
const base = patterns[0].replace(/\/?\*\*\/*\*\*?$/, '')
1340+
if (enableDiagnostics)
1341+
logger.info(`[pickier:diagnostics] Using fast directory scan for: ${base}`)
12571342
try {
12581343
const { readdirSync, statSync } = await import('node:fs')
12591344
const { join } = await import('node:path')
12601345
const rootBase = isAbsolute(base) ? base : resolve(process.cwd(), base)
12611346
const stack: string[] = [rootBase]
1347+
let dirCount = 0
12621348
while (stack.length) {
12631349
const dir = stack.pop()!
1350+
dirCount++
1351+
if (enableDiagnostics && dirCount % 100 === 0)
1352+
logger.info(`[pickier:diagnostics] Scanned ${dirCount} directories, ${entries.length} files found so far...`)
12641353
const items = readdirSync(dir)
12651354
for (const it of items) {
12661355
const full = join(dir, it)
12671356
const st = statSync(full)
1268-
if (shouldIgnorePath(full, cfg.ignores))
1357+
if (shouldIgnorePath(full, globIgnores))
12691358
continue
12701359
if (st.isDirectory())
12711360
stack.push(full)
12721361
else
12731362
entries.push(full)
12741363
}
12751364
}
1365+
if (enableDiagnostics)
1366+
logger.info(`[pickier:diagnostics] Fast scan complete: ${dirCount} directories, ${entries.length} total files`)
12761367
}
1277-
catch {
1368+
catch (e) {
1369+
if (enableDiagnostics)
1370+
logger.info(`[pickier:diagnostics] Fast scan failed: ${(e as any)?.message}, falling back to tinyglobby`)
12781371
// If fallback fails, use tinyglobby with timeout
12791372
entries = await withTimeout(tinyGlob(patterns, {
12801373
dot: false,
@@ -1285,16 +1378,37 @@ export async function runLint(globs: string[], options: LintOptions): Promise<nu
12851378
}
12861379
}
12871380
else if (!entries.length) {
1381+
if (enableDiagnostics)
1382+
logger.info(`[pickier:diagnostics] Using tinyglobby with timeout ${timeoutMs}ms...`)
12881383
entries = await withTimeout(tinyGlob(patterns, {
12891384
dot: false,
12901385
ignore: globIgnores,
12911386
onlyFiles: true,
12921387
absolute: true,
12931388
}), timeoutMs, 'tinyGlob')
1389+
if (enableDiagnostics)
1390+
logger.info(`[pickier:diagnostics] tinyglobby found ${entries.length} files`)
12941391
}
12951392

12961393
trace('globbed entries', entries.length)
1394+
if (enableDiagnostics)
1395+
logger.info(`[pickier:diagnostics] File discovery complete: ${entries.length} files found`)
1396+
1397+
// Safety check: warn if file count is suspiciously high
1398+
if (entries.length > 10000) {
1399+
logger.warn(`[pickier:warn] Found ${entries.length} files. This seems unusually high and may cause memory issues.`)
1400+
logger.warn(`[pickier:warn] Consider checking your ignore patterns or being more specific with your glob pattern.`)
1401+
logger.warn(`[pickier:warn] Common culprits: node_modules, build directories, cache folders, or vendor dependencies.`)
1402+
if (entries.length > 100000) {
1403+
logger.error(`[pickier:error] File count exceeds 100,000 (${entries.length}). This will likely cause out-of-memory errors.`)
1404+
logger.error(`[pickier:error] Aborting to prevent crash. Please refine your glob pattern or ignore patterns.`)
1405+
return 1
1406+
}
1407+
}
1408+
12971409
// filter with trace counters
1410+
if (enableDiagnostics)
1411+
logger.info(`[pickier:diagnostics] Filtering ${entries.length} files by extension and ignore patterns...`)
12981412
let cntTotal = 0
12991413
let cntIncluded = 0
13001414
let cntNodeModules = 0
@@ -1303,12 +1417,14 @@ export async function runLint(globs: string[], options: LintOptions): Promise<nu
13031417
const files: string[] = []
13041418
for (const f of entries) {
13051419
cntTotal++
1420+
if (enableDiagnostics && cntTotal % 1000 === 0)
1421+
logger.info(`[pickier:diagnostics] Filtering progress: ${cntTotal}/${entries.length} files checked, ${cntIncluded} included...`)
13061422
const p = f.replace(/\\/g, '/')
13071423
if (p.includes('/node_modules/')) {
13081424
cntNodeModules++
13091425
continue
13101426
}
1311-
if (shouldIgnorePath(f, cfg.ignores)) {
1427+
if (shouldIgnorePath(f, globIgnores)) {
13121428
cntIgnored++
13131429
continue
13141430
}
@@ -1321,12 +1437,39 @@ export async function runLint(globs: string[], options: LintOptions): Promise<nu
13211437
}
13221438
trace('filter:cli', { total: cntTotal, included: cntIncluded, node_modules: cntNodeModules, ignored: cntIgnored, wrongExt: cntWrongExt })
13231439
trace('filtered files', files.length)
1440+
if (enableDiagnostics) {
1441+
logger.info(`[pickier:diagnostics] Filtering complete:`)
1442+
logger.info(`[pickier:diagnostics] Total files found: ${cntTotal}`)
1443+
logger.info(`[pickier:diagnostics] Files to lint: ${cntIncluded}`)
1444+
logger.info(`[pickier:diagnostics] Excluded (node_modules): ${cntNodeModules}`)
1445+
logger.info(`[pickier:diagnostics] Excluded (ignored): ${cntIgnored}`)
1446+
logger.info(`[pickier:diagnostics] Excluded (wrong extension): ${cntWrongExt}`)
1447+
}
1448+
1449+
// Safety check after filtering
1450+
if (files.length > 5000) {
1451+
logger.warn(`[pickier:warn] After filtering, ${files.length} files will be linted. This may take a while and use significant memory.`)
1452+
if (files.length > 50000) {
1453+
logger.error(`[pickier:error] ${files.length} files to lint exceeds safe limit (50,000). This will likely cause out-of-memory errors.`)
1454+
logger.error(`[pickier:error] Aborting to prevent crash. Please be more specific with your glob pattern.`)
1455+
logger.error(`[pickier:error] Example: Instead of '../stx', try '../stx/src' or '../stx/packages/core'`)
1456+
return 1
1457+
}
1458+
}
13241459

13251460
// OPTIMIZATION: Parallel file processing with concurrency limit
13261461
const concurrency = Number(process.env.PICKIER_CONCURRENCY) || 8
13271462
const limit = pLimit(concurrency)
1463+
if (enableDiagnostics)
1464+
logger.info(`[pickier:diagnostics] Starting to process ${files.length} files with concurrency ${concurrency}...`)
13281465

1466+
let processedCount = 0
13291467
const processFile = async (file: string): Promise<LintIssue[]> => {
1468+
if (enableDiagnostics) {
1469+
processedCount++
1470+
if (processedCount === 1 || processedCount % 10 === 0 || processedCount === files.length)
1471+
logger.info(`[pickier:diagnostics] Processing file ${processedCount}/${files.length}: ${relative(process.cwd(), file)}`)
1472+
}
13301473
trace('scan start', relative(process.cwd(), file))
13311474
const src = readFileSync(file, 'utf8')
13321475

@@ -1403,10 +1546,14 @@ export async function runLint(globs: string[], options: LintOptions): Promise<nu
14031546

14041547
const issueArrays = await Promise.all(files.map(file => limit(() => processFile(file))))
14051548
const allIssues = issueArrays.flat()
1549+
if (enableDiagnostics)
1550+
logger.info(`[pickier:diagnostics] Processing complete! Found ${allIssues.length} issues total`)
14061551

14071552
const errors = allIssues.filter(i => i.severity === 'error').length
14081553
const warnings = allIssues.filter(i => i.severity === 'warning').length
14091554
trace('issues:summary', { errors, warnings })
1555+
if (enableDiagnostics)
1556+
logger.info(`[pickier:diagnostics] Errors: ${errors}, Warnings: ${warnings}`)
14101557

14111558
const reporter = options.reporter || cfg.lint.reporter
14121559
// Determine verbose mode with proper precedence: CLI option > config > default

0 commit comments

Comments
 (0)