Batch reading content files to prevent too many open files error (#…

…12079)
* Refactor
* Refactor
* Batch content file reads in Node into groups of 500. We shouldn’t need to do this for our Rust code because it uses Rayon’s default thread pool for parallelism. Unless overridden, this thread pool has roughly as many threads as there are CPU cores. That is generally much, much lower than 500, and it can be explicitly overridden via an env var to work around potential issues with open file descriptors if anyone ever runs into that.
* Fix sequential/parallel flip
* Update changelog
tailwindlabs · Sep 25, 2023 · aaca7c4 · aaca7c4
1 parent 64c7d0e
commit aaca7c4
Show file tree

Hide file tree

Showing 3 changed files with 39 additions and 34 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fix incorrectly generated CSS when using square brackets inside arbitrary properties ([#11709](https://github.com/tailwindlabs/tailwindcss/pull/11709))
 - Make `content` optional for presets in TypeScript types ([#11730](https://github.com/tailwindlabs/tailwindcss/pull/11730))
 - Handle variable colors that have variable fallback values ([#12049](https://github.com/tailwindlabs/tailwindcss/pull/12049))
+- Batch reading content files to prevent `too many open files` error ([#12079](https://github.com/tailwindlabs/tailwindcss/pull/12079))
 
 ### Added
 

diff --git a/oxide/crates/core/src/lib.rs b/oxide/crates/core/src/lib.rs
@@ -342,8 +342,8 @@ pub fn parse_candidate_strings(input: Vec<ChangedContent>, options: u8) -> Vec<S
 
     match (IO::from(options), Parsing::from(options)) {
         (IO::Sequential, Parsing::Sequential) => parse_all_blobs_sync(read_all_files_sync(input)),
-        (IO::Sequential, Parsing::Parallel) => parse_all_blobs_sync(read_all_files(input)),
-        (IO::Parallel, Parsing::Sequential) => parse_all_blobs(read_all_files_sync(input)),
+        (IO::Sequential, Parsing::Parallel) => parse_all_blobs(read_all_files_sync(input)),
+        (IO::Parallel, Parsing::Sequential) => parse_all_blobs_sync(read_all_files(input)),
         (IO::Parallel, Parsing::Parallel) => parse_all_blobs(read_all_files(input)),
     }
 }

diff --git a/src/lib/expandTailwindAtRules.js b/src/lib/expandTailwindAtRules.js
@@ -135,43 +135,47 @@ export default function expandTailwindAtRules(context) {
 
     env.DEBUG && console.time('Reading changed files')
 
-    if (flagEnabled(context.tailwindConfig, 'oxideParser')) {
-      let rustParserContent = []
-      let regexParserContent = []
-
-      for (let item of context.changedContent) {
-        let transformer = getTransformer(context.tailwindConfig, item.extension)
-        let extractor = getExtractor(context, item.extension)
-
-        if (transformer === builtInTransformers.DEFAULT && extractor?.DEFAULT_EXTRACTOR === true) {
-          rustParserContent.push(item)
-        } else {
-          regexParserContent.push([item, { transformer, extractor }])
-        }
+    /** @type {[item: {file?: string, content?: string}, meta: {transformer: any, extractor: any}][]} */
+    let regexParserContent = []
+
+    /** @type {{file?: string, content?: string}[]} */
+    let rustParserContent = []
+
+    for (let item of context.changedContent) {
+      let transformer = getTransformer(context.tailwindConfig, item.extension)
+      let extractor = getExtractor(context, item.extension)
+
+      if (
+        flagEnabled(context.tailwindConfig, 'oxideParser') &&
+        transformer === builtInTransformers.DEFAULT &&
+        extractor?.DEFAULT_EXTRACTOR === true
+      ) {
+        rustParserContent.push(item)
+      } else {
+        regexParserContent.push([item, { transformer, extractor }])
       }
+    }
 
-      if (rustParserContent.length > 0) {
-        for (let candidate of parseCandidateStrings(
-          rustParserContent,
-          IO.Parallel | Parsing.Parallel
-        )) {
-          candidates.add(candidate)
-        }
+    // Read files using our newer, faster parser when:
+    // - Oxide is enabled; AND
+    // - The file is using default transformers and extractors
+    if (rustParserContent.length > 0) {
+      for (let candidate of parseCandidateStrings(
+        rustParserContent,
+        IO.Parallel | Parsing.Parallel
+      )) {
+        candidates.add(candidate)
       }
+    }
+
+    // Otherwise, read any files in node and parse with regexes
+    const BATCH_SIZE = 500
+
+    for (let i = 0; i < regexParserContent.length; i += BATCH_SIZE) {
+      let batch = regexParserContent.slice(i, i + BATCH_SIZE)
 
-      if (regexParserContent.length > 0) {
-        await Promise.all(
-          regexParserContent.map(async ([{ file, content }, { transformer, extractor }]) => {
-            content = file ? await fs.promises.readFile(file, 'utf8') : content
-            getClassCandidates(transformer(content), extractor, candidates, seen)
-          })
-        )
-      }
-    } else {
       await Promise.all(
-        context.changedContent.map(async ({ file, content, extension }) => {
-          let transformer = getTransformer(context.tailwindConfig, extension)
-          let extractor = getExtractor(context, extension)
+        batch.map(async ([{ file, content }, { transformer, extractor }]) => {
           content = file ? await fs.promises.readFile(file, 'utf8') : content
           getClassCandidates(transformer(content), extractor, candidates, seen)
         })