Skip to content

Commit

Permalink
[oxide] Expose experimental Rust parser setup (#11116)
Browse files Browse the repository at this point in the history
* make `sequential` and `parallel` versions of a new (tmp) `parse_candidate_strings`

* use bitmasks for the strategy

Only a number is sent over the wire instead of a serialized object.

* use cleaner match syntax
  • Loading branch information
RobinMalfait committed Apr 27, 2023
1 parent ea4e1cd commit e4a37ce
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 16 deletions.
105 changes: 97 additions & 8 deletions oxide/crates/core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::parser::Extractor;
use fxhash::FxHashSet;
use rayon::prelude::*;
use std::path::PathBuf;
use tracing::event;
Expand All @@ -11,14 +12,7 @@ pub mod parser;
pub mod utility;
pub mod variant;

/// One unit of input to scan: either a path to read from disk or an in-memory string.
#[derive(Debug, Clone)]
pub struct ChangedContent {
// Path of a file whose bytes should be read (used when `content` is `None`).
pub file: Option<PathBuf>,
// Inline content to scan directly (used when `file` is `None`).
pub content: Option<String>,
// NOTE(review): not read by any code visible in this section; presumably the file extension.
pub extension: String,
}

pub fn parse_candidate_strings_from_files(changed_content: Vec<ChangedContent>) -> Vec<String> {
fn init_tracing() {
if matches!(std::env::var("DEBUG"), Ok(value) if value.eq("*") || value.eq("1") || value.eq("true") || value.contains("tailwind"))
{
tracing_subscriber::fmt()
Expand All @@ -27,10 +21,63 @@ pub fn parse_candidate_strings_from_files(changed_content: Vec<ChangedContent>)
.compact()
.init();
}
}

/// One unit of input to scan: either a path to read from disk or an in-memory string.
///
/// `read_all_files_sync` reads the file when only `file` is set, uses the bytes of `content`
/// when only `content` is set, and produces an empty blob for any other combination.
#[derive(Debug, Clone)]
pub struct ChangedContent {
// Path of a file whose bytes should be read (used when `content` is `None`).
pub file: Option<PathBuf>,
// Inline content to scan directly (used when `file` is `None`).
pub content: Option<String>,
// NOTE(review): not read by any code visible in this section; presumably the file extension.
pub extension: String,
}

/// File-reading strategy, stored in the two lowest bits of the strategy byte.
#[derive(Debug)]
pub enum IO {
    /// Bit `0b0001`.
    Sequential = 0b0001,
    /// Bit `0b0010`.
    Parallel = 0b0010,
}

impl From<u8> for IO {
    /// Decodes the IO strategy from a strategy byte, ignoring every bit outside `0b0011`.
    ///
    /// # Panics
    ///
    /// Panics when the masked bits are neither `0b0001` nor `0b0010` (none or both set).
    fn from(item: u8) -> Self {
        // Only the bits owned by this enum take part in the decision.
        const MASK: u8 = IO::Sequential as u8 | IO::Parallel as u8;

        let bits = item & MASK;
        if bits == IO::Sequential as u8 {
            IO::Sequential
        } else if bits == IO::Parallel as u8 {
            IO::Parallel
        } else {
            unimplemented!("Unknown 'IO' strategy")
        }
    }
}

/// Parsing strategy, stored in bits `0b1100` of the strategy byte.
#[derive(Debug)]
pub enum Parsing {
    /// Bit `0b0100`.
    Sequential = 0b0100,
    /// Bit `0b1000`.
    Parallel = 0b1000,
}

impl From<u8> for Parsing {
    /// Decodes the parsing strategy from a strategy byte, ignoring every bit outside `0b1100`.
    ///
    /// # Panics
    ///
    /// Panics when the masked bits are neither `0b0100` nor `0b1000` (none or both set).
    fn from(item: u8) -> Self {
        // Only the bits owned by this enum take part in the decision.
        const MASK: u8 = Parsing::Sequential as u8 | Parsing::Parallel as u8;

        let bits = item & MASK;
        if bits == Parsing::Sequential as u8 {
            Parsing::Sequential
        } else if bits == Parsing::Parallel as u8 {
            Parsing::Parallel
        } else {
            unimplemented!("Unknown 'Parsing' strategy")
        }
    }
}

/// Reads every entry of `changed_content` and returns the candidate strings extracted from
/// the resulting blobs, using the default (un-suffixed) read and parse helpers.
pub fn parse_candidate_strings_from_files(changed_content: Vec<ChangedContent>) -> Vec<String> {
    // Install the tracing subscriber before any work that emits events.
    init_tracing();

    let blobs = read_all_files(changed_content);
    parse_all_blobs(blobs)
}

/// Extracts candidate strings from `input` using the strategies encoded in `options`.
///
/// `options` is a bitmask: the two lowest bits (`0b0011`) select the [`IO`] strategy used to
/// turn the input into byte blobs, and the next two bits (`0b1100`) select the [`Parsing`]
/// strategy used to extract candidates from those blobs.
///
/// # Panics
///
/// Panics if either strategy field of `options` does not decode to a known variant (see the
/// `From<u8>` impls of [`IO`] and [`Parsing`]).
pub fn parse_candidate_strings(input: Vec<ChangedContent>, options: u8) -> Vec<String> {
    init_tracing();

    // Decode both strategies up front so an invalid `options` value is rejected before any
    // file is touched.
    let (io, parsing) = (IO::from(options), Parsing::from(options));

    // The `_sync` helpers are the sequential variants; the un-suffixed helpers are taken to be
    // their parallel counterparts. Each axis is dispatched on its own so that the IO flag only
    // ever picks the reader and the parsing flag only ever picks the parser.
    let blobs = match io {
        IO::Sequential => read_all_files_sync(input),
        IO::Parallel => read_all_files(input),
    };

    match parsing {
        Parsing::Sequential => parse_all_blobs_sync(blobs),
        Parsing::Parallel => parse_all_blobs(blobs),
    }
}

#[tracing::instrument(skip(changed_content))]
fn read_all_files(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
event!(
Expand All @@ -49,6 +96,24 @@ fn read_all_files(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
.collect()
}

/// Turns every `ChangedContent` into a byte blob, one entry after another on the calling thread.
///
/// An entry with only `file` set is read from disk, an entry with only `content` set
/// contributes the bytes of that string, and any other combination yields an empty blob.
///
/// # Panics
///
/// Panics if a file cannot be read.
#[tracing::instrument(skip(changed_content))]
fn read_all_files_sync(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
    let total = changed_content.len();
    event!(tracing::Level::INFO, "Reading {:?} file(s)", total);

    let mut blobs: Vec<Vec<u8>> = Vec::with_capacity(total);
    for entry in changed_content {
        let bytes = match (entry.file, entry.content) {
            (Some(path), None) => std::fs::read(path).unwrap(),
            (None, Some(text)) => text.into_bytes(),
            // Both or neither set: nothing to scan for this entry.
            _ => Vec::new(),
        };
        blobs.push(bytes);
    }
    blobs
}

#[tracing::instrument(skip(blobs))]
fn parse_all_blobs(blobs: Vec<Vec<u8>>) -> Vec<String> {
let input: Vec<_> = blobs.iter().map(|blob| &blob[..]).collect();
Expand All @@ -72,3 +137,27 @@ fn parse_all_blobs(blobs: Vec<Vec<u8>>) -> Vec<String> {
result.sort();
result
}

/// Extracts the unique candidates from every blob, one blob after another, and returns them
/// as a sorted list of strings.
#[tracing::instrument(skip(blobs))]
fn parse_all_blobs_sync(blobs: Vec<Vec<u8>>) -> Vec<String> {
    // Merge the per-blob results into one set so duplicates across blobs collapse.
    let mut unique = FxHashSet::default();
    for blob in blobs.iter() {
        unique.extend(Extractor::unique(&blob[..], Default::default()));
    }

    let mut candidates: Vec<String> = Vec::with_capacity(unique.len());
    for bytes in unique {
        // SAFETY: The candidates were already validated as byte slices while parsing, so the
        // conversion back into a string does not need to check them a second time.
        let candidate = unsafe { String::from_utf8_unchecked(bytes.to_vec()) };
        candidates.push(candidate);
    }

    candidates.sort();
    candidates
}
39 changes: 31 additions & 8 deletions oxide/crates/node/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use napi::bindgen_prelude::ToNapiValue;
use std::path::PathBuf;

#[macro_use]
Expand All @@ -11,16 +12,38 @@ pub struct ChangedContent {
pub extension: String,
}

impl From<ChangedContent> for tailwindcss_core::ChangedContent {
    /// Converts the binding-level value into the core type, turning the optional file value
    /// into a `PathBuf` and moving the other fields across unchanged.
    fn from(changed_content: ChangedContent) -> Self {
        let file = changed_content.file.map(PathBuf::from);
        let content = changed_content.content;
        let extension = changed_content.extension;

        Self {
            file,
            content,
            extension,
        }
    }
}

/// Node binding for `tailwindcss_core::parse_candidate_strings_from_files`.
///
/// Each binding-level `ChangedContent` is converted into its core counterpart through the
/// `From` impl, and the converted list is passed on as the single argument of the core call.
#[napi]
pub fn parse_candidate_strings_from_files(changed_content: Vec<ChangedContent>) -> Vec<String> {
    tailwindcss_core::parse_candidate_strings_from_files(
        // `changed_content` is consumed exactly once here; the hand-written field-by-field
        // mapping is covered by the `From` impl.
        changed_content.into_iter().map(Into::into).collect(),
    )
}

/// IO strategy flags exported through `#[napi]`.
///
/// The discriminants match `tailwindcss_core::IO` and sit in the two lowest bits of the
/// `strategy` byte accepted by `parse_candidate_strings`.
#[derive(Debug)]
#[napi]
pub enum IO {
// Bit 0b0001 of the strategy byte.
Sequential = 0b0001,
// Bit 0b0010 of the strategy byte.
Parallel = 0b0010,
}

/// Parsing strategy flags exported through `#[napi]`.
///
/// The discriminants match `tailwindcss_core::Parsing` and sit in bits `0b1100` of the
/// `strategy` byte accepted by `parse_candidate_strings`, so they do not overlap the `IO` bits.
#[derive(Debug)]
#[napi]
pub enum Parsing {
// Bit 0b0100 of the strategy byte.
Sequential = 0b0100,
// Bit 0b1000 of the strategy byte.
Parallel = 0b1000,
}

/// Node binding for `tailwindcss_core::parse_candidate_strings`.
///
/// `strategy` is forwarded untouched; `input` is converted entry by entry into the core
/// `ChangedContent` type first.
#[napi]
pub fn parse_candidate_strings(input: Vec<ChangedContent>, strategy: u8) -> Vec<String> {
    let converted: Vec<tailwindcss_core::ChangedContent> = input
        .into_iter()
        .map(tailwindcss_core::ChangedContent::from)
        .collect();

    tailwindcss_core::parse_candidate_strings(converted, strategy)
}

0 comments on commit e4a37ce

Please sign in to comment.