diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index a7367b3c..bf28f672 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -39,6 +39,7 @@ jobs: with: toolchain: ${{ matrix.rust }} override: true + components: rustfmt - uses: actions-rs/cargo@v1 with: command: test diff --git a/Cargo.toml b/Cargo.toml index cbace288..4e04f479 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,5 @@ [workspace] -members = [ - "logos", - "logos-derive", - "tests", -] +members = ["logos", "logos-cli", "logos-codegen", "logos-derive", "tests"] [profile] release = { lto = true } diff --git a/logos-cli/Cargo.toml b/logos-cli/Cargo.toml new file mode 100644 index 00000000..fb59bbd9 --- /dev/null +++ b/logos-cli/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "logos-cli" +version = "0.12.0" +license = "MIT OR Apache-2.0" +description = "Create ridiculously fast Lexers" +repository = "https://github.com/maciejhirsz/logos" +documentation = "https://docs.rs/logos-derive" +keywords = ["lexer", "lexical", "tokenizer", "parser", "no_std"] +categories = ["parsing", "text-processing"] +readme = "../README.md" +edition = "2018" + +[dependencies] +anyhow = "1.0.57" +clap = { version = "3.1.18", features = ["derive"] } +fs-err = "2.7.0" +logos-codegen = { path = "../logos-codegen", version = "0.12.0" } +proc-macro2 = "1.0.39" + +[dev-dependencies] +assert_cmd = "2.0.4" +assert_fs = "1.0.7" +predicates = "2.1.1" diff --git a/logos-cli/src/main.rs b/logos-cli/src/main.rs new file mode 100644 index 00000000..2cb74ffa --- /dev/null +++ b/logos-cli/src/main.rs @@ -0,0 +1,98 @@ +use std::{ + fmt::Write, + io, + path::PathBuf, + process::{Command, Stdio}, +}; + +use anyhow::{Context, Result}; +use clap::Parser; +use fs_err as fs; +use proc_macro2::{LexError, TokenStream}; + +/// Logos as a CLI! 
+#[derive(Parser)] +#[clap(author, version, about, long_about = None)] +pub struct Args { + /// Input file to process + #[clap(parse(from_os_str))] + input: PathBuf, + /// Path to write output. By default output is printed to stdout. + #[clap(long, short, parse(from_os_str))] + output: Option<PathBuf>, + /// Checks whether the output file is up-to-date instead of writing to it. Requires --output to be specified. + #[clap(long, requires = "output")] + check: bool, + /// Invokes `rustfmt` on the generated code. `rustfmt` must be in $PATH. + #[clap(long)] + format: bool, +} + +pub fn main() -> Result<()> { + let args = Args::parse(); + + let input = fs::read_to_string(args.input)?; + let mut output = codegen(input).context("failed to generate code")?; + + if args.format { + output = rustfmt(output)?; + } + + if let Some(output_path) = args.output { + let changed = match fs::read_to_string(&output_path) { + Ok(existing_output) => !eq_ignore_newlines(&existing_output, &output), + Err(err) if err.kind() == io::ErrorKind::NotFound => true, + Err(err) => return Err(err.into()), + }; + + if !changed { + Ok(()) + } else if args.check { + Err(anyhow::format_err!( + "contents of {} differed from generated code", + output_path.display() + )) + } else { + fs::write(output_path, output)?; + Ok(()) + } + } else { + println!("{}", output); + Ok(()) + } +} + +fn codegen(input: String) -> Result<String> { + let input_tokens: TokenStream = input + .parse() + .map_err(|err: LexError| anyhow::Error::msg(err.to_string())) + .context("failed to parse input as rust code")?; + + let mut output = String::new(); + write!( + output, + "{}", + logos_codegen::strip_attributes(input_tokens.clone()) + )?; + write!(output, "{}", logos_codegen::generate(input_tokens))?; + Ok(output) +} + +fn rustfmt(input: String) -> Result<String> { + let mut command = Command::new("rustfmt") + .stdin(Stdio::piped()) + .stderr(Stdio::inherit()) + .stdout(Stdio::piped()) + .spawn()?; + io::Write::write_all(&mut command.stdin.take().unwrap(), 
input.as_bytes())?; + let output = command.wait_with_output()?; + if !output.status.success() { + anyhow::bail!("rustfmt returned unsuccessful exit code"); + } + + String::from_utf8(output.stdout).context("failed to parse rustfmt output as utf-8") +} + +fn eq_ignore_newlines(lhs: &str, rhs: &str) -> bool { + lhs.lines().eq(rhs.lines()) +} diff --git a/logos-cli/tests/data/fmt_output.rs b/logos-cli/tests/data/fmt_output.rs new file mode 100644 index 00000000..22d88442 --- /dev/null +++ b/logos-cli/tests/data/fmt_output.rs @@ -0,0 +1,48 @@ +#[derive(Debug, Clone, Copy, PartialEq)] +enum Token { + Letter, + Error, +} +impl<'s> ::logos::Logos<'s> for Token { + type Extras = (); + type Source = str; + const ERROR: Self = Token::Error; + fn lex(lex: &mut ::logos::Lexer<'s, Self>) { + use logos::internal::{CallbackResult, LexerInternal}; + type Lexer<'s> = ::logos::Lexer<'s, Token>; + fn _end<'s>(lex: &mut Lexer<'s>) { + lex.end() + } + fn _error<'s>(lex: &mut Lexer<'s>) { + lex.bump_unchecked(1); + lex.error(); + } + macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . 
bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } + #[inline] + fn goto1_x<'s>(lex: &mut Lexer<'s>) { + lex.set(Token::Letter); + } + #[inline] + fn goto3_at1_with3<'s>(lex: &mut Lexer<'s>) { + match lex.read_at::<&[u8; 2usize]>(1usize) { + Some(b"-z") => { + lex.bump_unchecked(3usize); + goto1_x(lex) + } + _ => _error(lex), + } + } + #[inline] + fn goto4<'s>(lex: &mut Lexer<'s>) { + let arr = match lex.read::<&[u8; 3usize]>() { + Some(arr) => arr, + None => return _end(lex), + }; + match arr[0] { + b'a' => goto3_at1_with3(lex), + _ => _error(lex), + } + } + goto4(lex) + } +} diff --git a/logos-cli/tests/data/input.rs b/logos-cli/tests/data/input.rs new file mode 100644 index 00000000..5f5eb060 --- /dev/null +++ b/logos-cli/tests/data/input.rs @@ -0,0 +1,7 @@ +#[derive(Logos, Debug, Clone, Copy, PartialEq)] +enum Token { + #[regex("a-z")] + Letter, + #[error] + Error, +} \ No newline at end of file diff --git a/logos-cli/tests/data/output.rs b/logos-cli/tests/data/output.rs new file mode 100644 index 00000000..e45833d6 --- /dev/null +++ b/logos-cli/tests/data/output.rs @@ -0,0 +1 @@ +# [derive (Debug , Clone , Copy , PartialEq)] enum Token { Letter , Error , }impl < 's > :: logos :: Logos < 's > for Token { type Extras = () ; type Source = str ; const ERROR : Self = Token :: Error ; fn lex (lex : & mut :: logos :: Lexer < 's , Self >) { use :: logos :: internal :: { LexerInternal , CallbackResult } ; type Lexer < 's > = :: logos :: Lexer < 's , Token > ; fn _end < 's > (lex : & mut Lexer < 's >) { lex . end () } fn _error < 's > (lex : & mut Lexer < 's >) { lex . bump_unchecked (1) ; lex . 
error () ; } macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } # [inline] fn goto1_x < 's > (lex : & mut Lexer < 's >) { lex . set (Token :: Letter) ; } # [inline] fn goto3_at1_with3 < 's > (lex : & mut Lexer < 's >) { match lex . read_at :: < & [u8 ; 2usize] > (1usize) { Some (b"-z") => { lex . bump_unchecked (3usize) ; goto1_x (lex) } , _ => _error (lex) , } } # [inline] fn goto4 < 's > (lex : & mut Lexer < 's >) { let arr = match lex . 
read :: < & [u8 ; 3usize] > () { Some (arr) => arr , None => return _end (lex) , } ; match arr [0] { b'a' => goto3_at1_with3 (lex) , _ => _error (lex) , } } goto4 (lex) } } \ No newline at end of file diff --git a/logos-cli/tests/tests.rs b/logos-cli/tests/tests.rs new file mode 100644 index 00000000..f52c8ddb --- /dev/null +++ b/logos-cli/tests/tests.rs @@ -0,0 +1,83 @@ +use std::path::Path; + +use assert_cmd::Command; +use assert_fs::{assert::PathAssert, fixture::FileWriteStr, NamedTempFile}; +use predicates::prelude::*; + +const INPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/input.rs"); +const OUTPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/output.rs"); +const FMT_OUTPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/fmt_output.rs"); + +#[test] +fn test_codegen() { + let tempfile = NamedTempFile::new("output.gen.rs").unwrap(); + + let mut cmd = Command::cargo_bin("logos-cli").unwrap(); + cmd.arg(INPUT_FILE) + .arg("--output") + .arg(tempfile.path()) + .assert() + .success(); + + tempfile.assert(normalize_newlines(OUTPUT_FILE)); +} + +#[test] +fn test_codegen_check() { + Command::cargo_bin("logos-cli") + .unwrap() + .arg(INPUT_FILE) + .arg("--check") + .arg("--output") + .arg(OUTPUT_FILE) + .assert() + .success(); +} + +#[test] +fn test_codegen_check_format() { + Command::cargo_bin("logos-cli") + .unwrap() + .arg(INPUT_FILE) + .arg("--format") + .arg("--check") + .arg("--output") + .arg(FMT_OUTPUT_FILE) + .assert() + .success(); +} + +#[test] +fn test_codegen_fail_check() { + let tempfile = NamedTempFile::new("output.gen.rs").unwrap(); + + tempfile.write_str("some random data").unwrap(); + + Command::cargo_bin("logos-cli") + .unwrap() + .arg(INPUT_FILE) + .arg("--check") + .arg("--output") + .arg(tempfile.path()) + .assert() + .failure(); +} + +#[test] +fn test_codegen_format() { + let tempfile = NamedTempFile::new("output.gen.rs").unwrap(); + + let mut cmd = Command::cargo_bin("logos-cli").unwrap(); + 
cmd.arg(INPUT_FILE) + .arg("--format") + .arg("--output") + .arg(tempfile.path()) + .assert() + .success(); + + tempfile.assert(normalize_newlines(FMT_OUTPUT_FILE)); +} + +fn normalize_newlines(s: impl AsRef<Path>) -> impl Predicate<str> { + predicates::str::diff(fs_err::read_to_string(s).unwrap().replace("\r\n", "\n")).normalize() +} diff --git a/logos-codegen/Cargo.toml b/logos-codegen/Cargo.toml new file mode 100644 index 00000000..741acfb2 --- /dev/null +++ b/logos-codegen/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "logos-codegen" +version = "0.12.0" +authors = ["Maciej Hirsz "] +license = "MIT OR Apache-2.0" +description = "Implementation details for logos-cli and logos-derive. Not for public consumption." +repository = "https://github.com/maciejhirsz/logos" +documentation = "https://docs.rs/logos-derive" +keywords = ["lexer", "lexical", "tokenizer", "parser", "no_std"] +categories = ["parsing", "text-processing"] +readme = "../README.md" +edition = "2018" + +[dependencies] +beef = "0.5.0" +fnv = "1.0.6" +syn = { version = "1.0.17", features = ["full"] } +quote = "1.0.3" +proc-macro2 = "1.0.9" +regex-syntax = "0.6" + +[dev-dependencies] +pretty_assertions = "0.6.1" diff --git a/logos-derive/src/error.rs b/logos-codegen/src/error.rs similarity index 100% rename from logos-derive/src/error.rs rename to logos-codegen/src/error.rs diff --git a/logos-derive/src/generator/context.rs b/logos-codegen/src/generator/context.rs similarity index 100% rename from logos-derive/src/generator/context.rs rename to logos-codegen/src/generator/context.rs diff --git a/logos-derive/src/generator/fork.rs b/logos-codegen/src/generator/fork.rs similarity index 100% rename from logos-derive/src/generator/fork.rs rename to logos-codegen/src/generator/fork.rs diff --git a/logos-derive/src/generator/leaf.rs b/logos-codegen/src/generator/leaf.rs similarity index 100% rename from logos-derive/src/generator/leaf.rs rename to logos-codegen/src/generator/leaf.rs diff --git 
a/logos-derive/src/generator/mod.rs b/logos-codegen/src/generator/mod.rs similarity index 100% rename from logos-derive/src/generator/mod.rs rename to logos-codegen/src/generator/mod.rs diff --git a/logos-derive/src/generator/rope.rs b/logos-codegen/src/generator/rope.rs similarity index 100% rename from logos-derive/src/generator/rope.rs rename to logos-codegen/src/generator/rope.rs diff --git a/logos-derive/src/generator/tables.rs b/logos-codegen/src/generator/tables.rs similarity index 100% rename from logos-derive/src/generator/tables.rs rename to logos-codegen/src/generator/tables.rs diff --git a/logos-derive/src/graph/fork.rs b/logos-codegen/src/graph/fork.rs similarity index 100% rename from logos-derive/src/graph/fork.rs rename to logos-codegen/src/graph/fork.rs diff --git a/logos-derive/src/graph/impls.rs b/logos-codegen/src/graph/impls.rs similarity index 100% rename from logos-derive/src/graph/impls.rs rename to logos-codegen/src/graph/impls.rs diff --git a/logos-derive/src/graph/meta.rs b/logos-codegen/src/graph/meta.rs similarity index 100% rename from logos-derive/src/graph/meta.rs rename to logos-codegen/src/graph/meta.rs diff --git a/logos-derive/src/graph/mod.rs b/logos-codegen/src/graph/mod.rs similarity index 100% rename from logos-derive/src/graph/mod.rs rename to logos-codegen/src/graph/mod.rs diff --git a/logos-derive/src/graph/range.rs b/logos-codegen/src/graph/range.rs similarity index 100% rename from logos-derive/src/graph/range.rs rename to logos-codegen/src/graph/range.rs diff --git a/logos-derive/src/graph/regex.rs b/logos-codegen/src/graph/regex.rs similarity index 100% rename from logos-derive/src/graph/regex.rs rename to logos-codegen/src/graph/regex.rs diff --git a/logos-derive/src/graph/rope.rs b/logos-codegen/src/graph/rope.rs similarity index 100% rename from logos-derive/src/graph/rope.rs rename to logos-codegen/src/graph/rope.rs diff --git a/logos-derive/src/leaf.rs b/logos-codegen/src/leaf.rs similarity index 100% rename from 
logos-derive/src/leaf.rs rename to logos-codegen/src/leaf.rs diff --git a/logos-codegen/src/lib.rs b/logos-codegen/src/lib.rs new file mode 100644 index 00000000..d7dd667d --- /dev/null +++ b/logos-codegen/src/lib.rs @@ -0,0 +1,362 @@ +//! Logos logo +//! +//! # Logos +//! +//! This is the code generation crate behind the `#[derive(Logos)]` macro, [for documentation go to main crate](https://docs.rs/logos). + +// The `quote!` macro requires deep recursion. +#![recursion_limit = "196"] +#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")] + +mod error; +mod generator; +mod graph; +mod leaf; +mod mir; +mod parser; +mod util; + +use generator::Generator; +use graph::{DisambiguationError, Fork, Graph, Rope}; +use leaf::Leaf; +use parser::{Mode, Parser}; +use quote::ToTokens; +use util::MaybeVoid; + +use proc_macro2::Span; +use proc_macro2::TokenStream; +use quote::quote; +use syn::spanned::Spanned; +use syn::{Fields, ItemEnum}; + +const LOGOS_ATTR: &str = "logos"; +const EXTRAS_ATTR: &str = "extras"; +const ERROR_ATTR: &str = "error"; +const END_ATTR: &str = "end"; +const TOKEN_ATTR: &str = "token"; +const REGEX_ATTR: &str = "regex"; + +/// Generate a `Logos` implementation for the given enum, provided as a stream of Rust tokens. +pub fn generate(input: TokenStream) -> TokenStream { + let mut item: ItemEnum = syn::parse2(input).expect("Logos can only be derived for enums"); + + let name = &item.ident; + + let mut error = None; + let mut parser = Parser::default(); + + for param in item.generics.params { + parser.parse_generic(param); + } + + for attr in &mut item.attrs { + parser.try_parse_logos(attr); + + // TODO: Remove in future versions + if attr.path.is_ident(EXTRAS_ATTR) { + parser.err( + "\ + #[extras] attribute is deprecated. 
Use #[logos(extras = Type)] instead.\n\ + \n\ + For help with migration see release notes: \ + https://github.com/maciejhirsz/logos/releases\ + ", + attr.span(), + ); + } + } + + let mut ropes = Vec::new(); + let mut regex_ids = Vec::new(); + let mut graph = Graph::new(); + + for variant in &mut item.variants { + let field = match &mut variant.fields { + Fields::Unit => MaybeVoid::Void, + Fields::Unnamed(fields) => { + if fields.unnamed.len() != 1 { + parser.err( + format!( + "Logos currently only supports variants with one field, found {}", + fields.unnamed.len(), + ), + fields.span(), + ); + } + + let ty = &mut fields + .unnamed + .first_mut() + .expect("Already checked len; qed") + .ty; + let ty = parser.get_type(ty); + + MaybeVoid::Some(ty) + } + Fields::Named(fields) => { + parser.err("Logos doesn't support named fields yet.", fields.span()); + + MaybeVoid::Void + } + }; + + // Lazy leaf constructor to avoid cloning + let var_ident = &variant.ident; + let leaf = move |span| Leaf::new(var_ident, span).field(field.clone()); + + for attr in &mut variant.attrs { + let attr_name = match attr.path.get_ident() { + Some(ident) => ident.to_string(), + None => continue, + }; + + match attr_name.as_str() { + ERROR_ATTR => { + let span = variant.ident.span(); + if let Some(previous) = error.replace(&variant.ident) { + parser + .err("Only one #[error] variant can be declared.", span) + .err("Previously declared #[error]:", previous.span()); + } + } + END_ATTR => { + // TODO: Remove in future versions + parser.err( + "\ + Since 0.11 Logos no longer requires the #[end] variant.\n\ + \n\ + For help with migration see release notes: \ + https://github.com/maciejhirsz/logos/releases\ + ", + attr.span(), + ); + } + TOKEN_ATTR => { + let definition = match parser.parse_definition(attr) { + Some(definition) => definition, + None => { + parser.err("Expected #[token(...)]", attr.span()); + continue; + } + }; + + if definition.ignore_flags.is_empty() { + let bytes = 
definition.literal.to_bytes(); + let then = graph.push( + leaf(definition.literal.span()) + .priority(definition.priority.unwrap_or(bytes.len() * 2)) + .callback(definition.callback), + ); + + ropes.push(Rope::new(bytes, then)); + } else { + let mir = definition + .literal + .escape_regex() + .to_mir( + &Default::default(), + definition.ignore_flags, + &mut parser.errors, + ) + .expect("The literal should be perfectly valid regex"); + + let then = graph.push( + leaf(definition.literal.span()) + .priority(definition.priority.unwrap_or_else(|| mir.priority())) + .callback(definition.callback), + ); + let id = graph.regex(mir, then); + + regex_ids.push(id); + } + } + REGEX_ATTR => { + let definition = match parser.parse_definition(attr) { + Some(definition) => definition, + None => { + parser.err("Expected #[regex(...)]", attr.span()); + continue; + } + }; + let mir = match definition.literal.to_mir( + &parser.subpatterns, + definition.ignore_flags, + &mut parser.errors, + ) { + Ok(mir) => mir, + Err(err) => { + parser.err(err, definition.literal.span()); + continue; + } + }; + + let then = graph.push( + leaf(definition.literal.span()) + .priority(definition.priority.unwrap_or_else(|| mir.priority())) + .callback(definition.callback), + ); + let id = graph.regex(mir, then); + + regex_ids.push(id); + } + _ => (), + } + } + } + + let mut root = Fork::new(); + + let extras = parser.extras.take(); + let source = match parser.mode { + Mode::Utf8 => quote!(str), + Mode::Binary => quote!([u8]), + }; + + let error_def = match error { + Some(error) => Some(quote!(const ERROR: Self = #name::#error;)), + None => { + parser.err("missing #[error] token variant.", Span::call_site()); + None + } + }; + + let generics = parser.generics(); + let this = quote!(#name #generics); + + let impl_logos = |body| { + quote! 
{ + impl<'s> ::logos::Logos<'s> for #this { + type Extras = #extras; + + type Source = #source; + + #error_def + + fn lex(lex: &mut ::logos::Lexer<'s, Self>) { + #body + } + } + } + }; + + for id in regex_ids { + let fork = graph.fork_off(id); + + root.merge(fork, &mut graph); + } + for rope in ropes { + root.merge(rope.into_fork(&mut graph), &mut graph); + } + while let Some(id) = root.miss.take() { + let fork = graph.fork_off(id); + + if fork.branches().next().is_some() { + root.merge(fork, &mut graph); + } else { + break; + } + } + + for &DisambiguationError(a, b) in graph.errors() { + let a = graph[a].unwrap_leaf(); + let b = graph[b].unwrap_leaf(); + let disambiguate = a.priority + 1; + + let mut err = |a: &Leaf, b: &Leaf| { + parser.err( + format!( + "\ + A definition of variant `{0}` can match the same input as another definition of variant `{1}`.\n\ + \n\ + hint: Consider giving one definition a higher priority: \ + #[regex(..., priority = {2})]\ + ", + a.ident, + b.ident, + disambiguate, + ), + a.span + ); + }; + + err(a, b); + err(b, a); + } + + if let Some(errors) = parser.errors.render() { + return impl_logos(errors).into(); + } + + let root = graph.push(root); + + graph.shake(root); + + // panic!("{:#?}\n\n{} nodes", graph, graph.nodes().iter().filter_map(|n| n.as_ref()).count()); + + let generator = Generator::new(name, &this, root, &graph); + + let body = generator.generate(); + let tokens = impl_logos(quote! { + use ::logos::internal::{LexerInternal, CallbackResult}; + + type Lexer<'s> = ::logos::Lexer<'s, #this>; + + fn _end<'s>(lex: &mut Lexer<'s>) { + lex.end() + } + + fn _error<'s>(lex: &mut Lexer<'s>) { + lex.bump_unchecked(1); + + lex.error(); + } + + #body + }); + + // panic!("{}", tokens); + + tokens +} + +/// Strip all logos attributes from the given enum, allowing it to be used in code without `logos-derive` present. 
+pub fn strip_attributes(input: TokenStream) -> TokenStream { + let mut item: ItemEnum = syn::parse2(input).expect("Logos can only be derived for enums"); + + strip_attrs_from_vec(&mut item.attrs); + + for attr in &mut item.attrs { + if attr.path.is_ident("derive") { + if let Ok(syn::Meta::List(mut meta)) = attr.parse_meta() { + meta.nested = meta.nested.into_iter().filter(|nested| !matches!(nested, syn::NestedMeta::Meta(nested) if nested.path().is_ident("Logos"))).collect(); + + attr.tokens = TokenStream::new(); + meta.paren_token.surround(&mut attr.tokens, |tokens| { + meta.nested.to_tokens(tokens); + }); + } + } + } + + for variant in &mut item.variants { + strip_attrs_from_vec(&mut variant.attrs); + for field in &mut variant.fields { + strip_attrs_from_vec(&mut field.attrs); + } + } + + item.to_token_stream() +} + +fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) { + attrs.retain(|attr| !is_logos_attr(attr)) +} + +fn is_logos_attr(attr: &syn::Attribute) -> bool { + attr.path.is_ident(LOGOS_ATTR) + || attr.path.is_ident(EXTRAS_ATTR) + || attr.path.is_ident(ERROR_ATTR) + || attr.path.is_ident(END_ATTR) + || attr.path.is_ident(TOKEN_ATTR) + || attr.path.is_ident(REGEX_ATTR) +} diff --git a/logos-derive/src/mir.rs b/logos-codegen/src/mir.rs similarity index 100% rename from logos-derive/src/mir.rs rename to logos-codegen/src/mir.rs diff --git a/logos-derive/src/parser/definition.rs b/logos-codegen/src/parser/definition.rs similarity index 100% rename from logos-derive/src/parser/definition.rs rename to logos-codegen/src/parser/definition.rs diff --git a/logos-derive/src/parser/ignore_flags.rs b/logos-codegen/src/parser/ignore_flags.rs similarity index 100% rename from logos-derive/src/parser/ignore_flags.rs rename to logos-codegen/src/parser/ignore_flags.rs diff --git a/logos-derive/src/parser/mod.rs b/logos-codegen/src/parser/mod.rs similarity index 99% rename from logos-derive/src/parser/mod.rs rename to logos-codegen/src/parser/mod.rs index 3d656c9b..f38eb850 100644 
--- a/logos-derive/src/parser/mod.rs +++ b/logos-codegen/src/parser/mod.rs @@ -7,6 +7,7 @@ use syn::{Attribute, GenericParam, Lit, Type}; use crate::error::Errors; use crate::leaf::{Callback, InlineCallback}; use crate::util::{expect_punct, MaybeVoid}; +use crate::LOGOS_ATTR; mod definition; mod ignore_flags; @@ -71,7 +72,7 @@ impl Parser { /// Try to parse the main `#[logos(...)]`, does nothing if /// the attribute's name isn't `logos`. pub fn try_parse_logos(&mut self, attr: &mut Attribute) { - if !attr.path.is_ident("logos") { + if !attr.path.is_ident(LOGOS_ATTR) { return; } diff --git a/logos-derive/src/parser/nested.rs b/logos-codegen/src/parser/nested.rs similarity index 100% rename from logos-derive/src/parser/nested.rs rename to logos-codegen/src/parser/nested.rs diff --git a/logos-derive/src/parser/subpattern.rs b/logos-codegen/src/parser/subpattern.rs similarity index 100% rename from logos-derive/src/parser/subpattern.rs rename to logos-codegen/src/parser/subpattern.rs diff --git a/logos-derive/src/parser/type_params.rs b/logos-codegen/src/parser/type_params.rs similarity index 100% rename from logos-derive/src/parser/type_params.rs rename to logos-codegen/src/parser/type_params.rs diff --git a/logos-derive/src/util.rs b/logos-codegen/src/util.rs similarity index 100% rename from logos-derive/src/util.rs rename to logos-codegen/src/util.rs diff --git a/logos-derive/Cargo.toml b/logos-derive/Cargo.toml index 2c0a7d78..c1ac8702 100644 --- a/logos-derive/Cargo.toml +++ b/logos-derive/Cargo.toml @@ -16,12 +16,4 @@ name = "logos_derive" proc-macro = true [dependencies] -beef = "0.5.0" -fnv = "1.0.6" -syn = { version = "1.0.17", features = ["full"] } -quote = "1.0.3" -proc-macro2 = "1.0.9" -regex-syntax = "0.6" - -[dev-dependencies] -pretty_assertions = "0.6.1" +logos-codegen = { path = "../logos-codegen", version = "0.12.0" } diff --git a/logos-derive/src/lib.rs b/logos-derive/src/lib.rs index b5ccb4b3..6830b207 100644 --- a/logos-derive/src/lib.rs +++ 
b/logos-derive/src/lib.rs @@ -1,312 +1,6 @@ -//! Logos logo -//! -//! # Logos -//! -//! This is a `#[derive]` macro crate, [for documentation go to main crate](https://docs.rs/logos). - -// The `quote!` macro requires deep recursion. -#![recursion_limit = "196"] -#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")] - -mod error; -mod generator; -mod graph; -mod leaf; -mod mir; -mod parser; -mod util; - -use generator::Generator; -use graph::{DisambiguationError, Fork, Graph, Rope}; -use leaf::Leaf; -use parser::{Mode, Parser}; -use util::MaybeVoid; - use proc_macro::TokenStream; -use proc_macro2::Span; -use quote::quote; -use syn::spanned::Spanned; -use syn::{Fields, ItemEnum}; #[proc_macro_derive(Logos, attributes(logos, extras, error, end, token, regex))] pub fn logos(input: TokenStream) -> TokenStream { - let mut item: ItemEnum = syn::parse(input).expect("Logos can be only be derived for enums"); - - let name = &item.ident; - - let mut error = None; - let mut parser = Parser::default(); - - for param in item.generics.params { - parser.parse_generic(param); - } - - for attr in &mut item.attrs { - parser.try_parse_logos(attr); - - // TODO: Remove in future versions - if attr.path.is_ident("extras") { - parser.err( - "\ - #[extras] attribute is deprecated. 
Use #[logos(extras = Type)] instead.\n\ - \n\ - For help with migration see release notes: \ - https://github.com/maciejhirsz/logos/releases\ - ", - attr.span(), - ); - } - } - - let mut ropes = Vec::new(); - let mut regex_ids = Vec::new(); - let mut graph = Graph::new(); - - for variant in &mut item.variants { - let field = match &mut variant.fields { - Fields::Unit => MaybeVoid::Void, - Fields::Unnamed(fields) => { - if fields.unnamed.len() != 1 { - parser.err( - format!( - "Logos currently only supports variants with one field, found {}", - fields.unnamed.len(), - ), - fields.span(), - ); - } - - let ty = &mut fields - .unnamed - .first_mut() - .expect("Already checked len; qed") - .ty; - let ty = parser.get_type(ty); - - MaybeVoid::Some(ty) - } - Fields::Named(fields) => { - parser.err("Logos doesn't support named fields yet.", fields.span()); - - MaybeVoid::Void - } - }; - - // Lazy leaf constructor to avoid cloning - let var_ident = &variant.ident; - let leaf = move |span| Leaf::new(var_ident, span).field(field.clone()); - - for attr in &mut variant.attrs { - let attr_name = match attr.path.get_ident() { - Some(ident) => ident.to_string(), - None => continue, - }; - - match attr_name.as_str() { - "error" => { - let span = variant.ident.span(); - if let Some(previous) = error.replace(&variant.ident) { - parser - .err("Only one #[error] variant can be declared.", span) - .err("Previously declared #[error]:", previous.span()); - } - } - "end" => { - // TODO: Remove in future versions - parser.err( - "\ - Since 0.11 Logos no longer requires the #[end] variant.\n\ - \n\ - For help with migration see release notes: \ - https://github.com/maciejhirsz/logos/releases\ - ", - attr.span(), - ); - } - "token" => { - let definition = match parser.parse_definition(attr) { - Some(definition) => definition, - None => { - parser.err("Expected #[token(...)]", attr.span()); - continue; - } - }; - - if definition.ignore_flags.is_empty() { - let bytes = 
definition.literal.to_bytes(); - let then = graph.push( - leaf(definition.literal.span()) - .priority(definition.priority.unwrap_or(bytes.len() * 2)) - .callback(definition.callback), - ); - - ropes.push(Rope::new(bytes, then)); - } else { - let mir = definition - .literal - .escape_regex() - .to_mir( - &Default::default(), - definition.ignore_flags, - &mut parser.errors, - ) - .expect("The literal should be perfectly valid regex"); - - let then = graph.push( - leaf(definition.literal.span()) - .priority(definition.priority.unwrap_or_else(|| mir.priority())) - .callback(definition.callback), - ); - let id = graph.regex(mir, then); - - regex_ids.push(id); - } - } - "regex" => { - let definition = match parser.parse_definition(attr) { - Some(definition) => definition, - None => { - parser.err("Expected #[regex(...)]", attr.span()); - continue; - } - }; - let mir = match definition.literal.to_mir( - &parser.subpatterns, - definition.ignore_flags, - &mut parser.errors, - ) { - Ok(mir) => mir, - Err(err) => { - parser.err(err, definition.literal.span()); - continue; - } - }; - - let then = graph.push( - leaf(definition.literal.span()) - .priority(definition.priority.unwrap_or_else(|| mir.priority())) - .callback(definition.callback), - ); - let id = graph.regex(mir, then); - - regex_ids.push(id); - } - _ => (), - } - } - } - - let mut root = Fork::new(); - - let extras = parser.extras.take(); - let source = match parser.mode { - Mode::Utf8 => quote!(str), - Mode::Binary => quote!([u8]), - }; - - let error_def = match error { - Some(error) => Some(quote!(const ERROR: Self = #name::#error;)), - None => { - parser.err("missing #[error] token variant.", Span::call_site()); - None - } - }; - - let generics = parser.generics(); - let this = quote!(#name #generics); - - let impl_logos = |body| { - quote! 
{ - impl<'s> ::logos::Logos<'s> for #this { - type Extras = #extras; - - type Source = #source; - - #error_def - - fn lex(lex: &mut ::logos::Lexer<'s, Self>) { - #body - } - } - } - }; - - for id in regex_ids { - let fork = graph.fork_off(id); - - root.merge(fork, &mut graph); - } - for rope in ropes { - root.merge(rope.into_fork(&mut graph), &mut graph); - } - while let Some(id) = root.miss.take() { - let fork = graph.fork_off(id); - - if fork.branches().next().is_some() { - root.merge(fork, &mut graph); - } else { - break; - } - } - - for &DisambiguationError(a, b) in graph.errors() { - let a = graph[a].unwrap_leaf(); - let b = graph[b].unwrap_leaf(); - let disambiguate = a.priority + 1; - - let mut err = |a: &Leaf, b: &Leaf| { - parser.err( - format!( - "\ - A definition of variant `{0}` can match the same input as another definition of variant `{1}`.\n\ - \n\ - hint: Consider giving one definition a higher priority: \ - #[regex(..., priority = {2})]\ - ", - a.ident, - b.ident, - disambiguate, - ), - a.span - ); - }; - - err(a, b); - err(b, a); - } - - if let Some(errors) = parser.errors.render() { - return impl_logos(errors).into(); - } - - let root = graph.push(root); - - graph.shake(root); - - // panic!("{:#?}\n\n{} nodes", graph, graph.nodes().iter().filter_map(|n| n.as_ref()).count()); - - let generator = Generator::new(name, &this, root, &graph); - - let body = generator.generate(); - let tokens = impl_logos(quote! { - use ::logos::internal::{LexerInternal, CallbackResult}; - - type Lexer<'s> = ::logos::Lexer<'s, #this>; - - fn _end<'s>(lex: &mut Lexer<'s>) { - lex.end() - } - - fn _error<'s>(lex: &mut Lexer<'s>) { - lex.bump_unchecked(1); - - lex.error(); - } - - #body - }); - - // panic!("{}", tokens); - - TokenStream::from(tokens) + return logos_codegen::generate(input.into()).into(); }