-
-
Notifications
You must be signed in to change notification settings - Fork 105
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a CLI which can be used for codegeneration (#248)
* Add logos-codegen CLI * Strip logos attributes * Add some tests * cargo fmt * Normalize newline characters in logos-cli * Fix tests on windows * Check rustfmt exit code * Include rustfmt tool for tests
- Loading branch information
1 parent
5dd386a
commit 51c1f8c
Showing
35 changed files
with
651 additions
and
322 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
[package] | ||
name = "logos-cli" | ||
version = "0.12.0" | ||
license = "MIT OR Apache-2.0" | ||
description = "Create ridiculously fast Lexers" | ||
repository = "https://github.com/maciejhirsz/logos" | ||
documentation = "https://docs.rs/logos-cli" | ||
keywords = ["lexer", "lexical", "tokenizer", "parser", "no_std"] | ||
categories = ["parsing", "text-processing"] | ||
readme = "../README.md" | ||
edition = "2018" | ||
|
||
[dependencies] | ||
anyhow = "1.0.57" | ||
clap = { version = "3.1.18", features = ["derive"] } | ||
fs-err = "2.7.0" | ||
logos-codegen = { path = "../logos-codegen", version = "0.12.0" } | ||
proc-macro2 = "1.0.39" | ||
|
||
[dev-dependencies] | ||
assert_cmd = "2.0.4" | ||
assert_fs = "1.0.7" | ||
predicates = "2.1.1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
use std::{ | ||
fmt::Write, | ||
io, | ||
path::PathBuf, | ||
process::{Command, Stdio}, | ||
}; | ||
|
||
use anyhow::{Context, Result}; | ||
use clap::Parser; | ||
use fs_err as fs; | ||
use proc_macro2::{LexError, TokenStream}; | ||
|
||
// Command-line arguments for the tool; `main` obtains them via `Args::parse()`. | ||
// NOTE(review): with clap's derive the `///` comments on the fields are used as the | ||
// per-argument help text, so rewording them changes user-visible output - confirm | ||
// before editing them. | ||
/// Logos as a CLI! | ||
#[derive(Parser)] | ||
#[clap(author, version, about, long_about = None)] | ||
pub struct Args { | ||
// Positional argument: neither `long` nor `short` is requested for it. | ||
/// Input file to process | ||
#[clap(parse(from_os_str))] | ||
input: PathBuf, | ||
// Optional flag; when absent `main` prints the generated code to stdout instead. | ||
/// Path to write output. By default output is printed to stdout. | ||
#[clap(long, short, parse(from_os_str))] | ||
output: Option<PathBuf>, | ||
// `requires = "output"` ties this flag to `output`, matching the help text below. | ||
/// Checks whether the output file is up-to-date instead of writing to it. Requires --output to be specified. | ||
#[clap(long, requires = "output")] | ||
check: bool, | ||
// When set, `main` pipes the generated code through the `rustfmt` helper. | ||
/// Invokes `rustfmt` on the generated code. `rustfmt` must be in $PATH. | ||
#[clap(long)] | ||
format: bool, | ||
} | ||
|
||
pub fn main() -> Result<()> { | ||
let args = Args::parse(); | ||
|
||
let input = fs::read_to_string(args.input)?; | ||
let mut output = codegen(input).context("failed to run rustfmt")?; | ||
|
||
if args.format { | ||
output = rustfmt(output)?; | ||
} | ||
|
||
if let Some(output_path) = args.output { | ||
let changed = match fs::read_to_string(&output_path) { | ||
Ok(existing_output) => !eq_ignore_newlines(&existing_output, &output), | ||
Err(err) if err.kind() == io::ErrorKind::NotFound => true, | ||
Err(err) => return Err(err.into()), | ||
}; | ||
|
||
if !changed { | ||
Ok(()) | ||
} else if args.check { | ||
Err(anyhow::format_err!( | ||
"contents of {} differed from generated code", | ||
output_path.display() | ||
)) | ||
} else { | ||
fs::write(output_path, output)?; | ||
Ok(()) | ||
} | ||
} else { | ||
println!("{}", output); | ||
Ok(()) | ||
} | ||
} | ||
|
||
fn codegen(input: String) -> Result<String> { | ||
let input_tokens: TokenStream = input | ||
.parse() | ||
.map_err(|err: LexError| anyhow::Error::msg(err.to_string())) | ||
.context("failed to parse input as rust code")?; | ||
|
||
let mut output = String::new(); | ||
write!( | ||
output, | ||
"{}", | ||
logos_codegen::strip_attributes(input_tokens.clone()) | ||
)?; | ||
write!(output, "{}", logos_codegen::generate(input_tokens))?; | ||
Ok(output) | ||
} | ||
|
||
fn rustfmt(input: String) -> Result<String> { | ||
let mut command = Command::new("rustfmt") | ||
.stdin(Stdio::piped()) | ||
.stderr(Stdio::inherit()) | ||
.stdout(Stdio::piped()) | ||
.spawn()?; | ||
io::Write::write_all(&mut command.stdin.take().unwrap(), input.as_bytes())?; | ||
let output = command.wait_with_output()?; | ||
if !output.status.success() { | ||
anyhow::bail!("rustfmt returned unsuccessful exit code"); | ||
} | ||
|
||
String::from_utf8(output.stdout).context("failed to parse rustfmt output as utf-8") | ||
} | ||
|
||
/// Compares two texts line by line, so that `\n` versus `\r\n` line endings
/// and a single trailing newline do not count as differences.
fn eq_ignore_newlines(lhs: &str, rhs: &str) -> bool {
    let mut left = lhs.lines();
    let mut right = rhs.lines();
    loop {
        match (left.next(), right.next()) {
            // Both sides ran out together: every line matched.
            (None, None) => return true,
            (Some(a), Some(b)) if a == b => {}
            // Either a line differs or one side has extra lines.
            _ => return false,
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#[derive(Debug, Clone, Copy, PartialEq)] | ||
enum Token { | ||
Letter, | ||
Error, | ||
} | ||
impl<'s> ::logos::Logos<'s> for Token { | ||
type Extras = (); | ||
type Source = str; | ||
const ERROR: Self = Token::Error; | ||
fn lex(lex: &mut ::logos::Lexer<'s, Self>) { | ||
use logos::internal::{CallbackResult, LexerInternal}; | ||
type Lexer<'s> = ::logos::Lexer<'s, Token>; | ||
fn _end<'s>(lex: &mut Lexer<'s>) { | ||
lex.end() | ||
} | ||
fn _error<'s>(lex: &mut Lexer<'s>) { | ||
lex.bump_unchecked(1); | ||
lex.error(); | ||
} | ||
macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } | ||
#[inline] | ||
fn goto1_x<'s>(lex: &mut Lexer<'s>) { | ||
lex.set(Token::Letter); | ||
} | ||
#[inline] | ||
fn goto3_at1_with3<'s>(lex: &mut Lexer<'s>) { | ||
match lex.read_at::<&[u8; 2usize]>(1usize) { | ||
Some(b"-z") => { | ||
lex.bump_unchecked(3usize); | ||
goto1_x(lex) | ||
} | ||
_ => _error(lex), | ||
} | ||
} | ||
#[inline] | ||
fn goto4<'s>(lex: &mut Lexer<'s>) { | ||
let arr = match lex.read::<&[u8; 3usize]>() { | ||
Some(arr) => arr, | ||
None => return _end(lex), | ||
}; | ||
match arr[0] { | ||
b'a' => goto3_at1_with3(lex), | ||
_ => _error(lex), | ||
} | ||
} | ||
goto4(lex) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#[derive(Logos, Debug, Clone, Copy, PartialEq)] | ||
enum Token { | ||
#[regex("a-z")] | ||
Letter, | ||
#[error] | ||
Error, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# [derive (Debug , Clone , Copy , PartialEq)] enum Token { Letter , Error , }impl < 's > :: logos :: Logos < 's > for Token { type Extras = () ; type Source = str ; const ERROR : Self = Token :: Error ; fn lex (lex : & mut :: logos :: Lexer < 's , Self >) { use :: logos :: internal :: { LexerInternal , CallbackResult } ; type Lexer < 's > = :: logos :: Lexer < 's , Token > ; fn _end < 's > (lex : & mut Lexer < 's >) { lex . end () } fn _error < 's > (lex : & mut Lexer < 's >) { lex . bump_unchecked (1) ; lex . error () ; } macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } # [inline] fn goto1_x < 's > (lex : & mut Lexer < 's >) { lex . 
set (Token :: Letter) ; } # [inline] fn goto3_at1_with3 < 's > (lex : & mut Lexer < 's >) { match lex . read_at :: < & [u8 ; 2usize] > (1usize) { Some (b"-z") => { lex . bump_unchecked (3usize) ; goto1_x (lex) } , _ => _error (lex) , } } # [inline] fn goto4 < 's > (lex : & mut Lexer < 's >) { let arr = match lex . read :: < & [u8 ; 3usize] > () { Some (arr) => arr , None => return _end (lex) , } ; match arr [0] { b'a' => goto3_at1_with3 (lex) , _ => _error (lex) , } } goto4 (lex) } } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
use std::path::Path; | ||
|
||
use assert_cmd::Command; | ||
use assert_fs::{assert::PathAssert, fixture::FileWriteStr, NamedTempFile}; | ||
use predicates::prelude::*; | ||
|
||
const INPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/input.rs"); | ||
const OUTPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/output.rs"); | ||
const FMT_OUTPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/fmt_output.rs"); | ||
|
||
// Generating into a fresh temp file yields the checked-in unformatted output.
#[test]
fn test_codegen() {
    let generated = NamedTempFile::new("output.gen.rs").unwrap();

    Command::cargo_bin("logos-cli")
        .unwrap()
        .arg(INPUT_FILE)
        .arg("--output")
        .arg(generated.path())
        .assert()
        .success();

    generated.assert(normalize_newlines(OUTPUT_FILE));
}
|
||
// `--check` succeeds when the output file already matches the generated code.
#[test]
fn test_codegen_check() {
    let mut cmd = Command::cargo_bin("logos-cli").unwrap();
    cmd.args(&[INPUT_FILE, "--check", "--output", OUTPUT_FILE]);
    cmd.assert().success();
}
|
||
// `--format --check` succeeds against the checked-in formatted output.
#[test]
fn test_codegen_check_format() {
    let mut cmd = Command::cargo_bin("logos-cli").unwrap();
    cmd.args(&[INPUT_FILE, "--format", "--check", "--output", FMT_OUTPUT_FILE]);
    cmd.assert().success();
}
|
||
// `--check` fails when the output file holds something other than the generated code.
#[test]
fn test_codegen_fail_check() {
    let stale = NamedTempFile::new("output.gen.rs").unwrap();
    stale.write_str("some random data").unwrap();

    let mut cmd = Command::cargo_bin("logos-cli").unwrap();
    cmd.args(&[INPUT_FILE, "--check", "--output"])
        .arg(stale.path());
    cmd.assert().failure();
}
|
||
// Generating with `--format` into a fresh temp file yields the checked-in formatted output.
#[test]
fn test_codegen_format() {
    let generated = NamedTempFile::new("output.gen.rs").unwrap();

    Command::cargo_bin("logos-cli")
        .unwrap()
        .arg(INPUT_FILE)
        .arg("--format")
        .arg("--output")
        .arg(generated.path())
        .assert()
        .success();

    generated.assert(normalize_newlines(FMT_OUTPUT_FILE));
}
|
||
fn normalize_newlines(s: impl AsRef<Path>) -> impl Predicate<str> { | ||
predicates::str::diff(fs_err::read_to_string(s).unwrap().replace("\r\n", "\n")).normalize() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
[package] | ||
name = "logos-codegen" | ||
version = "0.12.0" | ||
authors = ["Maciej Hirsz <hello@maciej.codes>"] | ||
license = "MIT OR Apache-2.0" | ||
description = "Implementation details for logos-cli and logos-derive. Not for public consumption." | ||
repository = "https://github.com/maciejhirsz/logos" | ||
documentation = "https://docs.rs/logos-codegen" | ||
keywords = ["lexer", "lexical", "tokenizer", "parser", "no_std"] | ||
categories = ["parsing", "text-processing"] | ||
readme = "../README.md" | ||
edition = "2018" | ||
|
||
[dependencies] | ||
beef = "0.5.0" | ||
fnv = "1.0.6" | ||
syn = { version = "1.0.17", features = ["full"] } | ||
quote = "1.0.3" | ||
proc-macro2 = "1.0.9" | ||
regex-syntax = "0.6" | ||
|
||
[dev-dependencies] | ||
pretty_assertions = "0.6.1" |
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Oops, something went wrong.