From 6188ddf4fbc2908789c974082d512e5070bfe2f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20F=C3=B6rster?= Date: Sat, 29 Apr 2023 10:17:19 +0200 Subject: [PATCH] Fix parsing \verb command See #828. --- CHANGELOG.md | 1 + crates/parser/src/latex.rs | 40 ++++++++++++++++--- crates/parser/src/latex/lexer.rs | 1 + crates/parser/src/latex/lexer/commands.rs | 1 + crates/parser/src/latex/lexer/types.rs | 6 ++- ...er__latex__tests__parse@issue_828.txt.snap | 22 ++++++++++ .../parser/src/test_data/latex/issue_828.txt | 1 + 7 files changed, 66 insertions(+), 6 deletions(-) create mode 100644 crates/parser/src/snapshots/parser__latex__tests__parse@issue_828.txt.snap create mode 100644 crates/parser/src/test_data/latex/issue_828.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index e478763a..2dd0e411 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix spurious completion results when completing environments ([#883](https://github.com/latex-lsp/texlab/issues/883)) - Fix regression when guessing cursor position after formatting ([#880](https://github.com/latex-lsp/texlab/issues/880)) +- Fix parsing `\verb` command ([#828](https://github.com/latex-lsp/texlab/issues/828)) ## [5.5.0] - 2023-04-16 diff --git a/crates/parser/src/latex.rs b/crates/parser/src/latex.rs index f3e978a9..c42031dc 100644 --- a/crates/parser/src/latex.rs +++ b/crates/parser/src/latex.rs @@ -44,6 +44,11 @@ impl<'a> Parser<'a> { self.builder.token(kind.into(), text); } + fn eat_remap(&mut self, kind: SyntaxKind) { + let (_, text) = self.lexer.eat().unwrap(); + self.builder.token(kind.into(), text); + } + fn peek(&self) -> Option { self.lexer.peek() } @@ -98,7 +103,7 @@ impl<'a> Parser<'a> { self.eat(); self.builder.finish_node(); } - Token::Word | Token::Comma => self.text(context), + Token::Pipe | Token::Word | Token::Comma => self.text(context), Token::Eq => self.eat(), Token::Dollar => self.formula(), Token::CommandName(name) => match name { @@ -141,6 +146,7 @@ impl<'a> Parser<'a> { CommandName::EnvironmentDefinition => self.environment_definition(), CommandName::BeginBlockComment => self.block_comment(), CommandName::EndBlockComment => self.generic_command(), + CommandName::VerbatimBlock => self.verbatim_block(), CommandName::GraphicsPath => self.graphics_path(), }, } @@ -158,6 +164,7 @@ impl<'a> Parser<'a> { | Token::Whitespace | Token::LineComment | Token::Word + | Token::Pipe | Token::Comma ) && (context.allow_comma || kind != Token::Comma) }) @@ -219,7 +226,7 @@ impl<'a> Parser<'a> { self.eat(); self.trivia(); match self.peek() { - Some(Token::Word) => { + Some(Token::Word | Token::Pipe) => { self.key(); } Some(Token::CommandName(_)) => { @@ -244,6 +251,7 @@ impl<'a> Parser<'a> { | Token::Whitespace | Token::LineComment | Token::Word + | Token::Pipe | Token::Comma ) }) @@ -298,7 +306,7 @@ impl<'a> Parser<'a> { self.eat(); self.trivia(); match self.peek() { - Some(Token::Word) => { + Some(Token::Word | Token::Pipe) => { self.key(); } Some(_) | None => {} @@ -334,7 +342,12 @@ impl<'a> Parser<'a> { self.eat(); while self .peek() - .filter(|&kind| matches!(kind, Token::Whitespace | Token::LineComment | Token::Word)) + .filter(|&kind| { + matches!( + kind, + Token::Whitespace | Token::LineComment | Token::Word | Token::Pipe + ) + }) .is_some() { self.eat(); @@ -390,7 +403,7 @@ impl<'a> Parser<'a> { while let Some(kind) = self.peek() { match kind { Token::LineBreak | Token::Whitespace | Token::LineComment => self.eat(), - Token::Word => { + Token::Word | Token::Pipe => { self.key_value_pair(); if self.peek() == Some(Token::Comma) { self.eat(); @@ -1116,6 +1129,23 @@ impl<'a> Parser<'a> { self.builder.finish_node(); } + + fn verbatim_block(&mut self) { + self.builder.start_node(GENERIC_COMMAND.into()); + self.eat(); + self.builder.finish_node(); + self.trivia(); + + if self.peek() == Some(Token::Pipe) { + self.eat_remap(SyntaxKind::VERBATIM); + while let Some(kind) = self.peek() { + self.eat_remap(SyntaxKind::VERBATIM); + if kind == Token::Pipe { + break; + } + } + } + } } pub fn parse_latex(text: &str, config: &SyntaxConfig) -> GreenNode { diff --git a/crates/parser/src/latex/lexer.rs b/crates/parser/src/latex/lexer.rs index e53a2ed0..47a6c43b 100644 --- a/crates/parser/src/latex/lexer.rs +++ b/crates/parser/src/latex/lexer.rs @@ -38,6 +38,7 @@ impl<'a> Lexer<'a> { Token::RParen => SyntaxKind::R_PAREN, Token::Comma => SyntaxKind::COMMA, Token::Eq => SyntaxKind::EQUALITY_SIGN, + Token::Pipe => SyntaxKind::WORD, Token::Word => SyntaxKind::WORD, Token::Dollar => SyntaxKind::DOLLAR, Token::CommandName(_) => SyntaxKind::COMMAND_NAME, diff --git a/crates/parser/src/latex/lexer/commands.rs b/crates/parser/src/latex/lexer/commands.rs index 8354fa1b..c5fe48e0 100644 --- a/crates/parser/src/latex/lexer/commands.rs +++ b/crates/parser/src/latex/lexer/commands.rs @@ -80,6 +80,7 @@ pub fn classify(name: &str, config: &SyntaxConfig) -> CommandName { "graphicspath" => CommandName::GraphicsPath, "iffalse" => CommandName::BeginBlockComment, "fi" => CommandName::EndBlockComment, + "verb" => CommandName::VerbatimBlock, _ if config.citation_commands.contains(name) => CommandName::Citation, _ => CommandName::Generic, } diff --git a/crates/parser/src/latex/lexer/types.rs b/crates/parser/src/latex/lexer/types.rs index 4e6c67ab..eea6ea3b 100644 --- a/crates/parser/src/latex/lexer/types.rs +++ b/crates/parser/src/latex/lexer/types.rs @@ -35,7 +35,10 @@ pub enum Token { #[token("=")] Eq, - #[regex(r"[^\s\\%\{\},\$\[\]\(\)=]+")] + #[token("|")] + Pipe, + + #[regex(r"[^\s\\%\{\},\$\[\]\(\)=\|]+")] Word, #[regex(r"\$\$?")] @@ -117,6 +120,7 @@ pub enum CommandName { GraphicsPath, BeginBlockComment, EndBlockComment, + VerbatimBlock, } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)] diff --git a/crates/parser/src/snapshots/parser__latex__tests__parse@issue_828.txt.snap b/crates/parser/src/snapshots/parser__latex__tests__parse@issue_828.txt.snap new file mode 100644 index 00000000..059b9572 --- /dev/null +++ b/crates/parser/src/snapshots/parser__latex__tests__parse@issue_828.txt.snap @@ -0,0 +1,22 @@ +--- +source: crates/parser/src/latex.rs +expression: root +input_file: crates/parser/src/test_data/latex/issue_828.txt +--- +ROOT@0..51 + PREAMBLE@0..51 + GENERIC_COMMAND@0..5 + COMMAND_NAME@0..5 "\\verb" + VERBATIM@5..6 "|" + VERBATIM@6..17 "" + VERBATIM@17..22 " " + VERBATIM@22..24 "if" + VERBATIM@24..25 "(" + VERBATIM@25..31 "" + VERBATIM@31..32 ")" + VERBATIM@32..33 "{" + VERBATIM@33..43 "else" + VERBATIM@43..44 "{" + VERBATIM@44..50 "" + VERBATIM@50..51 "|" + diff --git a/crates/parser/src/test_data/latex/issue_828.txt b/crates/parser/src/test_data/latex/issue_828.txt new file mode 100644 index 00000000..76f973bd --- /dev/null +++ b/crates/parser/src/test_data/latex/issue_828.txt @@ -0,0 +1 @@ +\verb| if(){else{| \ No newline at end of file