Skip to content

Commit

Permalink
feat(errors): improve parsing errors and fix some bugs (#33)
Browse files Browse the repository at this point in the history
  • Loading branch information
zkat committed Apr 23, 2022
1 parent 16c82f1 commit 8ed6a5c
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 67 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ keywords = ["kdl", "document", "serialization", "config"]
edition = "2021"

[dependencies]
miette = "4.5.0"
miette = "4.6.0"
nom = { version = "7.1.1", default-features = false }
phf = { version = "0.8.0", features = ["macros"] }
thiserror = "1.0.22"
31 changes: 12 additions & 19 deletions src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ mod test {
use super::*;

#[test]
fn parsing() {
fn parsing() -> miette::Result<()> {
let src = "
// This is the first node
foo 1 2 \"three\" null true bar=\"baz\" {
Expand All @@ -267,12 +267,11 @@ Some random comment
*/
a; b; c;
/-commented \"node\"
another /*foo*/ \"node\" /-1 /*bar*/ null;
final;";
let mut doc: KdlDocument = src.parse().unwrap();
let mut doc: KdlDocument = src.parse()?;

assert_eq!(doc.leading, Some("".into()));
assert_eq!(doc.get_arg("foo"), Some(&1.into()));
Expand Down Expand Up @@ -319,15 +318,17 @@ final;";
assert_eq!(format!("{}", doc), src);

// Programmatic manipulation works.
let mut node: KdlNode = "new\n".parse().unwrap();
let mut node: KdlNode = "new\n".parse()?;
// Manual entry parsing preserves formatting/reprs.
node.push("\"blah\"=0xDEADbeef".parse::<KdlEntry>().unwrap());
node.push("\"blah\"=0xDEADbeef".parse::<KdlEntry>()?);
doc.nodes_mut().push(node);

assert_eq!(
format!("{}", doc),
format!("{}new \"blah\"=0xDEADbeef\n", src)
);

Ok(())
}

#[test]
Expand Down Expand Up @@ -359,19 +360,11 @@ baz
}

#[test]
fn parse_examples() {
include_str!("../examples/kdl-schema.kdl")
.parse::<KdlDocument>()
.expect("parsing failed");
include_str!("../examples/Cargo.kdl")
.parse::<KdlDocument>()
.expect("parsing failed");
include_str!("../examples/ci.kdl")
.parse::<KdlDocument>()
.expect("parsing failed");
// TODO: This one fails?
// include_str!("../examples/nuget.kdl")
// .parse::<KdlDocument>()
// .expect("parsing failed");
fn parse_examples() -> miette::Result<()> {
include_str!("../examples/kdl-schema.kdl").parse::<KdlDocument>()?;
include_str!("../examples/Cargo.kdl").parse::<KdlDocument>()?;
include_str!("../examples/ci.kdl").parse::<KdlDocument>()?;
include_str!("../examples/nuget.kdl").parse::<KdlDocument>()?;
Ok(())
}
}
20 changes: 15 additions & 5 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,35 @@ use {

/// An error that occurs when parsing a KDL document.
#[derive(Debug, Diagnostic, Clone, Eq, PartialEq, Error)]
#[error("Error parsing document: {kind}")]
#[diagnostic(code("{kind.code()}"))]
#[error("{kind}")]
pub struct KdlError {
#[source_code]
pub input: String,

/// Offset in chars of the error.
#[label = "here"]
pub offset: usize,

pub kind: KdlErrorKind,
}

/// A type representing additional information specific to the type of error being returned.
#[derive(Debug, Clone, Eq, PartialEq, Error)]
#[derive(Debug, Diagnostic, Clone, Eq, PartialEq, Error)]
pub enum KdlErrorKind {
#[error(transparent)]
#[diagnostic(code(kdl::parse_int))]
ParseIntError(ParseIntError),

#[error(transparent)]
#[diagnostic(code(kdl::parse_float))]
ParseFloatError(ParseFloatError),
#[error("Failed to parse `{0}` component.")]

#[error("Expected {0}.")]
#[diagnostic(code(kdl::parse_component))]
Context(&'static str),

#[error("An unspecified error occurred.")]
#[diagnostic(code(kdl::other))]
Other,
}

Expand Down Expand Up @@ -65,7 +75,7 @@ impl<I> ParseError<I> for KdlParseError<I> {

impl<I> ContextError<I> for KdlParseError<I> {
fn add_context(_input: I, ctx: &'static str, mut other: Self) -> Self {
other.context = Some(ctx);
other.context = other.context.or(Some(ctx));
other
}
}
Expand Down
127 changes: 85 additions & 42 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ use crate::nom_compat::{many0, many1, many_till};
use nom::branch::alt;
use nom::bytes::complete::{tag, take_until, take_until1, take_while, take_while_m_n};
use nom::character::complete::{anychar, char, none_of, one_of};
use nom::combinator::{eof, map, map_opt, map_res, opt, recognize};
use nom::error::ParseError;
use nom::combinator::{cut, eof, map, map_opt, map_res, opt, recognize};
use nom::error::{context, ParseError};
use nom::sequence::{delimited, preceded, terminated, tuple};
use nom::{IResult, Offset, Parser, Slice};

Expand All @@ -23,14 +23,20 @@ pub(crate) fn document(input: &str) -> IResult<&str, KdlDocument, KdlParseError<

pub(crate) fn node(input: &str) -> IResult<&str, KdlNode, KdlParseError<&str>> {
let (input, leading) = all_whitespace(input)?;
let (input, ty) = opt(annotation)(input)?;
let (input, name) = identifier(input)?;
let (input, entries) = many0(entry)(input)?;
let (input, children) = opt(children)(input)?;
let (input, trailing) = recognize(preceded(
many0(node_space),
terminated(recognize(opt(tag(";"))), opt(alt((linespace, eof)))),
))(input)?;
let (input, ty) = opt(context("valid node type annotation", annotation))(input)?;
let (input, name) = context("valid node name", identifier)(input)?;
let (input, entries) = many0(context("valid node entry", entry))(input)?;
let (input, children) = opt(context("valid node children block", children))(input)?;
let (input, trailing) = context(
"trailing whitespace after node",
cut(recognize(preceded(
many0(node_space),
alt((
terminated(recognize(opt(tag(";"))), alt((linespace, eof))),
alt((newline, single_line_comment, eof)),
)),
))),
)(input)?;
let mut node = KdlNode::new(name);
node.set_leading(leading);
node.set_trailing(trailing);
Expand All @@ -52,7 +58,7 @@ fn identifier(input: &str) -> IResult<&str, KdlIdentifier, KdlParseError<&str>>
fn plain_identifier(input: &str) -> IResult<&str, KdlIdentifier, KdlParseError<&str>> {
let (input, name) = recognize(preceded(
take_while_m_n(1, 1, KdlIdentifier::is_initial_char),
take_while(KdlIdentifier::is_identifier_char),
cut(take_while(KdlIdentifier::is_identifier_char)),
))(input)?;
let mut ident = KdlIdentifier::from(name);
ident.set_repr(name);
Expand All @@ -74,8 +80,8 @@ fn property(input: &str) -> IResult<&str, KdlEntry, KdlParseError<&str>> {
let (input, leading) = recognize(many0(node_space))(input)?;
let (input, ty) = opt(annotation)(input)?;
let (input, name) = identifier(input)?;
let (input, _) = tag("=")(input)?;
let (input, (raw, value)) = value(input)?;
let (input, _) = context("'=' after property name", tag("="))(input)?;
let (input, (raw, value)) = context("property value", cut(value))(input)?;
let mut entry = KdlEntry::new_prop(name, value);
entry.ty = ty;
entry.set_leading(if leading.is_empty() { " " } else { leading });
Expand All @@ -86,7 +92,11 @@ fn property(input: &str) -> IResult<&str, KdlEntry, KdlParseError<&str>> {
fn argument(input: &str) -> IResult<&str, KdlEntry, KdlParseError<&str>> {
let (input, leading) = recognize(many0(node_space))(input)?;
let (input, ty) = opt(annotation)(input)?;
let (input, (raw, value)) = value(input)?;
let (input, (raw, value)) = if ty.is_some() {
context("valid value", cut(value))(input)
} else {
context("valid value", value)(input)
}?;
let mut entry = KdlEntry::new(value);
entry.ty = ty;
entry.set_leading(if leading.is_empty() { " " } else { leading });
Expand All @@ -109,17 +119,17 @@ fn value(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>>
}

fn children(input: &str) -> IResult<&str, (&str, KdlDocument), KdlParseError<&str>> {
let (input, before) = alt((unicode_space, comment))(input)?;
let (input, before) = recognize(many0(node_space))(input)?;
let (input, _) = tag("{")(input)?;
let (input, children) = document(input)?;
let (input, _) = tag("}")(input)?;
let (input, _) = cut(context("closing '}' in node children block", tag("}")))(input)?;
Ok((input, (before, children)))
}

fn annotation(input: &str) -> IResult<&str, KdlIdentifier, KdlParseError<&str>> {
let (input, _) = tag("(")(input)?;
let (input, ty) = identifier(input)?;
let (input, _) = tag(")")(input)?;
let (input, ty) = cut(identifier)(input)?;
let (input, _) = context("closing ')' for type annotation", cut(tag(")")))(input)?;
Ok((input, ty))
}

Expand All @@ -136,17 +146,26 @@ fn linespace(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
}

fn node_space(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
recognize(alt((
delimited(many0(whitespace), escline, many0(whitespace)),
recognize(many1(whitespace)),
node_slashdash,
)))(input)
context(
"node space",
recognize(alt((
delimited(many0(whitespace), escline, many0(whitespace)),
recognize(many1(whitespace)),
node_slashdash,
))),
)(input)
}

fn escline(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
recognize(preceded(
tag("\\"),
preceded(many0(whitespace), alt((single_line_comment, newline))),
context(
"newline after line escape",
cut(preceded(
many0(whitespace),
alt((single_line_comment, newline)),
)),
),
))(input)
}

Expand Down Expand Up @@ -193,12 +212,21 @@ fn comment(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {

/// `single-line-comment := '//' ('\r' [^\n] | [^\r\n])* (newline | eof)`
fn single_line_comment(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
recognize(preceded(tag("//"), many_till(anychar, alt((newline, eof)))))(input)
recognize(preceded(
tag("//"),
cut(many_till(
anychar,
context("newline or eof after //", alt((newline, eof))),
)),
))(input)
}

/// `multi-line-comment := '/*' commented-block`
fn multi_line_comment(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
recognize(preceded(tag("/*"), commented_block))(input)
recognize(preceded(
tag("/*"),
context("comment block body", cut(commented_block)),
))(input)
}

/// `commented-block := '*/' | (multi-line-comment | '*' | '/' | [^*/]+) commented-block`
Expand All @@ -215,12 +243,15 @@ fn commented_block(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
fn node_slashdash(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
recognize(preceded(
tag("/-"),
alt((recognize(entry), recognize(children))),
context(
"node following a slashdash",
cut(alt((recognize(entry), recognize(children)))),
),
))(input)
}

fn slashdash_comment(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
recognize(preceded(tag("/-"), node))(input)
recognize(preceded(tag("/-"), cut(node)))(input)
}

fn boolean(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>> {
Expand All @@ -245,14 +276,14 @@ fn string(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>>
original.push_str(raw);
value.push(processed);
}
let (input, _) = tag("\"")(input)?;
let (input, _) = cut(tag("\""))(input)?;
original.push('"');
Ok((input, (original, KdlValue::String(value))))
}

/// `character := '\' escape | [^\"]`
fn character(input: &str) -> IResult<&str, (&str, char), KdlParseError<&str>> {
with_raw(alt((preceded(char('\\'), escape), none_of("\\\""))))(input)
with_raw(alt((preceded(char('\\'), cut(escape)), none_of("\\\""))))(input)
}

/// This is like `recognize`, but _also_ returns the actual value.
Expand Down Expand Up @@ -296,7 +327,7 @@ pub(crate) static ESCAPE_CHARS: (phf::Map<char, char>, phf::Map<char, char>) = b
/// `escape := ["\\/bfnrt] | 'u{' hex-digit{1, 6} '}'`
fn escape(input: &str) -> IResult<&str, char, KdlParseError<&str>> {
alt((
delimited(tag("u{"), unicode, char('}')),
delimited(tag("u{"), cut(unicode), char('}')),
map_opt(anychar, |c| ESCAPE_CHARS.0.get(&c).copied()),
))(input)
}
Expand All @@ -320,12 +351,12 @@ fn raw_string(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&s
raw.push('r');
let (input, hashes) = recognize(many0(char('#')))(input)?;
raw.push_str(hashes);
let (input, _) = char('"')(input)?;
let (input, _) = cut(char('"'))(input)?;
raw.push('"');
let close = format!("\"{}", hashes);
let (input, value) = take_until(&close[..])(input)?;
raw.push_str(value);
let (input, _) = tag(&close[..])(input)?;
let (input, _) = cut(tag(&close[..]))(input)?;
raw.push_str(&close);
Ok((input, (raw, KdlValue::RawString(value.into()))))
}
Expand All @@ -335,12 +366,12 @@ fn float(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>>
with_raw(alt((
recognize(tuple((
integer,
opt(preceded(char('.'), integer)),
opt(preceded(char('.'), cut(integer))),
one_of("eE"),
opt(one_of("+-")),
integer,
cut(integer),
))),
recognize(tuple((integer, char('.'), integer))),
recognize(tuple((integer, char('.'), cut(integer)))),
))),
|(raw, x)| {
str::replace(x, "_", "")
Expand Down Expand Up @@ -380,10 +411,13 @@ fn hexadecimal(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&
map_res(
with_raw(preceded(
alt((tag("0x"), tag("0X"))),
recognize(many1(terminated(
one_of("0123456789abcdefABCDEF"),
many0(char('_')),
))),
context(
"hexadecimal value",
cut(recognize(many1(terminated(
one_of("0123456789abcdefABCDEF"),
many0(char('_')),
)))),
),
)),
move |(raw_body, hex): (&str, &str)| {
raw.push_str(raw_body);
Expand All @@ -402,7 +436,13 @@ fn octal(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>>
map_res(
with_raw(preceded(
alt((tag("0o"), tag("0O"))),
recognize(many1(terminated(one_of("01234567"), many0(char('_'))))),
context(
"octal value",
cut(recognize(many1(terminated(
one_of("01234567"),
many0(char('_')),
)))),
),
)),
move |(raw_body, oct): (&str, &str)| {
raw.push_str(raw_body);
Expand All @@ -421,7 +461,10 @@ fn binary(input: &str) -> IResult<&str, (String, KdlValue), KdlParseError<&str>>
map_res(
with_raw(preceded(
alt((tag("0b"), tag("0B"))),
recognize(many1(terminated(one_of("01"), many0(char('_'))))),
context(
"binary value",
cut(recognize(many1(terminated(one_of("01"), many0(char('_')))))),
),
)),
move |(raw_body, binary): (&str, &str)| {
raw.push_str(raw_body);
Expand Down

0 comments on commit 8ed6a5c

Please sign in to comment.