Skip to content

Commit

Permalink
fix(parser): Limit recursion to block stackoverflow
Browse files Browse the repository at this point in the history
Without the macros of the old parser, it was much easier to add new
state for this.  I chose the limit of 128 as that is what serde_json
does.

I didn't both exposing more configuration for this than the `unbounded`
feature.  We can add more later if needed but I doubt that.
Technically, this may break someone but the likelihood is extremely low,
especially with how toml isn't exactly designed for arbitrary depth and
with how recently this release was out.

Fixes #206
  • Loading branch information
epage committed Dec 27, 2022
1 parent f284001 commit 1ad4147
Show file tree
Hide file tree
Showing 9 changed files with 189 additions and 82 deletions.
6 changes: 6 additions & 0 deletions crates/toml_edit/Cargo.toml
Expand Up @@ -39,6 +39,12 @@ default = []
easy = ["serde"]
perf = ["dep:kstring"]
serde = ["dep:serde", "toml_datetime/serde"]
# Provide a method disable_recursion_limit to parse arbitrarily deep structures
# without any consideration for overflowing the stack. Additionally you will
# need to be careful around other recursive operations on the parsed result
# which may overflow the stack after deserialization has completed, including,
# but not limited to, Display and Debug and Drop impls.
unbounded = []

[dependencies]
indexmap = "1.9.1"
Expand Down
83 changes: 48 additions & 35 deletions crates/toml_edit/src/parser/array.rs
Expand Up @@ -12,15 +12,19 @@ use crate::parser::prelude::*;
// ;; Array

// array = array-open array-values array-close
pub(crate) fn array(input: Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> {
delimited(
ARRAY_OPEN,
cut(array_values),
cut(ARRAY_CLOSE)
.context(Context::Expression("array"))
.context(Context::Expected(ParserValue::CharLiteral(']'))),
)
.parse(input)
pub(crate) fn array(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> {
move |input| {
delimited(
ARRAY_OPEN,
cut(array_values(check)),
cut(ARRAY_CLOSE)
.context(Context::Expression("array"))
.context(Context::Expected(ParserValue::CharLiteral(']'))),
)
.parse(input)
}
}

// note: we're omitting ws and newlines here, because
Expand All @@ -35,36 +39,45 @@ const ARRAY_SEP: u8 = b',';
// note: this rule is modified
// array-values = [ ( array-value array-sep array-values ) /
// array-value / ws-comment-newline ]
pub(crate) fn array_values(input: Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> {
(
opt(
(separated_list1(ARRAY_SEP, array_value), opt(ARRAY_SEP)).map(
|(v, trailing): (Vec<Value>, Option<u8>)| {
pub(crate) fn array_values(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> {
move |input| {
let check = check.recursing(input)?;
(
opt((
separated_list1(ARRAY_SEP, array_value(check)),
opt(ARRAY_SEP),
)
.map(|(v, trailing): (Vec<Value>, Option<u8>)| {
(
Array::with_vec(v.into_iter().map(Item::Value).collect()),
trailing.is_some(),
)
},
),
),
ws_comment_newline,
)
.map_res::<_, _, std::str::Utf8Error>(|(array, trailing)| {
let (mut array, comma) = array.unwrap_or_default();
array.set_trailing_comma(comma);
array.set_trailing(std::str::from_utf8(trailing)?);
Ok(array)
})
.parse(input)
})),
ws_comment_newline,
)
.map_res::<_, _, std::str::Utf8Error>(|(array, trailing)| {
let (mut array, comma) = array.unwrap_or_default();
array.set_trailing_comma(comma);
array.set_trailing(std::str::from_utf8(trailing)?);
Ok(array)
})
.parse(input)
}
}

pub(crate) fn array_value(input: Input<'_>) -> IResult<Input<'_>, Value, ParserError<'_>> {
(ws_comment_newline, value, ws_comment_newline)
.map_res::<_, _, std::str::Utf8Error>(|(ws1, v, ws2)| {
let v = v.decorated(std::str::from_utf8(ws1)?, std::str::from_utf8(ws2)?);
Ok(v)
})
.parse(input)
pub(crate) fn array_value(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Value, ParserError<'_>> {
move |input| {
(ws_comment_newline, value(check), ws_comment_newline)
.map_res::<_, _, std::str::Utf8Error>(|(ws1, v, ws2)| {
let v = v.decorated(std::str::from_utf8(ws1)?, std::str::from_utf8(ws2)?);
Ok(v)
})
.parse(input)
}
}

#[cfg(test)]
Expand Down Expand Up @@ -109,13 +122,13 @@ mod test {
r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#,
];
for input in inputs {
let parsed = array.parse(input.as_bytes()).finish();
let parsed = array(Default::default()).parse(input.as_bytes()).finish();
assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned()));
}

let invalid_inputs = [r#"["#, r#"[,]"#, r#"[,2]"#, r#"[1e165,,]"#];
for input in invalid_inputs {
let parsed = array.parse(input.as_bytes()).finish();
let parsed = array(Default::default()).parse(input.as_bytes()).finish();
assert!(parsed.is_err());
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/toml_edit/src/parser/document.rs
Expand Up @@ -111,7 +111,7 @@ pub(crate) fn parse_keyval(
.context(Context::Expected(ParserValue::CharLiteral('='))),
(
ws,
value,
value(RecursionCheck::default()),
line_trailing
.context(Context::Expected(ParserValue::CharLiteral('\n')))
.context(Context::Expected(ParserValue::CharLiteral('#'))),
Expand Down
3 changes: 3 additions & 0 deletions crates/toml_edit/src/parser/errors.rs
Expand Up @@ -344,6 +344,8 @@ pub(crate) enum CustomError {
actual: &'static str,
},
OutOfRange,
#[cfg_attr(feature = "unbounded", allow(dead_code))]
RecursionLimitExceeded,
}

impl CustomError {
Expand Down Expand Up @@ -394,6 +396,7 @@ impl Display for CustomError {
)
}
CustomError::OutOfRange => writeln!(f, "Value is out of range"),
CustomError::RecursionLimitExceeded => writeln!(f, "Recursion limit exceded"),
}
}
}
92 changes: 54 additions & 38 deletions crates/toml_edit/src/parser/inline_table.rs
Expand Up @@ -17,15 +17,19 @@ use indexmap::map::Entry;
// ;; Inline Table

// inline-table = inline-table-open inline-table-keyvals inline-table-close
pub(crate) fn inline_table(input: Input<'_>) -> IResult<Input<'_>, InlineTable, ParserError<'_>> {
delimited(
INLINE_TABLE_OPEN,
cut(inline_table_keyvals.map_res(|(kv, p)| table_from_pairs(kv, p))),
cut(INLINE_TABLE_CLOSE)
.context(Context::Expression("inline table"))
.context(Context::Expected(ParserValue::CharLiteral('}'))),
)
.parse(input)
pub(crate) fn inline_table(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, InlineTable, ParserError<'_>> {
move |input| {
delimited(
INLINE_TABLE_OPEN,
cut(inline_table_keyvals(check).map_res(|(kv, p)| table_from_pairs(kv, p))),
cut(INLINE_TABLE_CLOSE)
.context(Context::Expression("inline table"))
.context(Context::Expected(ParserValue::CharLiteral('}'))),
)
.parse(input)
}
}

fn table_from_pairs(
Expand Down Expand Up @@ -93,36 +97,44 @@ pub(crate) const KEYVAL_SEP: u8 = b'=';
// ( key keyval-sep val )

fn inline_table_keyvals(
input: Input<'_>,
) -> IResult<Input<'_>, (Vec<(Vec<Key>, TableKeyValue)>, &str), ParserError<'_>> {
(separated_list0(INLINE_TABLE_SEP, keyval), ws).parse(input)
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, (Vec<(Vec<Key>, TableKeyValue)>, &str), ParserError<'_>>
{
move |input| {
let check = check.recursing(input)?;
(separated_list0(INLINE_TABLE_SEP, keyval(check)), ws).parse(input)
}
}

fn keyval(input: Input<'_>) -> IResult<Input<'_>, (Vec<Key>, TableKeyValue), ParserError<'_>> {
(
key,
cut((
one_of(KEYVAL_SEP)
.context(Context::Expected(ParserValue::CharLiteral('.')))
.context(Context::Expected(ParserValue::CharLiteral('='))),
(ws, value, ws),
)),
)
.map(|(key, (_, v))| {
let mut path = key;
let key = path.pop().expect("grammar ensures at least 1");
fn keyval(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, (Vec<Key>, TableKeyValue), ParserError<'_>> {
move |input| {
(
key,
cut((
one_of(KEYVAL_SEP)
.context(Context::Expected(ParserValue::CharLiteral('.')))
.context(Context::Expected(ParserValue::CharLiteral('='))),
(ws, value(check), ws),
)),
)
.map(|(key, (_, v))| {
let mut path = key;
let key = path.pop().expect("grammar ensures at least 1");

let (pre, v, suf) = v;
let v = v.decorated(pre, suf);
(
path,
TableKeyValue {
key,
value: Item::Value(v),
},
)
})
.parse(input)
let (pre, v, suf) = v;
let v = v.decorated(pre, suf);
(
path,
TableKeyValue {
key,
value: Item::Value(v),
},
)
})
.parse(input)
}
}

#[cfg(test)]
Expand All @@ -139,12 +151,16 @@ mod test {
r#"{ hello.world = "a" }"#,
];
for input in inputs {
let parsed = inline_table.parse(input.as_bytes()).finish();
let parsed = inline_table(Default::default())
.parse(input.as_bytes())
.finish();
assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned()));
}
let invalid_inputs = [r#"{a = 1e165"#, r#"{ hello = "world", a = 2, hello = 1}"#];
for input in invalid_inputs {
let parsed = inline_table.parse(input.as_bytes()).finish();
let parsed = inline_table(Default::default())
.parse(input.as_bytes())
.finish();
assert!(parsed.is_err());
}
}
Expand Down
43 changes: 42 additions & 1 deletion crates/toml_edit/src/parser/mod.rs
Expand Up @@ -18,7 +18,7 @@ pub(crate) mod value;

pub use errors::TomlError;

mod prelude {
pub(crate) mod prelude {
pub(crate) use super::errors::Context;
pub(crate) use super::errors::ParserError;
pub(crate) use super::errors::ParserValue;
Expand Down Expand Up @@ -61,4 +61,45 @@ mod prelude {
}
}
}

#[cfg(not(feature = "unbounded"))]
#[derive(Copy, Clone, Debug, Default)]
pub(crate) struct RecursionCheck {
current: usize,
}

#[cfg(not(feature = "unbounded"))]
impl RecursionCheck {
pub(crate) fn recursing(
mut self,
input: Input<'_>,
) -> Result<Self, nom8::Err<ParserError<'_>>> {
self.current += 1;
if self.current < 128 {
Ok(self)
} else {
Err(nom8::Err::Error(
nom8::error::FromExternalError::from_external_error(
input,
nom8::error::ErrorKind::Eof,
super::errors::CustomError::RecursionLimitExceeded,
),
))
}
}
}

#[cfg(feature = "unbounded")]
#[derive(Copy, Clone, Debug, Default)]
pub(crate) struct RecursionCheck {}

#[cfg(feature = "unbounded")]
impl RecursionCheck {
pub(crate) fn recursing(
self,
_input: Input<'_>,
) -> Result<Self, nom8::Err<ParserError<'_>>> {
Ok(self)
}
}
}
14 changes: 9 additions & 5 deletions crates/toml_edit/src/parser/value.rs
Expand Up @@ -15,16 +15,19 @@ use crate::value as v;
use crate::Value;

// val = string / boolean / array / inline-table / date-time / float / integer
pub(crate) fn value(input: Input<'_>) -> IResult<Input<'_>, v::Value, ParserError<'_>> {
dispatch!{peek(any);
pub(crate) fn value(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, v::Value, ParserError<'_>> {
move |input| {
dispatch!{peek(any);
crate::parser::strings::QUOTATION_MARK |
crate::parser::strings::APOSTROPHE => string.map(|s| {
v::Value::String(Formatted::new(
s.into_owned()
))
}),
crate::parser::array::ARRAY_OPEN => array.map(v::Value::Array),
crate::parser::inline_table::INLINE_TABLE_OPEN => inline_table.map(v::Value::InlineTable),
crate::parser::array::ARRAY_OPEN => array(check).map(v::Value::Array),
crate::parser::inline_table::INLINE_TABLE_OPEN => inline_table(check).map(v::Value::InlineTable),
// Date/number starts
b'+' | b'-' | b'0'..=b'9' => {
// Uncommon enough not to be worth optimizing at this time
Expand Down Expand Up @@ -83,6 +86,7 @@ pub(crate) fn value(input: Input<'_>) -> IResult<Input<'_>, v::Value, ParserErro
.with_recognized()
.map_res(|(value, raw)| apply_raw(value, raw))
.parse(input)
}
}

fn apply_raw(mut val: Value, raw: &[u8]) -> Result<Value, std::str::Utf8Error> {
Expand Down Expand Up @@ -137,7 +141,7 @@ trimmed in raw strings.
r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#,
];
for input in inputs {
let parsed = value.parse(input.as_bytes()).finish();
let parsed = value(Default::default()).parse(input.as_bytes()).finish();
assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned()));
}
}
Expand Down
7 changes: 5 additions & 2 deletions crates/toml_edit/src/value.rs
Expand Up @@ -211,10 +211,13 @@ impl FromStr for Value {

/// Parses a value from a &str
fn from_str(s: &str) -> Result<Self, Self::Err> {
use nom8::prelude::*;
use crate::parser::prelude::*;
use nom8::FinishIResult;

let b = s.as_bytes();
let parsed = parser::value::value.parse(b).finish();
let parsed = parser::value::value(RecursionCheck::default())
.parse(b)
.finish();
match parsed {
Ok(mut value) => {
// Only take the repr and not decor, as its probably not intended
Expand Down

0 comments on commit 1ad4147

Please sign in to comment.