Skip to content

Commit

Permalink
Merge pull request #404 from epage/limit
Browse files Browse the repository at this point in the history
fix(parser): Limit recursion to block stackoverflow
  • Loading branch information
epage committed Dec 27, 2022
2 parents f284001 + 1ad4147 commit 0c71a0f
Show file tree
Hide file tree
Showing 9 changed files with 189 additions and 82 deletions.
6 changes: 6 additions & 0 deletions crates/toml_edit/Cargo.toml
Expand Up @@ -39,6 +39,12 @@ default = []
easy = ["serde"]
perf = ["dep:kstring"]
serde = ["dep:serde", "toml_datetime/serde"]
# Provide a method disable_recursion_limit to parse arbitrarily deep structures
# without any consideration for overflowing the stack. Additionally you will
# need to be careful around other recursive operations on the parsed result
# which may overflow the stack after deserialization has completed, including,
# but not limited to, Display and Debug and Drop impls.
unbounded = []

[dependencies]
indexmap = "1.9.1"
Expand Down
83 changes: 48 additions & 35 deletions crates/toml_edit/src/parser/array.rs
Expand Up @@ -12,15 +12,19 @@ use crate::parser::prelude::*;
// ;; Array

// array = array-open array-values array-close
pub(crate) fn array(input: Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> {
delimited(
ARRAY_OPEN,
cut(array_values),
cut(ARRAY_CLOSE)
.context(Context::Expression("array"))
.context(Context::Expected(ParserValue::CharLiteral(']'))),
)
.parse(input)
pub(crate) fn array(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> {
move |input| {
delimited(
ARRAY_OPEN,
cut(array_values(check)),
cut(ARRAY_CLOSE)
.context(Context::Expression("array"))
.context(Context::Expected(ParserValue::CharLiteral(']'))),
)
.parse(input)
}
}

// note: we're omitting ws and newlines here, because
Expand All @@ -35,36 +39,45 @@ const ARRAY_SEP: u8 = b',';
// note: this rule is modified
// array-values = [ ( array-value array-sep array-values ) /
// array-value / ws-comment-newline ]
pub(crate) fn array_values(input: Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> {
(
opt(
(separated_list1(ARRAY_SEP, array_value), opt(ARRAY_SEP)).map(
|(v, trailing): (Vec<Value>, Option<u8>)| {
pub(crate) fn array_values(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Array, ParserError<'_>> {
move |input| {
let check = check.recursing(input)?;
(
opt((
separated_list1(ARRAY_SEP, array_value(check)),
opt(ARRAY_SEP),
)
.map(|(v, trailing): (Vec<Value>, Option<u8>)| {
(
Array::with_vec(v.into_iter().map(Item::Value).collect()),
trailing.is_some(),
)
},
),
),
ws_comment_newline,
)
.map_res::<_, _, std::str::Utf8Error>(|(array, trailing)| {
let (mut array, comma) = array.unwrap_or_default();
array.set_trailing_comma(comma);
array.set_trailing(std::str::from_utf8(trailing)?);
Ok(array)
})
.parse(input)
})),
ws_comment_newline,
)
.map_res::<_, _, std::str::Utf8Error>(|(array, trailing)| {
let (mut array, comma) = array.unwrap_or_default();
array.set_trailing_comma(comma);
array.set_trailing(std::str::from_utf8(trailing)?);
Ok(array)
})
.parse(input)
}
}

pub(crate) fn array_value(input: Input<'_>) -> IResult<Input<'_>, Value, ParserError<'_>> {
(ws_comment_newline, value, ws_comment_newline)
.map_res::<_, _, std::str::Utf8Error>(|(ws1, v, ws2)| {
let v = v.decorated(std::str::from_utf8(ws1)?, std::str::from_utf8(ws2)?);
Ok(v)
})
.parse(input)
pub(crate) fn array_value(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, Value, ParserError<'_>> {
move |input| {
(ws_comment_newline, value(check), ws_comment_newline)
.map_res::<_, _, std::str::Utf8Error>(|(ws1, v, ws2)| {
let v = v.decorated(std::str::from_utf8(ws1)?, std::str::from_utf8(ws2)?);
Ok(v)
})
.parse(input)
}
}

#[cfg(test)]
Expand Down Expand Up @@ -109,13 +122,13 @@ mod test {
r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#,
];
for input in inputs {
let parsed = array.parse(input.as_bytes()).finish();
let parsed = array(Default::default()).parse(input.as_bytes()).finish();
assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned()));
}

let invalid_inputs = [r#"["#, r#"[,]"#, r#"[,2]"#, r#"[1e165,,]"#];
for input in invalid_inputs {
let parsed = array.parse(input.as_bytes()).finish();
let parsed = array(Default::default()).parse(input.as_bytes()).finish();
assert!(parsed.is_err());
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/toml_edit/src/parser/document.rs
Expand Up @@ -111,7 +111,7 @@ pub(crate) fn parse_keyval(
.context(Context::Expected(ParserValue::CharLiteral('='))),
(
ws,
value,
value(RecursionCheck::default()),
line_trailing
.context(Context::Expected(ParserValue::CharLiteral('\n')))
.context(Context::Expected(ParserValue::CharLiteral('#'))),
Expand Down
3 changes: 3 additions & 0 deletions crates/toml_edit/src/parser/errors.rs
Expand Up @@ -344,6 +344,8 @@ pub(crate) enum CustomError {
actual: &'static str,
},
OutOfRange,
#[cfg_attr(feature = "unbounded", allow(dead_code))]
RecursionLimitExceeded,
}

impl CustomError {
Expand Down Expand Up @@ -394,6 +396,7 @@ impl Display for CustomError {
)
}
CustomError::OutOfRange => writeln!(f, "Value is out of range"),
CustomError::RecursionLimitExceeded => writeln!(f, "Recursion limit exceded"),
}
}
}
92 changes: 54 additions & 38 deletions crates/toml_edit/src/parser/inline_table.rs
Expand Up @@ -17,15 +17,19 @@ use indexmap::map::Entry;
// ;; Inline Table

// inline-table = inline-table-open inline-table-keyvals inline-table-close
pub(crate) fn inline_table(input: Input<'_>) -> IResult<Input<'_>, InlineTable, ParserError<'_>> {
delimited(
INLINE_TABLE_OPEN,
cut(inline_table_keyvals.map_res(|(kv, p)| table_from_pairs(kv, p))),
cut(INLINE_TABLE_CLOSE)
.context(Context::Expression("inline table"))
.context(Context::Expected(ParserValue::CharLiteral('}'))),
)
.parse(input)
pub(crate) fn inline_table(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, InlineTable, ParserError<'_>> {
move |input| {
delimited(
INLINE_TABLE_OPEN,
cut(inline_table_keyvals(check).map_res(|(kv, p)| table_from_pairs(kv, p))),
cut(INLINE_TABLE_CLOSE)
.context(Context::Expression("inline table"))
.context(Context::Expected(ParserValue::CharLiteral('}'))),
)
.parse(input)
}
}

fn table_from_pairs(
Expand Down Expand Up @@ -93,36 +97,44 @@ pub(crate) const KEYVAL_SEP: u8 = b'=';
// ( key keyval-sep val )

fn inline_table_keyvals(
input: Input<'_>,
) -> IResult<Input<'_>, (Vec<(Vec<Key>, TableKeyValue)>, &str), ParserError<'_>> {
(separated_list0(INLINE_TABLE_SEP, keyval), ws).parse(input)
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, (Vec<(Vec<Key>, TableKeyValue)>, &str), ParserError<'_>>
{
move |input| {
let check = check.recursing(input)?;
(separated_list0(INLINE_TABLE_SEP, keyval(check)), ws).parse(input)
}
}

fn keyval(input: Input<'_>) -> IResult<Input<'_>, (Vec<Key>, TableKeyValue), ParserError<'_>> {
(
key,
cut((
one_of(KEYVAL_SEP)
.context(Context::Expected(ParserValue::CharLiteral('.')))
.context(Context::Expected(ParserValue::CharLiteral('='))),
(ws, value, ws),
)),
)
.map(|(key, (_, v))| {
let mut path = key;
let key = path.pop().expect("grammar ensures at least 1");
fn keyval(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, (Vec<Key>, TableKeyValue), ParserError<'_>> {
move |input| {
(
key,
cut((
one_of(KEYVAL_SEP)
.context(Context::Expected(ParserValue::CharLiteral('.')))
.context(Context::Expected(ParserValue::CharLiteral('='))),
(ws, value(check), ws),
)),
)
.map(|(key, (_, v))| {
let mut path = key;
let key = path.pop().expect("grammar ensures at least 1");

let (pre, v, suf) = v;
let v = v.decorated(pre, suf);
(
path,
TableKeyValue {
key,
value: Item::Value(v),
},
)
})
.parse(input)
let (pre, v, suf) = v;
let v = v.decorated(pre, suf);
(
path,
TableKeyValue {
key,
value: Item::Value(v),
},
)
})
.parse(input)
}
}

#[cfg(test)]
Expand All @@ -139,12 +151,16 @@ mod test {
r#"{ hello.world = "a" }"#,
];
for input in inputs {
let parsed = inline_table.parse(input.as_bytes()).finish();
let parsed = inline_table(Default::default())
.parse(input.as_bytes())
.finish();
assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned()));
}
let invalid_inputs = [r#"{a = 1e165"#, r#"{ hello = "world", a = 2, hello = 1}"#];
for input in invalid_inputs {
let parsed = inline_table.parse(input.as_bytes()).finish();
let parsed = inline_table(Default::default())
.parse(input.as_bytes())
.finish();
assert!(parsed.is_err());
}
}
Expand Down
43 changes: 42 additions & 1 deletion crates/toml_edit/src/parser/mod.rs
Expand Up @@ -18,7 +18,7 @@ pub(crate) mod value;

pub use errors::TomlError;

mod prelude {
pub(crate) mod prelude {
pub(crate) use super::errors::Context;
pub(crate) use super::errors::ParserError;
pub(crate) use super::errors::ParserValue;
Expand Down Expand Up @@ -61,4 +61,45 @@ mod prelude {
}
}
}

#[cfg(not(feature = "unbounded"))]
#[derive(Copy, Clone, Debug, Default)]
pub(crate) struct RecursionCheck {
current: usize,
}

#[cfg(not(feature = "unbounded"))]
impl RecursionCheck {
pub(crate) fn recursing(
mut self,
input: Input<'_>,
) -> Result<Self, nom8::Err<ParserError<'_>>> {
self.current += 1;
if self.current < 128 {
Ok(self)
} else {
Err(nom8::Err::Error(
nom8::error::FromExternalError::from_external_error(
input,
nom8::error::ErrorKind::Eof,
super::errors::CustomError::RecursionLimitExceeded,
),
))
}
}
}

#[cfg(feature = "unbounded")]
#[derive(Copy, Clone, Debug, Default)]
pub(crate) struct RecursionCheck {}

#[cfg(feature = "unbounded")]
impl RecursionCheck {
pub(crate) fn recursing(
self,
_input: Input<'_>,
) -> Result<Self, nom8::Err<ParserError<'_>>> {
Ok(self)
}
}
}
14 changes: 9 additions & 5 deletions crates/toml_edit/src/parser/value.rs
Expand Up @@ -15,16 +15,19 @@ use crate::value as v;
use crate::Value;

// val = string / boolean / array / inline-table / date-time / float / integer
pub(crate) fn value(input: Input<'_>) -> IResult<Input<'_>, v::Value, ParserError<'_>> {
dispatch!{peek(any);
pub(crate) fn value(
check: RecursionCheck,
) -> impl FnMut(Input<'_>) -> IResult<Input<'_>, v::Value, ParserError<'_>> {
move |input| {
dispatch!{peek(any);
crate::parser::strings::QUOTATION_MARK |
crate::parser::strings::APOSTROPHE => string.map(|s| {
v::Value::String(Formatted::new(
s.into_owned()
))
}),
crate::parser::array::ARRAY_OPEN => array.map(v::Value::Array),
crate::parser::inline_table::INLINE_TABLE_OPEN => inline_table.map(v::Value::InlineTable),
crate::parser::array::ARRAY_OPEN => array(check).map(v::Value::Array),
crate::parser::inline_table::INLINE_TABLE_OPEN => inline_table(check).map(v::Value::InlineTable),
// Date/number starts
b'+' | b'-' | b'0'..=b'9' => {
// Uncommon enough not to be worth optimizing at this time
Expand Down Expand Up @@ -83,6 +86,7 @@ pub(crate) fn value(input: Input<'_>) -> IResult<Input<'_>, v::Value, ParserErro
.with_recognized()
.map_res(|(value, raw)| apply_raw(value, raw))
.parse(input)
}
}

fn apply_raw(mut val: Value, raw: &[u8]) -> Result<Value, std::str::Utf8Error> {
Expand Down Expand Up @@ -137,7 +141,7 @@ trimmed in raw strings.
r#"[ { x = 1, a = "2" }, {a = "a",b = "b", c = "c"} ]"#,
];
for input in inputs {
let parsed = value.parse(input.as_bytes()).finish();
let parsed = value(Default::default()).parse(input.as_bytes()).finish();
assert_eq!(parsed.map(|a| a.to_string()), Ok(input.to_owned()));
}
}
Expand Down
7 changes: 5 additions & 2 deletions crates/toml_edit/src/value.rs
Expand Up @@ -211,10 +211,13 @@ impl FromStr for Value {

/// Parses a value from a &str
fn from_str(s: &str) -> Result<Self, Self::Err> {
use nom8::prelude::*;
use crate::parser::prelude::*;
use nom8::FinishIResult;

let b = s.as_bytes();
let parsed = parser::value::value.parse(b).finish();
let parsed = parser::value::value(RecursionCheck::default())
.parse(b)
.finish();
match parsed {
Ok(mut value) => {
// Only take the repr and not decor, as its probably not intended
Expand Down

0 comments on commit 0c71a0f

Please sign in to comment.