diff --git a/LICENSE b/LICENSE index efec544..dfae55f 100644 --- a/LICENSE +++ b/LICENSE @@ -24,13 +24,12 @@ SOFTWARE. This project contains code from the following third-party project, which is licensed under the MIT License: -denoland/std - The original license is as follows: -MIT License +The MIT License (MIT) -Copyright 2018-2022 the Deno authors. +Copyright (c) 2015 Chen Yuheng +Copyright (c) 2023 Ethiraric Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.mbt.md b/README.mbt.md index b599623..e7621c3 100644 --- a/README.mbt.md +++ b/README.mbt.md @@ -1,5 +1,5 @@ # YAML -A comprehensive YAML parsing and stringifying library for MoonBit, supporting YAML 1.2. +A simple YAML parsing and stringifying library for MoonBit, supporting a simplified YAML subset which can be converted to JSON. This library is ported from yaml-rust2. \ No newline at end of file diff --git a/moon.mod.json b/moon.mod.json index d7ad922..fc04e8b 100644 --- a/moon.mod.json +++ b/moon.mod.json @@ -2,7 +2,7 @@ "name": "myfreess/yaml", "version": "0.1.0", "readme": "README.md", - "repository": "", + "repository": "https://github.com/moonbit-community/yaml.mbt", "license": "MIT", "keywords": [], "description": "", diff --git a/src/error.mbt b/src/error.mbt new file mode 100644 index 0000000..9a80afb --- /dev/null +++ b/src/error.mbt @@ -0,0 +1,6 @@ +///| +pub suberror YamlError { + /// - `mark`: The position at which the error happened in the source. + /// - `info`: Human-readable details about the error. + YamlError(mark~ : Marker, info~ : String) +} derive(Show) diff --git a/src/event.mbt b/src/event.mbt new file mode 100644 index 0000000..d89ff2c --- /dev/null +++ b/src/event.mbt @@ -0,0 +1,92 @@ +///| +pub enum Event { + /// Event generated at the very beginning of parsing. + StreamStart + /// Last event that will be generated by the parser. Signals EOF. 
+ StreamEnd + /// The YAML start document directive (`---`). + DocumentStart + /// The YAML end document directive (`...`). + DocumentEnd + /// A YAML Alias. + /// - id : The anchor ID the alias refers to. + Alias(id~ : Int) + /// Value, style, anchor id, tag + Scalar(value~ : String, style~ : TScalarStyle, id~ : Int, tag~ : Tag?) + /// The start of a YAML sequence (array). + /// - id : The anchor ID of the start of the sequence. + /// - tag : An optional tag + SequenceStart(id~ : Int, tag~ : Tag?) + /// The end of a YAML sequence (array). + SequenceEnd + /// The start of a YAML mapping (object, hash). + /// - id : The anchor ID of the start of the mapping. + /// - tag : An optional tag + MappingStart(id~ : Int, tag~ : Tag?) + /// The end of a YAML mapping (object, hash). + MappingEnd +} derive(Eq, Show) + +///| +fn Event::empty_scalar() -> Event { + Event::Scalar(value="", style=TScalarStyle::Plain, id=0, tag=None) +} + +///| +fn Event::empty_scalar_with_anchor(anchor : Int, tag : Tag?) -> Event { + Event::Scalar(value="", style=TScalarStyle::Plain, id=anchor, tag~) +} + +///| +/// Trait to be implemented in order to use the low-level parsing API. +/// +/// The low-level parsing API is event-based (a push parser), calling [`EventReceiver::on_event`] +/// for each YAML [`Event`] that occurs. +/// The [`EventReceiver`] trait only receives events. In order to receive both events and their +/// location in the source, use [`MarkedEventReceiver`]. Note that [`EventReceiver`]s implement +/// [`MarkedEventReceiver`] automatically. +/// +/// # Event hierarchy +/// The event stream starts with an [`Event::StreamStart`] event followed by an +/// [`Event::DocumentStart`] event. If the YAML document starts with a mapping (an object), an +/// [`Event::MappingStart`] event is emitted. If it starts with a sequence (an array), an +/// [`Event::SequenceStart`] event is emitted. Otherwise, an [`Event::Scalar`] event is emitted. 
+/// +/// In a mapping, key-values are sent as consecutive events. The first event after an +/// [`Event::MappingStart`] will be the key, followed by its value. If the mapping contains no +/// sub-mapping or sub-sequence, then even events (starting from 0) will always be keys and odd +/// ones will always be values. The mapping ends when an [`Event::MappingEnd`] event is received. +/// +/// In a sequence, values are sent consecutively until the [`Event::SequenceEnd`] event. +/// +/// If a value is a sub-mapping or a sub-sequence, an [`Event::MappingStart`] or +/// [`Event::SequenceStart`] event will be sent respectively. Following events until the associated +/// [`Event::MappingEnd`] or [`Event::SequenceEnd`] (beware of nested mappings or sequences) will +/// be part of the value and not another key-value pair or element in the sequence. +/// +/// For instance, the following yaml: +/// ```yaml +/// a: b +/// c: +/// d: e +/// f: +/// - g +/// - h +/// ``` +/// will emit (indented and commented for readability): +/// ```text +/// StreamStart, DocumentStart, MappingStart, +/// Scalar("a", ..), Scalar("b", ..) +/// Scalar("c", ..), MappingStart, Scalar("d", ..), Scalar("e", ..), MappingEnd, +/// Scalar("f", ..), SequenceStart, Scalar("g", ..), Scalar("h", ..), SequenceEnd, +/// MappingEnd, DocumentEnd, StreamEnd +/// ``` +pub trait EventReceiver { + /// Handler called for each YAML event that is emitted by the parser. + on_event(Self, event : Event) -> Unit +} + +///| +pub trait MarkedEventReceiver { + on_event(Self, event : Event, _mark : Marker) -> Unit +} diff --git a/src/lexer.mbt b/src/lexer.mbt index 0a8a99d..15aa5a1 100644 --- a/src/lexer.mbt +++ b/src/lexer.mbt @@ -1,36 +1,5 @@ ///| -pub enum TScalarStyle { - /// A YAML plain scalar. - Plain - /// A YAML single quoted scalar. - SingleQuoted - /// A YAML double quoted scalar. - DoubleQuoted - /// A YAML literal block (`|` block). - Literal - /// A YAML folded block (`>` block). 
- Folded -} derive(Eq, Show) - -///| -pub struct Marker { - /// The index (in chars) in the input string. - index : Int - /// The line (1-indexed). - line : Int - /// The column (1-indexed). - col : Int -} derive(Show) - -///| -pub suberror LexError { - /// - `mark`: The position at which the error happened in the source. - /// - `info`: Human-readable details about the error. - LexError(mark~ : Marker, info~ : String) -} derive(Show) - -///| -pub enum TokenType { +priv enum TokenType { /// The start of the stream. Sent first, before even `TokenType::DocumentStart`. StreamStart /// The end of the stream, EOF. @@ -80,7 +49,7 @@ pub enum TokenType { } derive(Eq, Show) ///| -pub struct Token { +priv struct Token { marker : Marker token_type : TokenType } derive(Show) @@ -120,7 +89,7 @@ pub struct Token { /// /// In the second document however, reaching the EOF would stale the `SimpleKey` and no /// `TokenType::Key` would be emitted by the scanner. -struct SimpleKey { +priv struct SimpleKey { /// Whether the token this `SimpleKey` refers to may still be a key. /// /// Sometimes, when we have more context, we notice that what we thought could be a key no @@ -198,7 +167,7 @@ priv struct Indent { /// This corresponds to the low-level interface when reading YAML, but it also holds sufficient context /// to be able to disambiguate some of the constructs. It has understanding of indentation and whitespace and is able to /// generate error messages for some invalid YAML constructs. -struct Lexer { +priv struct Lexer { mut input : StringView /// The index (in chars) in the input string. mut index : Int @@ -211,7 +180,7 @@ struct Lexer { /// Buffer for the next characters to consume. buffer : @deque.Deque[Char] /// The last error that happened. - mut error : LexError? + mut error : YamlError? /// Whether we have already emitted the `StreamStart` token. mut stream_start_produced : Bool /// Whether we have already emitted the `StreamEnd` token. 
@@ -267,7 +236,7 @@ struct Lexer { const BUFFER_LEN = 16 ///| -pub fn Lexer::new(input : StringView) -> Lexer { +fn Lexer::new(input : StringView) -> Lexer { Lexer::{ input, index: 0, @@ -294,7 +263,7 @@ pub fn Lexer::new(input : StringView) -> Lexer { ///| /// Get the last error that was encountered, if any. -fn Lexer::get_error(self : Lexer) -> LexError? { +fn Lexer::get_error(self : Lexer) -> YamlError? { self.error } @@ -418,6 +387,12 @@ fn Lexer::stream_ended(self : Lexer) -> Bool { self.stream_end_produced } +///| +/// Return whether the `TokenType::StreamStart` event has been emitted +fn Lexer::stream_started(self : Lexer) -> Bool { + self.stream_start_produced +} + ///| /// Read and consume a line break (either `\r`, `\n` or `\r\n`). /// @@ -425,7 +400,7 @@ fn Lexer::stream_ended(self : Lexer) -> Bool { fn Lexer::read_break(self : Lexer, buf : StringBuilder) -> Unit { let c = self.buffer[0] let nc = self.buffer[1] - guard c.is_break() + // guard c.is_break() if c == '\r' && nc == '\n' { self.skip_blank() } @@ -477,12 +452,12 @@ fn Lexer::disallow_simple_key(self : Lexer) -> Unit { } ///| -pub fn Lexer::next(self : Lexer) -> Token? { +fn Lexer::next(self : Lexer) -> Token? { if self.error is Some(_) { return None } let tok = self.next_token() catch { - LexError(_) as err => { + YamlError(_) as err => { self.error = Some(err) return None } @@ -492,8 +467,8 @@ pub fn Lexer::next(self : Lexer) -> Token? { ///| /// Fetch the next token in the stream. -/// Returns `LexError` when the scanner does not find the next expected token. -pub fn Lexer::fetch_next_token(self : Lexer) -> Unit raise LexError { +/// Returns `YamlError` when the scanner does not find the next expected token. 
+fn Lexer::fetch_next_token(self : Lexer) -> Unit raise YamlError { self.lookahead(1) if !self.stream_start_produced { self.fetch_stream_start() @@ -529,7 +504,7 @@ pub fn Lexer::fetch_next_token(self : Lexer) -> Unit raise LexError { self.fetch_document_indicator(TokenType::DocumentEnd) ignore(self.skip_ws_to_eol(SkipTabs::Yes)) if !self.char().is_breakz() { - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="invalid content after document end marker", ) @@ -537,7 +512,10 @@ pub fn Lexer::fetch_next_token(self : Lexer) -> Unit raise LexError { return } if self.col < self.indent { - raise LexError::LexError(mark=self.get_marker(), info="invalid indentation") + raise YamlError::YamlError( + mark=self.get_marker(), + info="invalid indentation", + ) } let c = self.buffer[0] let nc = self.buffer[1] @@ -570,7 +548,7 @@ pub fn Lexer::fetch_next_token(self : Lexer) -> Unit raise LexError { ':' | '?' if nc.is_blank_or_breakz() && self.flow_level == 0 => self.fetch_plain_scalar() '%' | '@' | '`' => - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="unexpected character: \{c}", ) @@ -594,8 +572,8 @@ fn Lexer::fetch_stream_start(self : Lexer) -> Unit { ///| /// Return the next token in the stream. /// # Errors -/// Returns `LexError` when scanning fails to find an expected next token. -pub fn Lexer::next_token(self : Lexer) -> Token? raise LexError { +/// Returns `YamlError` when scanning fails to find an expected next token. +fn Lexer::next_token(self : Lexer) -> Token? raise YamlError { if self.stream_end_produced { return None } @@ -603,7 +581,7 @@ pub fn Lexer::next_token(self : Lexer) -> Token? raise LexError { self.fetch_more_tokens() } guard self.tokens.pop_front() is Some(t) else { - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="did not find expected next token", ) @@ -619,8 +597,8 @@ pub fn Lexer::next_token(self : Lexer) -> Token? 
raise LexError { ///| /// Return the next token in the stream. /// # Errors -/// Returns `LexError` when scanning fails to find an expected next token. -pub fn Lexer::fetch_more_tokens(self : Lexer) -> Unit raise LexError { +/// Returns `YamlError` when scanning fails to find an expected next token. +fn Lexer::fetch_more_tokens(self : Lexer) -> Unit raise YamlError { let mut need_more = false while true { if self.tokens.is_empty() { @@ -646,7 +624,7 @@ pub fn Lexer::fetch_more_tokens(self : Lexer) -> Unit raise LexError { } ///| -fn Lexer::fetch_plain_scalar(self : Lexer) -> Unit raise LexError { +fn Lexer::fetch_plain_scalar(self : Lexer) -> Unit raise YamlError { self.save_simple_key() self.disallow_simple_key() let tok = self.scan_plain_scalar() @@ -658,12 +636,12 @@ fn Lexer::fetch_plain_scalar(self : Lexer) -> Unit raise LexError { /// /// Plain scalars are the most readable but restricted style. They may span multiple lines in /// some contexts. -fn Lexer::scan_plain_scalar(self : Lexer) -> Token raise LexError { +fn Lexer::scan_plain_scalar(self : Lexer) -> Token raise YamlError { self.unroll_non_block_indents() let indent = self.indent + 1 let start_mark = self.get_marker() if self.flow_level > 0 && self.col < indent { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="invalid indentation in flow construct", ) @@ -678,7 +656,7 @@ fn Lexer::scan_plain_scalar(self : Lexer) -> Token raise LexError { break } if self.flow_level > 0 && self.char() == '-' && self.buffer[1].is_flow() { - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="plain scalar cannot start with '-' followed by ,[]{}", ) @@ -740,7 +718,7 @@ fn Lexer::scan_plain_scalar(self : Lexer) -> Token raise LexError { // empty. Skip to the end of the line. 
ignore(self.skip_ws_to_eol(SkipTabs::Yes)) if !self.char().is_breakz() { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="while scanning a plain scalar, found a tab", ) @@ -790,7 +768,10 @@ fn Lexer::next_can_be_plain_scalar(self : Lexer) -> Bool { } ///| -fn Lexer::fetch_flow_scalar(self : Lexer, single : Bool) -> Unit raise LexError { +fn Lexer::fetch_flow_scalar( + self : Lexer, + single : Bool, +) -> Unit raise YamlError { self.save_simple_key() self.disallow_simple_key() let tok = self.scan_flow_scalar(single) @@ -803,7 +784,10 @@ fn Lexer::fetch_flow_scalar(self : Lexer, single : Bool) -> Unit raise LexError } ///| -fn Lexer::scan_flow_scalar(self : Lexer, single : Bool) -> Token raise LexError { +fn Lexer::scan_flow_scalar( + self : Lexer, + single : Bool, +) -> Token raise YamlError { let start_mark = self.get_marker() let buf = StringBuilder::new() let leading_break = StringBuilder::new() @@ -831,19 +815,19 @@ fn Lexer::scan_flow_scalar(self : Lexer, single : Bool) -> Token raise LexError ) ) && self.buffer[3].is_blank_or_breakz() { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="while scanning a quoted scalar, found unexpected document indicator", ) } if self.char().is_z() { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="while scanning a quoted scalar, found unexpected end of stream", ) } if self.col < self.indent { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="invalid indentation in quoted scalar", ) @@ -864,7 +848,7 @@ fn Lexer::scan_flow_scalar(self : Lexer, single : Bool) -> Token raise LexError // Consume a space or a tab character. 
if leading_blanks.val { if self.char() == '\t' && self.col < self.indent { - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="tab cannot be used as indentation", ) @@ -925,7 +909,7 @@ fn Lexer::scan_flow_scalar(self : Lexer, single : Bool) -> Token raise LexError // Inside a flow context, this is allowed. ':' if self.flow_level > 0 => () _ => - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="invalid trailing content after double-quoted scalar", ) @@ -956,7 +940,7 @@ fn Lexer::consume_flow_scalar_non_whitespace_chars( buf : StringBuilder, leading_blanks : Ref[Bool], start_mark : Marker, -) -> Unit raise LexError { +) -> Unit raise YamlError { self.lookahead(2) while !self.char().is_blank_or_breakz() { match self.char() { @@ -998,7 +982,7 @@ fn Lexer::consume_flow_scalar_non_whitespace_chars( fn Lexer::resolve_flow_scalar_escape_sequence( self : Lexer, start_mark : Marker, -) -> Char raise LexError { +) -> Char raise YamlError { let mut code_length = 0 let mut ret = '\u{0}' match self.buffer[1] { @@ -1027,7 +1011,7 @@ fn Lexer::resolve_flow_scalar_escape_sequence( 'u' => code_length = 4 'U' => code_length = 8 _ => - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="while parsing a quoted scalar, found unknown escape character", ) @@ -1040,7 +1024,7 @@ fn Lexer::resolve_flow_scalar_escape_sequence( let mut value : Int = 32 for i in 0.. 
Unit raise LexError { +) -> Unit raise YamlError { self.save_simple_key() self.allow_simple_key() let tok = self.scan_block_scalar(literal) @@ -1074,7 +1058,7 @@ fn Lexer::fetch_block_scalar( fn Lexer::scan_block_scalar( self : Lexer, literal : Bool, -) -> Token raise LexError { +) -> Token raise YamlError { let start_mark = self.get_marker() let mut chomping = Chomping::Clip let mut increment = 0 @@ -1099,7 +1083,7 @@ fn Lexer::scan_block_scalar( self.skip_non_blank() if self.look_char().is_ascii_digit() { if self.char() == '0' { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="while scanning a block scalar, found an indentation indicator equal to 0", ) @@ -1109,7 +1093,7 @@ fn Lexer::scan_block_scalar( } } else if self.char().is_ascii_digit() { if self.char() == '0' { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="while scanning a block scalar, found an indentation indicator equal to 0", ) @@ -1129,8 +1113,8 @@ fn Lexer::scan_block_scalar( ignore(self.skip_ws_to_eol(SkipTabs::Yes)) // Check if we are at the end of the line. 
- if self.look_char().is_breakz() { - raise LexError::LexError( + if !self.look_char().is_breakz() { + raise YamlError::YamlError( mark=start_mark, info="while scanning a block scalar, did not find expected comment or line break", ) @@ -1140,7 +1124,7 @@ fn Lexer::scan_block_scalar( self.read_break(chomping_break) } if self.look_char() == '\t' { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="a block scalar content cannot start with a tab", ) @@ -1185,14 +1169,14 @@ fn Lexer::scan_block_scalar( } } if self.col < indent.val && self.col > self.indent { - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="wrongly indented line in block scalar", ) } let line_buffer = StringBuilder::new(size_hint=100) let start_mark = self.get_marker() - while self.col == indent.val && self.char().is_z() { + while self.col == indent.val && !self.char().is_z() { if indent.val == 0 { self.lookahead(4) if self.next_is_document_end() { @@ -1281,7 +1265,7 @@ fn Lexer::scan_block_scalar_content_line( // We will read all consecutive non-breakz characters. We push them into a temporary buffer. 
let mut line_buffer_size = 0 let mut c = self.raw_read_char() - while c.is_breakz() { + while !c.is_breakz() { line_buffer.write_char(c) line_buffer_size += 1 c = self.raw_read_char() @@ -1396,7 +1380,7 @@ fn Lexer::unroll_non_block_indents(self : Lexer) -> Unit { } ///| -fn Lexer::fetch_tag(self : Lexer) -> Unit raise LexError { +fn Lexer::fetch_tag(self : Lexer) -> Unit raise YamlError { self.save_simple_key() self.disallow_simple_key() let tok = self.scan_tag() @@ -1404,7 +1388,7 @@ fn Lexer::fetch_tag(self : Lexer) -> Unit raise LexError { } ///| -fn Lexer::scan_tag(self : Lexer) -> Token raise LexError { +fn Lexer::scan_tag(self : Lexer) -> Token raise YamlError { let start_mark = self.get_marker() let mut handle = "" let mut suffix = "" @@ -1439,7 +1423,7 @@ fn Lexer::scan_tag(self : Lexer) -> Token raise LexError { // XXX: ex 7.2, an empty scalar can follow a secondary tag Token::{ marker: start_mark, token_type: TokenType::Tag(handle~, suffix~) } } else { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="while scanning a tag, did not find expected whitespace or line break", ) @@ -1453,7 +1437,7 @@ fn Lexer::scan_tag_shorthand_suffix( _is_secondary : Bool, head : String, mark : Marker, -) -> String raise LexError { +) -> String raise YamlError { let buf = StringBuilder::new() // Copy the head if needed. 
@@ -1474,7 +1458,7 @@ fn Lexer::scan_tag_shorthand_suffix( length += 1 } if length == 0 { - raise LexError::LexError( + raise YamlError::YamlError( mark~, info="while parsing a tag, did not find expected tag URI", ) @@ -1489,7 +1473,7 @@ fn Lexer::scan_tag_shorthand_suffix( fn Lexer::scan_verbatim_tag( self : Lexer, start_mark : Marker, -) -> String raise LexError { +) -> String raise YamlError { // Eat `!<` self.skip_non_blank() self.skip_non_blank() @@ -1502,7 +1486,7 @@ fn Lexer::scan_verbatim_tag( } } if self.char() != '>' { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="while scanning a verbatim tag, did not find the expected '>'", ) @@ -1512,7 +1496,7 @@ fn Lexer::scan_verbatim_tag( } ///| -fn Lexer::fetch_anchor(self : Lexer, is_alias : Bool) -> Unit raise LexError { +fn Lexer::fetch_anchor(self : Lexer, is_alias : Bool) -> Unit raise YamlError { self.save_simple_key() self.disallow_simple_key() let tok = self.scan_anchor(is_alias) @@ -1520,7 +1504,7 @@ fn Lexer::fetch_anchor(self : Lexer, is_alias : Bool) -> Unit raise LexError { } ///| -fn Lexer::scan_anchor(self : Lexer, is_alias : Bool) -> Token raise LexError { +fn Lexer::scan_anchor(self : Lexer, is_alias : Bool) -> Token raise YamlError { let buf = StringBuilder::new() let start_mark = self.get_marker() self.skip_non_blank() @@ -1529,7 +1513,7 @@ fn Lexer::scan_anchor(self : Lexer, is_alias : Bool) -> Token raise LexError { self.skip_non_blank() } if buf.is_empty() { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="while scanning an anchor or alias, did not find expected alphabetic or numeric character", ) @@ -1546,7 +1530,7 @@ fn Lexer::scan_anchor(self : Lexer, is_alias : Bool) -> Token raise LexError { ///| /// Fetch a value from a mapping (after a `:`). 
-fn Lexer::fetch_value(self : Lexer) -> Unit raise LexError { +fn Lexer::fetch_value(self : Lexer) -> Unit raise YamlError { let sk = self.simple_keys.last().unwrap().clone() let start_mark = self.get_marker() self.implicit_flow_mapping = self.flow_level > 0 && !self.flow_mapping_started @@ -1556,7 +1540,7 @@ fn Lexer::fetch_value(self : Lexer) -> Unit raise LexError { if self.look_char() == '\t' && !self.skip_ws_to_eol(SkipTabs::Yes).has_valid_yaml_ws() && (self.char() == '-' || self.char().is_alpha()) { - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="':' must be followed by a valid YAML whitespace", ) @@ -1567,7 +1551,7 @@ fn Lexer::fetch_value(self : Lexer) -> Unit raise LexError { self.insert_token(sk.token_number - self.tokens_parsed, tok) if self.implicit_flow_mapping { if sk.mark.line < start_mark.line { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="illegal placement of ':' indicator", ) @@ -1599,7 +1583,7 @@ fn Lexer::fetch_value(self : Lexer) -> Unit raise LexError { // The ':' indicator follows a complex key. if self.flow_level == 0 { if !self.simple_key_allowed { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="mapping values are not allowed in this context", ) @@ -1625,12 +1609,12 @@ fn Lexer::fetch_value(self : Lexer) -> Unit raise LexError { } ///| -fn Lexer::fetch_key(self : Lexer) -> Unit raise LexError { +fn Lexer::fetch_key(self : Lexer) -> Unit raise YamlError { let start_mark = self.get_marker() if self.flow_level == 0 { // Check if we are allowed to start a new key (not necessarily simple). 
if !self.simple_key_allowed { - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="mapping keys are not allowed in this context", ) @@ -1655,7 +1639,7 @@ fn Lexer::fetch_key(self : Lexer) -> Unit raise LexError { self.skip_non_blank() self.skip_yaml_whitespace() if self.char() == '\t' { - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="tabs disallowed in this context", ) @@ -1671,7 +1655,7 @@ fn Lexer::fetch_key(self : Lexer) -> Unit raise LexError { /// /// # Errors /// This function returns an error if no whitespace was found. -fn Lexer::skip_yaml_whitespace(self : Lexer) -> Unit raise LexError { +fn Lexer::skip_yaml_whitespace(self : Lexer) -> Unit raise YamlError { let mut need_whitespace = true while true { match self.look_char() { @@ -1695,7 +1679,10 @@ fn Lexer::skip_yaml_whitespace(self : Lexer) -> Unit raise LexError { } } if need_whitespace { - raise LexError::LexError(mark=self.get_marker(), info="expected whitespace") + raise YamlError::YamlError( + mark=self.get_marker(), + info="expected whitespace", + ) } } @@ -1705,10 +1692,10 @@ fn Lexer::skip_yaml_whitespace(self : Lexer) -> Unit raise LexError { /// Add an indentation level and push a `BlockSequenceStart` token if needed, then push a /// `BlockEntry` token. /// This function only skips over the `-` and does not fetch the entry value. -fn Lexer::fetch_block_entry(self : Lexer) -> Unit raise LexError { +fn Lexer::fetch_block_entry(self : Lexer) -> Unit raise YamlError { if self.flow_level > 0 { // - * only allowed in block - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="'-' is only valid inside a block", ) @@ -1716,7 +1703,7 @@ fn Lexer::fetch_block_entry(self : Lexer) -> Unit raise LexError { // Check if we are allowed to start a new entry. 
if !self.simple_key_allowed { - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="block sequence entries are not allowed in this context", ) @@ -1725,7 +1712,7 @@ fn Lexer::fetch_block_entry(self : Lexer) -> Unit raise LexError { // ???, fixes test G9HC. if self.tokens.back() is Some({ marker, token_type: Anchor(_) | Tag(_) }) { if self.col == 0 && marker.col == 0 && self.indent > -1 { - raise LexError::LexError( + raise YamlError::YamlError( mark=marker, info="invalid indentation for anchor", ) @@ -1741,7 +1728,7 @@ fn Lexer::fetch_block_entry(self : Lexer) -> Unit raise LexError { let found_tabs = self.skip_ws_to_eol(SkipTabs::Yes).found_tabs() self.lookahead(2) if found_tabs && self.buffer[0] == '-' && self.buffer[1].is_blank_or_breakz() { - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="'-' must be followed by a valid YAML whitespace", ) @@ -1803,7 +1790,7 @@ fn Lexer::roll_indent( ///| /// Push the `FlowEntry` token and skip over the `,`. -fn Lexer::fetch_flow_entry(self : Lexer) -> Unit raise LexError { +fn Lexer::fetch_flow_entry(self : Lexer) -> Unit raise YamlError { self.remove_simple_key() self.allow_simple_key() self.end_implicit_mapping(self.get_marker()) @@ -1820,7 +1807,7 @@ fn Lexer::fetch_flow_entry(self : Lexer) -> Unit raise LexError { fn Lexer::fetch_flow_collection_end( self : Lexer, tok : TokenType, -) -> Unit raise LexError { +) -> Unit raise YamlError { self.remove_simple_key() self.decrease_flow_level() self.disallow_simple_key() @@ -1865,7 +1852,7 @@ fn Lexer::decrease_flow_level(self : Lexer) -> Unit { fn Lexer::fetch_flow_collection_start( self : Lexer, tok : TokenType, -) -> Unit raise LexError { +) -> Unit raise YamlError { // The indicators '[' and '{' may start a simple key. 
self.save_simple_key() self.roll_one_col_indent() @@ -1881,12 +1868,12 @@ fn Lexer::fetch_flow_collection_start( } ///| -fn Lexer::increase_flow_level(self : Lexer) -> Unit raise LexError { +fn Lexer::increase_flow_level(self : Lexer) -> Unit raise YamlError { self.simple_keys.push(SimpleKey::new(Marker::{ index: 0, line: 0, col: 0 })) self.flow_level = if self.flow_level < 0xFF { self.flow_level + 1 } else { - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="recursion limit exceeded", ) @@ -1926,7 +1913,7 @@ fn Lexer::save_simple_key(self : Lexer) -> Unit { fn Lexer::fetch_document_indicator( self : Lexer, t : TokenType, -) -> Unit raise LexError { +) -> Unit raise YamlError { self.unroll_indent(-1) self.remove_simple_key() self.disallow_simple_key() @@ -1936,7 +1923,7 @@ fn Lexer::fetch_document_indicator( } ///| -fn Lexer::fetch_directive(self : Lexer) -> Unit raise LexError { +fn Lexer::fetch_directive(self : Lexer) -> Unit raise YamlError { self.unroll_indent(-1) self.remove_simple_key() self.disallow_simple_key() @@ -1945,7 +1932,7 @@ fn Lexer::fetch_directive(self : Lexer) -> Unit raise LexError { } ///| -fn Lexer::scan_directive(self : Lexer) -> Token raise LexError { +fn Lexer::scan_directive(self : Lexer) -> Token raise YamlError { let start_mark = self.get_marker() self.skip_non_blank() let name = self.scan_directive_name() @@ -1966,7 +1953,7 @@ fn Lexer::scan_directive(self : Lexer) -> Token raise LexError { token_type: TokenType::TagDirective(handle="", prefix=""), } - // return Err(LexError::new(start_mark, + // return Err(YamlError::new(start_mark, // "while scanning a directive, found unknown directive name")) } } @@ -1976,7 +1963,7 @@ fn Lexer::scan_directive(self : Lexer) -> Token raise LexError { self.skip_linebreak() return tok } else { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="while scanning a directive, did not find expected comment or line break", ) @@ -1987,7 +1974,7 
@@ fn Lexer::scan_directive(self : Lexer) -> Token raise LexError { fn Lexer::scan_tag_directive_value( self : Lexer, mark : Marker, -) -> Token raise LexError { +) -> Token raise YamlError { // Eat whitespaces. while self.look_char().is_blank() { self.skip_blank() @@ -2006,7 +1993,7 @@ fn Lexer::scan_tag_directive_value( token_type: TokenType::TagDirective(handle~, prefix~), } } else { - raise LexError::LexError( + raise YamlError::YamlError( mark~, info="while scanning TAG, did not find expected whitespace or line break", ) @@ -2022,15 +2009,15 @@ fn Lexer::scan_tag_directive_value( fn Lexer::scan_tag_prefix( self : Lexer, start_mark : Marker, -) -> String raise LexError { +) -> String raise YamlError { let buf = StringBuilder::new() if self.look_char() == '!' { // If we have a local tag, insert and skip `!`. buf.write_char(self.char()) self.skip_non_blank() - } else if self.char().is_tag_char() { + } else if !self.char().is_tag_char() { // Otherwise, check if the first global tag character is valid. 
- raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="invalid global tag character", ) @@ -2054,7 +2041,7 @@ fn Lexer::scan_tag_prefix( } ///| -fn Lexer::scan_uri_escapes(self : Lexer, mark : Marker) -> Char raise LexError { +fn Lexer::scan_uri_escapes(self : Lexer, mark : Marker) -> Char raise YamlError { let mut width = 0 let mut code = 0 while true { @@ -2062,7 +2049,7 @@ fn Lexer::scan_uri_escapes(self : Lexer, mark : Marker) -> Char raise LexError { if !(self.char() == '%' && self.buffer[1].is_hex() && self.buffer[2].is_hex()) { - raise LexError::LexError( + raise YamlError::YamlError( mark~, info="while parsing a tag, did not find URI escaped octet", ) @@ -2075,7 +2062,7 @@ fn Lexer::scan_uri_escapes(self : Lexer, mark : Marker) -> Char raise LexError { _ if (octet & 0xF0) == 0xE0 => 3 _ if (octet & 0xF8) == 0xF0 => 4 _ => - raise LexError::LexError( + raise YamlError::YamlError( mark~, info="while parsing a tag, found an incorrect leading UTF-8 octet", ) @@ -2083,7 +2070,7 @@ fn Lexer::scan_uri_escapes(self : Lexer, mark : Marker) -> Char raise LexError { code = octet } else { if (octet & 0xc0) != 0x80 { - raise LexError::LexError( + raise YamlError::YamlError( mark~, info="while parsing a tag, found an incorrect trailing UTF-8 octet", ) @@ -2099,7 +2086,7 @@ fn Lexer::scan_uri_escapes(self : Lexer, mark : Marker) -> Char raise LexError { match Int::to_char(code) { Some(ch) => ch None => - raise LexError::LexError( + raise YamlError::YamlError( mark~, info="while parsing a tag, found an invalid UTF-8 codepoint", ) @@ -2111,10 +2098,10 @@ fn Lexer::scan_tag_handle( self : Lexer, directive : Bool, mark : Marker, -) -> String raise LexError { +) -> String raise YamlError { let buf = StringBuilder::new() if self.look_char() != '!' 
{ - raise LexError::LexError( + raise YamlError::YamlError( mark~, info="while scanning a tag, did not find expected '!'", ) @@ -2125,35 +2112,34 @@ fn Lexer::scan_tag_handle( buf.write_char(self.char()) self.skip_non_blank() } - let res = buf.to_string() // Check if the trailing character is '!' and copy it. if self.char() == '!' { buf.write_char(self.char()) self.skip_non_blank() - } else if directive && res != "!" { + } else if directive && buf.to_string() != "!" { // It's either the '!' tag or not really a tag handle. If it's a %TAG // directive, it's an error. If it's a tag token, it must be a part of // URI. - raise LexError::LexError( + raise YamlError::YamlError( mark~, info="while parsing a tag directive, did not find expected '!'", ) } - return res + return buf.to_string() } ///| fn Lexer::scan_version_directive_value( self : Lexer, mark : Marker, -) -> Token raise LexError { +) -> Token raise YamlError { while self.look_char().is_blank() { self.skip_blank() } let major = self.scan_version_directive_number(mark) if self.char() != '.' { - raise LexError::LexError( + raise YamlError::YamlError( mark~, info="while scanning a YAML directive, did not find expected digit or '.' 
character", ) @@ -2170,12 +2156,12 @@ fn Lexer::scan_version_directive_value( fn Lexer::scan_version_directive_number( self : Lexer, mark : Marker, -) -> Int raise LexError { +) -> Int raise YamlError { let mut val = 0 let mut length = 0 while self.look_char().to_digit() is Some(digit) { if length + 1 > 9 { - raise LexError::LexError( + raise YamlError::YamlError( mark~, info="while scanning a YAML directive, found extremely long version number", ) @@ -2185,7 +2171,7 @@ fn Lexer::scan_version_directive_number( self.skip_non_blank() } if length == 0 { - raise LexError::LexError( + raise YamlError::YamlError( mark~, info="while scanning a YAML directive, did not find expected version number", ) @@ -2194,21 +2180,21 @@ fn Lexer::scan_version_directive_number( } ///| -fn Lexer::scan_directive_name(self : Lexer) -> String raise LexError { +fn Lexer::scan_directive_name(self : Lexer) -> String raise YamlError { let start_mark = self.get_marker() let buf = StringBuilder::new() - while self.char().is_alpha() { + while self.look_char().is_alpha() { buf.write_char(self.char()) self.skip_non_blank() } if buf.is_empty() { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="while scanning a directive, could not find expected directive name", ) } if !self.char().is_blank_or_breakz() { - raise LexError::LexError( + raise YamlError::YamlError( mark=start_mark, info="while scanning a directive, found unexpected non-alphabetical character", ) @@ -2217,7 +2203,7 @@ fn Lexer::scan_directive_name(self : Lexer) -> String raise LexError { } ///| -fn Lexer::fetch_stream_end(self : Lexer) -> Unit raise LexError { +fn Lexer::fetch_stream_end(self : Lexer) -> Unit raise YamlError { // force new line if self.col != 0 { self.col = 0 @@ -2227,7 +2213,7 @@ fn Lexer::fetch_stream_end(self : Lexer) -> Unit raise LexError { // had. If one was required, however, that was an error and we must propagate it. 
for sk in self.simple_keys { if sk.required && sk.possible { - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="simple key expected", ) @@ -2244,10 +2230,13 @@ fn Lexer::fetch_stream_end(self : Lexer) -> Unit raise LexError { } ///| -fn Lexer::remove_simple_key(self : Lexer) -> Unit raise LexError { +fn Lexer::remove_simple_key(self : Lexer) -> Unit raise YamlError { let last = self.simple_keys.last().unwrap() if last.possible && last.required { - raise LexError::LexError(mark=self.get_marker(), info="simple key expected") + raise YamlError::YamlError( + mark=self.get_marker(), + info="simple key expected", + ) } last.possible = false } @@ -2282,13 +2271,13 @@ fn Lexer::unroll_indent(self : Lexer, col : Int) -> Unit { /// /// # Errors /// This function returns an error if one of the key we would stale was required to be a key. -fn Lexer::stale_simple_keys(self : Lexer) -> Unit raise LexError { +fn Lexer::stale_simple_keys(self : Lexer) -> Unit raise YamlError { for sk in self.simple_keys { if sk.possible && self.flow_level == 0 && (sk.mark.line < self.line || sk.mark.index + 1024 < self.index) { if sk.required { - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="simple key expect ':'", ) @@ -2304,7 +2293,7 @@ fn Lexer::stale_simple_keys(self : Lexer) -> Unit raise LexError { /// # Errors /// This function returns an error if a tabulation is encountered where there should not be /// one. -fn Lexer::skip_to_next_token(self : Lexer) -> Unit raise LexError { +fn Lexer::skip_to_next_token(self : Lexer) -> Unit raise YamlError { while true { match self.look_char() { // Tabs may not be used as indentation. @@ -2319,7 +2308,7 @@ fn Lexer::skip_to_next_token(self : Lexer) -> Unit raise LexError { ignore(self.skip_ws_to_eol(SkipTabs::Yes)) // If we have content on that line with a tab, return an error. 
if self.char().is_breakz() { - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="tabs disallowed within this context (block indentation)", ) @@ -2347,7 +2336,7 @@ fn Lexer::skip_to_next_token(self : Lexer) -> Unit raise LexError { fn Lexer::skip_ws_to_eol( self : Lexer, skip_tabs : SkipTabs, -) -> SkipTabs raise LexError { +) -> SkipTabs raise YamlError { let mut encountered_tab = false let mut has_onemore_whitespace = false while true { @@ -2362,7 +2351,7 @@ fn Lexer::skip_ws_to_eol( } // YAML comments must be preceded by whitespace. '#' if !encountered_tab && !has_onemore_whitespace => - raise LexError::LexError( + raise YamlError::YamlError( mark=self.get_marker(), info="comments must be separated from other tokens by whitespace", ) @@ -2382,7 +2371,7 @@ fn Lexer::is_within_block(self : Lexer) -> Bool { } ///| -pub enum SkipTabs { +priv enum SkipTabs { /// Skip all tabs as whitespace. Yes /// Don't skip any tab. Return from the function when encountering one. @@ -2413,7 +2402,7 @@ fn SkipTabs::has_valid_yaml_ws(self : SkipTabs) -> Bool { /// Chomping, how final line breaks and trailing empty lines are interpreted. /// /// See YAML spec 8.1.1.2. -pub enum Chomping { +priv enum Chomping { /// The final line break and any trailing empty lines are excluded. Strip /// The final line break is preserved, but trailing empty lines are excluded. diff --git a/src/lexer_wbtest.mbt b/src/lexer_wbtest.mbt index 8124e3f..5760529 100644 --- a/src/lexer_wbtest.mbt +++ b/src/lexer_wbtest.mbt @@ -701,3 +701,350 @@ test "lexer cr" { inspect(p.next_t(), content="StreamEnd") inspect(p.next(), content="None") } + +///| +test "keep tags across multiple documents" { + let source = + #|%YAML 1.1 + #|%TAG !t! 
tag:test,2024: + #|--- !t!1 &1 + #|foo: "bar" + #|--- !t!2 &2 + #|baz: "qux" + let p = Lexer::new(source) + inspect(p.next_t(), content="StreamStart") + inspect(p.next_t(), content="VersionDirective(major=1, minor=1)") + inspect( + p.next_t(), + content=( + #|TagDirective(handle="!t!", prefix="tag:test,2024:") + ), + ) + inspect(p.next_t(), content="DocumentStart") + inspect( + p.next_t(), + content=( + #|Tag(handle="!t!", suffix="1") + ), + ) + inspect( + p.next_t(), + content=( + #|Anchor("1") + ), + ) + inspect(p.next_t(), content="BlockMappingStart") + inspect(p.next_t(), content="Key") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "foo") + ), + ) + inspect(p.next_t(), content="Value") + inspect( + p.next_t(), + content=( + #|Scalar(DoubleQuoted, "bar") + ), + ) + inspect(p.next_t(), content="BlockEnd") + inspect(p.next_t(), content="DocumentStart") + inspect( + p.next_t(), + content=( + #|Tag(handle="!t!", suffix="2") + ), + ) + inspect( + p.next_t(), + content=( + #|Anchor("2") + ), + ) + inspect(p.next_t(), content="BlockMappingStart") + inspect(p.next_t(), content="Key") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "baz") + ), + ) + inspect(p.next_t(), content="Value") + inspect( + p.next_t(), + content=( + #|Scalar(DoubleQuoted, "qux") + ), + ) + inspect(p.next_t(), content="BlockEnd") + inspect(p.next_t(), content="StreamEnd") + inspect(p.next(), content="None") +} + +///| +test "large block scalar indent" { + let source = + #|a: |- + #| a + #| b + let p = Lexer::new(source) + inspect(p.next_t(), content="StreamStart") + inspect(p.next_t(), content="BlockMappingStart") + inspect(p.next_t(), content="Key") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "a") + ), + ) + inspect(p.next_t(), content="Value") + inspect( + p.next_t(), + content=( + #|Scalar(Literal, "a\n b") + ), + ) + inspect(p.next_t(), content="BlockEnd") + inspect(p.next_t(), content="StreamEnd") + inspect(p.next(), content="None") +} + +///| +test "tag directives" 
{ + let source = + #|- !!str 0 + #|- !!int 100 + #|- !!float 2 + #|- !!null ~ + #|- !!bool true + #|- !!bool false + let p = Lexer::new(source) + inspect(p.next_t(), content="StreamStart") + inspect(p.next_t(), content="BlockSequenceStart") + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Tag(handle="!!", suffix="str") + ), + ) + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "0") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Tag(handle="!!", suffix="int") + ), + ) + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "100") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Tag(handle="!!", suffix="float") + ), + ) + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "2") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Tag(handle="!!", suffix="null") + ), + ) + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "~") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Tag(handle="!!", suffix="bool") + ), + ) + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "true") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Tag(handle="!!", suffix="bool") + ), + ) + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "false") + ), + ) + inspect(p.next_t(), content="BlockEnd") + inspect(p.next_t(), content="StreamEnd") + inspect(p.next(), content="None") +} + +///| +test "looks like float but actually string" { + let buf = StringBuilder::new(size_hint=400) + let bases = ["nan", "NAN", "NaN", "inf", "infinity", "Infinity"] + for base in bases { + buf.write_string("- +\{base}\n") + buf.write_string("- -\{base}\n") + buf.write_string("- \{base}\n") + } + let source = buf.to_string() + let p = Lexer::new(source) + inspect(p.next_t(), content="StreamStart") + inspect(p.next_t(), content="BlockSequenceStart") + 
inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "+nan") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "-nan") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "nan") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "+NAN") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "-NAN") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "NAN") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "+NaN") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "-NaN") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "NaN") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "+inf") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "-inf") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "inf") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "+infinity") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "-infinity") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "infinity") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "+Infinity") + ), + ) + inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "-Infinity") + ), + ) + 
inspect(p.next_t(), content="BlockEntry") + inspect( + p.next_t(), + content=( + #|Scalar(Plain, "Infinity") + ), + ) + inspect(p.next_t(), content="BlockEnd") + inspect(p.next_t(), content="StreamEnd") + inspect(p.next(), content="None") +} diff --git a/src/loader_test.mbt b/src/loader_test.mbt new file mode 100644 index 0000000..3c8b905 --- /dev/null +++ b/src/loader_test.mbt @@ -0,0 +1,445 @@ +///| +test "keep tags across multiple documents" { + let source = + #|%YAML 1.1 + #|%TAG !t! tag:test,2024: + #|--- !t!1 &1 + #|foo: "bar" + #|--- !t!2 &2 + #|baz: "qux" + let parser = Parser::new(source) + parser.keep_tags(true) + let docs = Yaml::load_from_parser(parser) + @json.inspect(docs, content=[{ "foo": "bar" }, { "baz": "qux" }]) +} + +///| +test "empty doc" { + let source = "---" + let docs = Yaml::load_from_string(source) + assert_eq(docs.length(), 1) + let yaml = docs[0] + assert_true(yaml is Yaml::Null) + let source = "" + let docs = Yaml::load_from_string(source) + assert_eq(docs.length(), 0) + let source = "&a" + let docs = Yaml::load_from_string(source) + let doc = docs[0] + assert_true(doc is Yaml::Null) +} + +///| +test "basic" { + let source = + #|# comment + #|a0 bb: val + #|a1: + #| b1: 4 + #| b2: d + #|a2: 4 # i'm comment + #|a3: [1, 2, 3] + #|a4: + #| - - a1 + #| - a2 + #| - 2 + #|a5: 'single_quoted' + #|a6: "double_quoted" + #|a7: 你好 + let docs = Yaml::load_from_string(source) + assert_eq(docs.length(), 1) + let doc = docs[0] + @json.inspect(doc, content={ + "a0 bb": "val", + "a1": { "b1": 4, "b2": "d" }, + "a2": 4, + "a3": [1, 2, 3], + "a4": [["a1", "a2"], 2], + "a5": "single_quoted", + "a6": "double_quoted", + "a7": "你好", + }) + let source = + #|# from yaml-cpp example + #|- name: Ogre + #| position: [0, 5, 0] + #| powers: + #| - name: Club + #| damage: 10 + #| - name: Fist + #| damage: 8 + #|- name: Dragon + #| position: [1, 0, 10] + #| powers: + #| - name: Fire Breath + #| damage: 25 + #| - name: Claws + #| damage: 15 + #|- name: Wizard + #| 
position: [5, -3, 0] + #| powers: + #| - name: Acid Rain + #| damage: 50 + #| - name: Staff + #| damage: 3 + let docs = Yaml::load_from_string(source) + @json.inspect(docs, content=[ + [ + { + "name": "Ogre", + "position": [0, 5, 0], + "powers": [ + { "name": "Club", "damage": 10 }, + { "name": "Fist", "damage": 8 }, + ], + }, + { + "name": "Dragon", + "position": [1, 0, 10], + "powers": [ + { "name": "Fire Breath", "damage": 25 }, + { "name": "Claws", "damage": 15 }, + ], + }, + { + "name": "Wizard", + "position": [5, -3, 0], + "powers": [ + { "name": "Acid Rain", "damage": 50 }, + { "name": "Staff", "damage": 3 }, + ], + }, + ], + ]) +} + +///| +test "multi doc" { + let source = + #|'a scalar' + #|--- + #|'a scalar' + #|--- + #|'a scalar' + let docs = Yaml::load_from_string(source) + assert_eq(docs.length(), 3) +} + +///| +test "anchor" { + let source = + #|a1: &DEFAULT + #| b1: 4 + #| b2: d + #|a2: *DEFAULT + let docs = Yaml::load_from_string(source) + let doc = docs[0] + @json.inspect(doc, content={ + "a1": { "b1": 4, "b2": "d" }, + "a2": { "b1": 4, "b2": "d" }, + }) +} + +///| +test "bad anchor" { + let source = + #|a1: &DEFAULT + #| b1: 4 + #| b2: *DEFAULT + let docs = Yaml::load_from_string(source) + let doc = docs[0] + guard doc is Yaml::Map(yaml_map) && yaml_map["a1"] is Yaml::Map(yaml_map) else { + fail("\{doc}") + } + inspect(yaml_map["b2"], content="BadValue") +} + +///| +test "should throw error" { + // bad hyphen + let source = "{-" + assert_true((try? Yaml::load_from_string(source)) is Err(_)) + + // invalid trailing content after a double quoted string + let source = + #|"foo" l" + assert_true((try? 
Yaml::load_from_string(source)) is Err(_)) +} + +///| +test "comment after tag" { + let source = + #|%YAML 1.2 + #|# This is a comment + #|--- #------- + #|foobar + let docs = Yaml::load_from_string(source) + @json.inspect(docs, content=["foobar"]) +} + +///| +test "large block scalar indent" { + let source = + #|a: |- + #| a + #| b + let docs = Yaml::load_from_string(source) + @json.inspect(docs, content=[{ "a": "a\n b" }]) +} + +///| +test "bad docstart" { + @json.inspect( + Yaml::load_from_string("---This used to cause an infinite loop"), + content=["---This used to cause an infinite loop"], + ) + @json.inspect(Yaml::load_from_string("----"), content=["----"]) + @json.inspect(Yaml::load_from_string("--- #here goes a comment"), content=[ + null, + ]) + @json.inspect(Yaml::load_from_string("---- #here goes a comment"), content=[ + "----", + ]) +} + +///| +test "plain datatype" { + let source = + #|- 'string' + #|- "string" + #|- string + #|- 123 + #|- -321 + #|- 1.23 + #|- -1e4 + #|- ~ + #|- null + #|- true + #|- false + #|- !!str 0 + #|- !!int 100 + #|- !!float 2 + #|- !!null ~ + #|- !!bool true + #|- !!bool false + #|- 0xFF + #|# bad values + #|- !!int string + #|- !!float string + #|- !!bool null + #|- !!null val + #|- 0o77 + #|- [ 0xF, 0xF ] + #|- +12345 + #|- [ true, false ] + let docs = Yaml::load_from_string(source) + assert_eq(docs.length(), 1) + guard docs[0] is Yaml::Array(yaml_array) else { + fail("expect yaml array but got \{docs[0]}") + } + inspect( + yaml_array[0], + content=( + #|String("string") + ), + ) + inspect( + yaml_array[1], + content=( + #|String("string") + ), + ) + inspect( + yaml_array[2], + content=( + #|String("string") + ), + ) + inspect(yaml_array[3], content="Integer(123)") + inspect(yaml_array[4], content="Integer(-321)") + inspect( + yaml_array[5], + content=( + #|Real(1.23, repr="1.23") + ), + ) + inspect( + yaml_array[6], + content=( + #|Real(-10000, repr="-1e4") + ), + ) + inspect(yaml_array[7], content="Null") + 
inspect(yaml_array[8], content="Null") + inspect(yaml_array[9], content="Boolean(true)") + inspect(yaml_array[10], content="Boolean(false)") + inspect( + yaml_array[11], + content=( + #|String("0") + ), + ) + inspect(yaml_array[12], content="Integer(100)") + inspect( + yaml_array[13], + content=( + #|Real(2, repr="2") + ), + ) + inspect(yaml_array[14], content="Null") + inspect(yaml_array[15], content="Boolean(true)") + inspect(yaml_array[16], content="Boolean(false)") + inspect(yaml_array[17], content="Integer(255)") + inspect(yaml_array[18], content="BadValue") + inspect(yaml_array[19], content="BadValue") + inspect(yaml_array[20], content="BadValue") + inspect(yaml_array[21], content="BadValue") + inspect(yaml_array[22], content="Integer(63)") + inspect(yaml_array[23], content="Array([Integer(15), Integer(15)])") + inspect(yaml_array[24], content="Integer(12345)") + inspect(yaml_array[25], content="Array([Boolean(true), Boolean(false)])") +} + +///| +test "map order" { + let source = + #|--- + #|b: ~ + #|a: ~ + #|c: ~ + let docs = Yaml::load_from_string(source) + assert_eq(docs.length(), 1) + guard docs[0] is Yaml::Map(map) else { + fail("expect yaml map but got \{docs[0]}") + } + @json.inspect(map.iter().collect(), content=[ + ["b", null], + ["a", null], + ["c", null], + ]) +} + +///| +test "integer key" { + // always fail by design + let source = + #|0: + #| important: true + #|1: + #| important: false + guard (try? 
Yaml::load_from_string(source)) is Err(YamlError(info~, ..)) + inspect(info, content="yaml.mbt doesn't support non-string key") +} + +///| +test "indentation equality" { + let four_spaces = + #|hash: + #| with: + #| indentations + let four_spaces = Yaml::load_from_string(four_spaces) + let two_spaces = + #|hash: + #| with: + #| indentations + let two_spaces = Yaml::load_from_string(two_spaces) + let one_space = + #|hash: + #| with: + #| indentations + let one_space = Yaml::load_from_string(one_space) + let mixed_spaces = + #|hash: + #| with: + #| indentations + let mixed_spaces = Yaml::load_from_string(mixed_spaces) + assert_eq(four_spaces, two_spaces) + assert_eq(two_spaces, one_space) + assert_eq(four_spaces, mixed_spaces) +} + +///| +test "two space indentations" { + let source = + #|subcommands: + #| - server: + #| about: server related commands + #|subcommands2: + #| - server: + #| about: server related commands + #|subcommands3: + #| - server: + #| about: server related commands + let docs = Yaml::load_from_string(source) + @json.inspect(docs, content=[ + { + "subcommands": [{ "server": null, "about": "server related commands" }], + "subcommands2": [{ "server": { "about": "server related commands" } }], + "subcommands3": [{ "server": { "about": "server related commands" } }], + }, + ]) +} + +///| +test "recursion depth check objects" { + let buf = StringBuilder::new(size_hint=4 * 2 * 10_000) + for _ in 0..<10_000 { + buf.write_string("{a:") + } + for _ in 0..<10_000 { + buf.write_string("}") + } + let source = buf.to_string() + assert_true((try? Yaml::load_from_string(source)) is Err(_)) +} + +///| +test "recursion depth check arrays" { + let buf = StringBuilder::new(size_hint=2 * 2 * 10_000) + for _ in 0..<10_000 { + buf.write_char('[') + } + for _ in 0..<10_000 { + buf.write_char(']') + } + let source = buf.to_string() + assert_true((try? 
Yaml::load_from_string(source)) is Err(_)) +} + +///| +test "mapping duplicates" { + let source = + #|a: foo + #|a: bar + assert_true((try? Yaml::load_from_string(source)) is Err(_)) +} + +///| +test "nominal float parse" { + // Generates a document that looks like so: + // ```yaml + // - +nan + // - -nan + // - nan + // - +NAN + // ``` + // Every single one of these values should be parsed as a string in yaml, + let buf = StringBuilder::new(size_hint=400) + let bases = ["nan", "NAN", "NaN", "inf", "infinity", "Infinity"] + for base in bases { + buf.write_string("- +\{base}\n") + buf.write_string("- -\{base}\n") + buf.write_string("- \{base}\n") + } + let source = buf.to_string() + let docs = Yaml::load_from_string(source) + assert_eq(docs.length(), 1) + guard docs[0] is Yaml::Array(yaml_array) else { + fail("expect yaml array but got \{docs[0]}") + } + for yaml in yaml_array { + guard yaml is Yaml::String(_) else { + fail("expect a yaml string but got \{yaml}") + } + } +} diff --git a/src/marker.mbt b/src/marker.mbt new file mode 100644 index 0000000..69baa07 --- /dev/null +++ b/src/marker.mbt @@ -0,0 +1,9 @@ +///| +pub struct Marker { + /// The index (in chars) in the input string. + index : Int + /// The line (1-indexed). + line : Int + /// The column (1-indexed). + col : Int +} derive(Show, Eq) diff --git a/src/moon.pkg.json b/src/moon.pkg.json index 9e26dfe..83f9e1e 100644 --- a/src/moon.pkg.json +++ b/src/moon.pkg.json @@ -1 +1,3 @@ -{} \ No newline at end of file +{ + "warn-list": "-1" +} \ No newline at end of file diff --git a/src/parser.mbt b/src/parser.mbt new file mode 100644 index 0000000..da494a6 --- /dev/null +++ b/src/parser.mbt @@ -0,0 +1,984 @@ +///| +priv enum State { + /// We await the start of the stream. 
+ StreamStart + ImplicitDocumentStart + DocumentStart + DocumentContent + DocumentEnd + BlockNode + // BlockNodeOrIndentlessSequence + // FlowNode + BlockSequenceFirstEntry + BlockSequenceEntry + IndentlessSequenceEntry + BlockMappingFirstKey + BlockMappingKey + BlockMappingValue + FlowSequenceFirstEntry + FlowSequenceEntry + FlowSequenceEntryMappingKey + FlowSequenceEntryMappingValue + FlowSequenceEntryMappingEnd + FlowMappingFirstKey + FlowMappingKey + FlowMappingValue + FlowMappingEmptyValue + End +} derive(Eq) + +///| +struct Parser { + lexer : Lexer + states : Array[State] + mut state : State + mut token : Token? + mut current : (Event, Marker)? + anchors : @hashmap.HashMap[String, Int] + mut anchor_id : Int + /// The tag directives (`%TAG`) the parser has encountered. + /// + /// Key is the handle, and value is the prefix. + mut tags : @hashmap.HashMap[String, String] + /// Make tags global across all documents. + mut keep_tags : Bool +} + +///| +pub fn Parser::new(str : StringView) -> Parser { + Parser::{ + lexer: Lexer::new(str), + states: Array::new(capacity=40), + state: State::StreamStart, + token: None, + current: None, + anchors: @hashmap.HashMap::new(), + // valid anchor_id starts from 1 + anchor_id: 1, + tags: @hashmap.HashMap::new(), + keep_tags: false, + } +} + +///| +/// Whether to keep tags across multiple documents when parsing. +/// +/// This behavior is non-standard as per the YAML specification but can be encountered in the +/// wild. This boolean allows enabling this non-standard extension. This would result in the +/// parser accepting input from [test +/// QLJ7](https://github.com/yaml/yaml-test-suite/blob/ccfa74e56afb53da960847ff6e6976c0a0825709/src/QLJ7.yaml) +/// of the yaml-test-suite: +/// +/// ```yaml +/// %TAG !prefix! tag:example.com,2011: +/// --- !prefix!A +/// a: b +/// --- !prefix!B +/// c: d +/// --- !prefix!C +/// e: f +/// ``` +/// +/// With `keep_tags` set to `false`, the above YAML is rejected. 
As per the specification, tags +/// only apply to the document immediately following them. This would error on `!prefix!B`. +/// +/// With `keep_tags` set to `true`, the above YAML is accepted by the parser. +pub fn Parser::keep_tags(self : Parser, value : Bool) -> Unit { + self.keep_tags = value +} + +///| +/// Try to load the next event and return it, but do not consuming it from `self`. +/// +/// Any subsequent call to [`Parser::peek`] will return the same value, until a call to +/// [`Iterator::next`] or [`Parser::load`]. +/// # Errors +/// Returns `YamlError` when loading the next event fails. +fn Parser::peek(self : Parser) -> (Event, Marker) raise YamlError { + if self.current is Some(x) { + return x + } else { + self.current = Some(self.next_token()) + self.peek() + } +} + +///| +/// Try to load the next event and return it, consuming it from `self`. +/// # Errors +/// Returns `ScanError` when loading the next event fails. +fn Parser::next_token(self : Parser) -> (Event, Marker) raise YamlError { + let current = self.current + self.current = None + match current { + Some(x) => x + None => self.parse() + } +} + +///| +test "peek eq parse" { + let source = + #|a0 bb: val + #|a1: &x + #| b1: 4 + #| b2: d + #|a2: 4 + #|a3: [1, 2, 3] + #|a4: + #| - [a1, a2] + #| - 2 + #|a5: *x + let p = Parser::new(source) + while true { + let event_peek = p.peek() + let event = p.next_token() + assert_eq(event, event_peek) + if event.0 is Event::StreamEnd { + break + } + } +} + +///| +fn Parser::peek_token(self : Parser) -> Token raise YamlError { + match self.token { + None => { + let token = self.scan_next_token() + self.token = Some(token) + token + } + Some(tok) => tok + } +} + +///| +fn Parser::scan_next_token(self : Parser) -> Token raise YamlError { + let token = self.lexer.next() + match token { + None => + match self.lexer.get_error() { + None => + raise YamlError::YamlError( + mark=self.lexer.get_marker(), + info="unexpected eof", + ) + Some(e) => raise e + } + 
Some(tok) => tok + } +} + +///| +fn Parser::parse(self : Parser) -> (Event, Marker) raise YamlError { + if self.state == State::End { + return (Event::StreamEnd, self.lexer.get_marker()) + } + self.state_machine() +} + +///| +/// Load the YAML from the stream in `self`, pushing events into `recv`. +/// +/// The contents of the stream are parsed and the corresponding events are sent into the +/// recveiver. For detailed explanations about how events work, see `EventReceiver`. +/// +/// If `multi` is set to `true`, the parser will allow parsing of multiple YAML documents +/// inside the stream. +/// +/// Note that any `EventReceiver` is also a `MarkedEventReceiver`, so implementing the +/// former is enough to call this function. +/// # Errors +/// Returns `YamlError` when loading fails. +pub fn[R : MarkedEventReceiver] Parser::load( + self : Parser, + recv : R, + multi : Bool, +) -> Unit raise YamlError { + if !self.lexer.stream_started() { + let (ev, mark) = self.next_token() + if ev != Event::StreamStart { + raise YamlError::YamlError( + mark~, + info="did not find expected ", + ) + } + recv.on_event(ev, mark) + } + if self.lexer.stream_ended() { + recv.on_event(Event::StreamEnd, self.lexer.get_marker()) + return + } + while true { + let (ev, mark) = self.next_token() + if ev == Event::StreamEnd { + recv.on_event(ev, mark) + return + } + + // clear anchors before a new document + self.anchors.clear() + self.load_document(ev, mark, recv) + if !multi { + break + } + } +} + +///| +fn[R : MarkedEventReceiver] Parser::load_document( + self : Parser, + first_ev : Event, + mark : Marker, + recv : R, +) -> Unit raise YamlError { + if first_ev != Event::DocumentStart { + raise YamlError::YamlError( + mark~, + info="did not find expected ", + ) + } + recv.on_event(first_ev, mark) + let (ev, mark) = self.next_token() + self.load_node(ev, mark, recv) + + // DOCUMENT-END is expected. 
+ let (ev, mark) = self.next_token() + guard ev == Event::DocumentEnd + recv.on_event(ev, mark) +} + +///| +fn[R : MarkedEventReceiver] Parser::load_node( + self : Parser, + first_ev : Event, + mark : Marker, + recv : R, +) -> Unit raise YamlError { + match first_ev { + Event::Alias(..) | Event::Scalar(..) => recv.on_event(first_ev, mark) + Event::SequenceStart(..) => { + recv.on_event(first_ev, mark) + self.load_sequence(recv) + } + Event::MappingStart(..) => { + recv.on_event(first_ev, mark) + self.load_mapping(recv) + } + _ => { + println("UNREACHABLE EVENT: \{first_ev}") + panic() + } + } +} + +///| +fn[R : MarkedEventReceiver] Parser::load_sequence( + self : Parser, + recv : R, +) -> Unit raise YamlError { + let (ev, mark) = self.next_token() + let mut ev = ev + let mut mark = mark + while ev != Event::SequenceEnd { + self.load_node(ev, mark, recv) + + // next event + let (next_ev, next_mark) = self.next_token() + ev = next_ev + mark = next_mark + } + recv.on_event(ev, mark) +} + +///| +fn[R : MarkedEventReceiver] Parser::load_mapping( + self : Parser, + recv : R, +) -> Unit raise YamlError { + let (key_ev, key_mark) = self.next_token() + let mut key_ev = key_ev + let mut key_mark = key_mark + while key_ev != Event::MappingEnd { + // key + self.load_node(key_ev, key_mark, recv) + // value + let (ev, mark) = self.next_token() + self.load_node(ev, mark, recv) + + // next event + let (ev, mark) = self.next_token() + key_ev = ev + key_mark = mark + } + recv.on_event(key_ev, key_mark) +} + +///| +/// Skip the next token from the scanner. 
+fn Parser::skip(self : Parser) -> Unit { + self.token = None +} + +///| +fn Parser::state_machine(self : Parser) -> (Event, Marker) raise YamlError { + match self.state { + FlowMappingEmptyValue => self.flow_mapping_value(true) + FlowMappingValue => self.flow_mapping_value(false) + FlowMappingKey => self.flow_mapping_key(false) + FlowMappingFirstKey => self.flow_mapping_key(true) + FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end() + FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value() + FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key() + FlowSequenceEntry => self.flow_sequence_entry(false) + FlowSequenceFirstEntry => self.flow_sequence_entry(true) + BlockMappingValue => self.block_mapping_value() + BlockMappingKey => self.block_mapping_key(false) + BlockMappingFirstKey => self.block_mapping_key(true) + IndentlessSequenceEntry => self.indentless_sequence_entry() + BlockSequenceEntry => self.block_sequence_entry(false) + BlockSequenceFirstEntry => self.block_sequence_entry(true) + BlockNode => self.parse_node(true, false) + DocumentEnd => self.document_end() + DocumentContent => self.document_content() + DocumentStart => self.document_start(false) + ImplicitDocumentStart => self.document_start(true) + StreamStart => self.stream_start() + // impossible case + End => panic() + } +} + +///| +fn Parser::flow_mapping_key( + self : Parser, + first : Bool, +) -> (Event, Marker) raise YamlError { + if first { + ignore(self.peek_token()) + self.skip() + } + let marker = match self.peek_token() { + { marker, token_type: TokenType::FlowMappingEnd } => marker + { marker, token_type: _ } => { + if !first { + match self.peek_token() { + { marker: _, token_type: TokenType::FlowEntry } => self.skip() + { marker, token_type: _ } => + raise YamlError::YamlError( + mark=marker, + info="while parsing a flow mapping, did not find expected ',' or '}'", + ) + } + } + match self.peek_token() { + { marker: _, token_type: 
TokenType::Key } => { + self.skip() + if self.peek_token() + is { + marker, + token_type: TokenType::Value + | TokenType::FlowEntry + | TokenType::FlowMappingEnd, + } { + self.state = State::FlowMappingValue + return (Event::empty_scalar(), marker) + } + self.push_state(State::FlowMappingValue) + return self.parse_node(false, false) + } + { marker: _, token_type: TokenType::Value } => { + self.state = State::FlowMappingValue + return (Event::empty_scalar(), marker) + } + { marker: _, token_type: TokenType::FlowMappingEnd } => () + _ => { + self.push_state(State::FlowMappingEmptyValue) + return self.parse_node(false, false) + } + } + marker + } + } + self.pop_state() + self.skip() + (Event::MappingEnd, marker) +} + +///| +fn Parser::flow_mapping_value( + self : Parser, + empty : Bool, +) -> (Event, Marker) raise YamlError { + let marker = { + if empty { + let { marker, token_type: _ } = self.peek_token() + self.state = State::FlowMappingKey + return (Event::empty_scalar(), marker) + } + match self.peek_token() { + { marker, token_type: TokenType::Value } => { + self.skip() + match self.peek_token().token_type { + TokenType::FlowEntry | TokenType::FlowMappingEnd => () + _ => { + self.push_state(State::FlowMappingKey) + return self.parse_node(false, false) + } + } + marker + } + { marker, token_type: _ } => marker + } + } + self.state = State::FlowMappingKey + (Event::empty_scalar(), marker) +} + +///| +fn Parser::block_mapping_key( + self : Parser, + first : Bool, +) -> (Event, Marker) raise YamlError { + // skip BlockMappingStart + if first { + ignore(self.peek_token()) + self.skip() + } + match self.peek_token() { + { marker: _, token_type: TokenType::Key } => { + self.skip() + if self.peek_token() + is { + marker, + token_type: TokenType::Key + | TokenType::Value + | TokenType::BlockEnd, + } { + self.state = State::BlockMappingValue + (Event::empty_scalar(), marker) + } else { + self.push_state(State::BlockMappingValue) + self.parse_node(true, true) + } + } + // 
libyaml failed to parse spec 1.2, ex8.18 + { marker, token_type: TokenType::Value } => { + self.state = State::BlockMappingValue + (Event::empty_scalar(), marker) + } + { marker, token_type: TokenType::BlockEnd } => { + self.pop_state() + self.skip() + (Event::MappingEnd, marker) + } + { marker, token_type: _ } => + raise YamlError::YamlError( + mark=marker, + info="while parsing a block mapping, did not find expected key", + ) + } +} + +///| +fn Parser::block_mapping_value( + self : Parser, +) -> (Event, Marker) raise YamlError { + match self.peek_token() { + { marker: _, token_type: TokenType::Value } => { + self.skip() + if self.peek_token() + is { + marker, + token_type: TokenType::Key + | TokenType::Value + | TokenType::BlockEnd, + } { + self.state = State::BlockMappingKey + (Event::empty_scalar(), marker) + } else { + self.push_state(State::BlockMappingKey) + self.parse_node(true, true) + } + } + { marker, token_type: _ } => { + self.state = State::BlockMappingKey + (Event::empty_scalar(), marker) + } + } +} + +///| +fn Parser::flow_sequence_entry( + self : Parser, + first : Bool, +) -> (Event, Marker) raise YamlError { + // skip FlowMappingStart + if first { + ignore(self.peek_token()) + self.skip() + } + match self.peek_token() { + { marker, token_type: TokenType::FlowSequenceEnd } => { + self.pop_state() + self.skip() + return (Event::SequenceEnd, marker) + } + { marker: _, token_type: TokenType::FlowEntry } if !first => self.skip() + { marker, token_type: _ } if !first => + raise YamlError::YamlError( + mark=marker, + info="while parsing a flow sequence, expected ',' or ']'", + ) + _ => () + } + match self.peek_token() { + { marker, token_type: TokenType::FlowSequenceEnd } => { + self.pop_state() + self.skip() + (Event::SequenceEnd, marker) + } + { marker, token_type: TokenType::Key } => { + self.state = State::FlowSequenceEntryMappingKey + self.skip() + (Event::MappingStart(id=0, tag=None), marker) + } + _ => { + self.push_state(State::FlowSequenceEntry) 
+      self.parse_node(false, false)
+    }
+  }
+}
+
+///|
+fn Parser::flow_sequence_entry_mapping_key(
+  self : Parser,
+) -> (Event, Marker) raise YamlError {
+  if self.peek_token()
+    is {
+      marker,
+      token_type: TokenType::Value
+      | TokenType::FlowEntry
+      | TokenType::FlowSequenceEnd,
+    } {
+    self.skip()
+    self.state = State::FlowSequenceEntryMappingValue
+    (Event::empty_scalar(), marker)
+  } else {
+    self.push_state(State::FlowSequenceEntryMappingValue)
+    self.parse_node(false, false)
+  }
+}
+
+///|
+fn Parser::flow_sequence_entry_mapping_value(
+  self : Parser,
+) -> (Event, Marker) raise YamlError {
+  match self.peek_token() {
+    { marker: _, token_type: TokenType::Value } => {
+      self.skip()
+      self.state = State::FlowSequenceEntryMappingValue
+      if self.peek_token()
+        is {
+          marker,
+          token_type: TokenType::FlowEntry
+          | TokenType::FlowSequenceEnd,
+        } {
+        self.state = State::FlowSequenceEntryMappingEnd
+        (Event::empty_scalar(), marker)
+      } else {
+        self.push_state(State::FlowSequenceEntryMappingEnd)
+        self.parse_node(false, false)
+      }
+    }
+    { marker, token_type: _ } => {
+      self.state = State::FlowSequenceEntryMappingEnd
+      (Event::empty_scalar(), marker)
+    }
+  }
+}
+
+///|
+fn Parser::flow_sequence_entry_mapping_end(self : Parser) -> (Event, Marker) {
+  self.state = State::FlowSequenceEntry
+  (Event::MappingEnd, self.lexer.get_marker())
+}
+
+///|
+fn Parser::indentless_sequence_entry(
+  self : Parser,
+) -> (Event, Marker) raise YamlError {
+  match self.peek_token() {
+    { marker: _, token_type: TokenType::BlockEntry } => ()
+    { marker, token_type: _ } => {
+      self.pop_state()
+      return (Event::SequenceEnd, marker)
+    }
+  }
+  self.skip()
+  if self.peek_token()
+    is {
+      marker,
+      token_type: TokenType::BlockEntry
+      | TokenType::Key
+      | TokenType::Value
+      | TokenType::BlockEnd,
+    } {
+    self.state = State::IndentlessSequenceEntry
+    (Event::empty_scalar(), marker) // empty entry: emit an empty scalar, not SequenceEnd (matches yaml-rust2)
+  } else {
+    self.push_state(State::IndentlessSequenceEntry)
+    self.parse_node(true, false)
+  }
+}
+
+///|
+fn 
Parser::block_sequence_entry( + self : Parser, + first : Bool, +) -> (Event, Marker) raise YamlError { + // BLOCK-SEQUENCE-START + if first { + let _ = self.peek_token() + self.skip() + } + match self.peek_token() { + { marker, token_type: TokenType::BlockEnd } => { + self.pop_state() + self.skip() + (Event::SequenceEnd, marker) + } + { marker: _, token_type: TokenType::BlockEntry } => { + self.skip() + if self.peek_token() + is { marker, token_type: TokenType::BlockEntry | TokenType::BlockEnd } { + self.state = State::BlockSequenceEntry + (Event::empty_scalar(), marker) + } else { + self.push_state(State::BlockSequenceEntry) + self.parse_node(true, false) + } + } + { marker, token_type: _ } => + raise YamlError::YamlError( + mark=marker, + info="while parsing a block collection, did not find expected '-' indicator", + ) + } +} + +///| +fn Parser::document_end(self : Parser) -> (Event, Marker) raise YamlError { + let mut explicit_end = false + let marker = match self.peek_token() { + { marker, token_type: TokenType::DocumentEnd } => { + explicit_end = true + self.skip() + marker + } + { marker, token_type: _ } => marker + } + if !self.keep_tags { + self.tags.clear() + } + if explicit_end { + self.state = State::ImplicitDocumentStart + } else { + if self.peek_token() + is { + marker, + token_type: TokenType::VersionDirective(..) + | TokenType::TagDirective(..), + } { + raise YamlError::YamlError( + mark=marker, + info="missing explicit document end marker before directive", + ) + } + self.state = State::DocumentStart + } + (Event::DocumentEnd, marker) +} + +///| +fn Parser::document_content(self : Parser) -> (Event, Marker) raise YamlError { + match self.peek_token() { + { + marker, + token_type: TokenType::VersionDirective(..) + | TokenType::TagDirective(..) 
+ | TokenType::DocumentStart + | TokenType::DocumentEnd + | TokenType::StreamEnd, + } => { + self.pop_state() + // empty scalar + (Event::empty_scalar(), marker) + } + _ => self.parse_node(true, false) + } +} + +///| +fn Parser::parse_node( + self : Parser, + block : Bool, + indentless_sequence : Bool, +) -> (Event, Marker) raise YamlError { + let mut anchor_id = 0 + let mut tag = None + match self.peek_token() { + { marker: _, token_type: TokenType::Alias(_) } => { + self.pop_state() + if self.fetch_token() is { marker, token_type: TokenType::Alias(name) } { + match self.anchors.get(name) { + None => + raise YamlError::YamlError( + mark=marker, + info="while parsing node, found unknown anchor", + ) + Some(id) => return (Event::Alias(id~), marker) + } + } + panic() + } + { marker: _, token_type: TokenType::Anchor(_) } => + if self.fetch_token() is { marker, token_type: TokenType::Anchor(name) } { + anchor_id = self.register_anchor(name, marker) + if self.peek_token().token_type is TokenType::Tag(..) { + if self.fetch_token().token_type is TokenType::Tag(handle~, suffix~) { + tag = Some(self.resolve_tag(marker, handle, suffix)) + } else { + panic() + } + } + } else { + panic() + } + { marker, token_type: TokenType::Tag(..) 
} => + if self.fetch_token().token_type is TokenType::Tag(handle~, suffix~) { + tag = Some(self.resolve_tag(marker, handle, suffix)) + if self.peek_token().token_type is TokenType::Anchor(_) { + if self.fetch_token() + is { marker, token_type: TokenType::Anchor(name) } { + anchor_id = self.register_anchor(name, marker) + } else { + panic() + } + } + } else { + panic() + } + _ => () + } + match self.peek_token() { + { marker, token_type: TokenType::BlockEntry } if indentless_sequence => { + self.state = State::IndentlessSequenceEntry + (Event::SequenceStart(id=anchor_id, tag~), marker) + } + { marker: _, token_type: TokenType::Scalar(_) } => { + self.pop_state() + if self.fetch_token() + is { marker, token_type: TokenType::Scalar(style, value) } { + (Event::Scalar(value~, style~, id=anchor_id, tag~), marker) + } else { + panic() + } + } + { marker, token_type: TokenType::FlowSequenceStart } => { + self.state = State::FlowSequenceFirstEntry + (Event::SequenceStart(id=anchor_id, tag~), marker) + } + { marker, token_type: TokenType::FlowMappingStart } => { + self.state = State::FlowMappingFirstKey + (Event::MappingStart(id=anchor_id, tag~), marker) + } + { marker, token_type: TokenType::BlockSequenceStart } if block => { + self.state = State::BlockSequenceFirstEntry + (Event::SequenceStart(id=anchor_id, tag~), marker) + } + { marker, token_type: TokenType::BlockMappingStart } if block => { + self.state = State::BlockMappingFirstKey + (Event::MappingStart(id=anchor_id, tag~), marker) + } + // ex 7.2, an empty scalar can follow a secondary tag + { marker, token_type: _ } if tag is Some(_) || anchor_id > 0 => { + self.pop_state() + (Event::empty_scalar_with_anchor(anchor_id, tag), marker) + } + { marker, token_type: _ } => + raise YamlError::YamlError( + mark=marker, + info="while parsing a node, did not find expected node content", + ) + } +} + +///| +/// Resolve a tag from the handle and the suffix. 
+fn Parser::resolve_tag( + self : Parser, + mark : Marker, + handle : String, + suffix : String, +) -> Tag raise YamlError { + if handle == "!!" { + // "!!" is a shorthand for "tag:yaml.org,2002:". However, that default can be + // overridden. + match self.tags.get("!!") { + Some(prefix) => Tag::{ handle: prefix, suffix } + None => Tag::{ handle: "tag:yaml.org,2002:", suffix } + } + } else if handle.is_empty() && suffix == "!" { + // "!" introduces a local tag. Local tags may have their prefix overridden. + match self.tags.get("") { + Some(prefix) => Tag::{ handle: prefix, suffix } + None => Tag::{ handle: "", suffix } + } + } else { + // Lookup handle in our tag directives. + let prefix = self.tags.get(handle) + if prefix is Some(prefix) { + Tag::{ handle: prefix, suffix } + // Otherwise, it may be a local handle. With a local handle, the handle is set to + // "!" and the suffix to whatever follows it ("!foo" -> ("!", "foo")). + // If the handle is of the form "!foo!", this cannot be a local handle and we need + // to error. 
+ } else if handle.length() >= 2 && + handle.has_prefix("!") && + handle.has_suffix("!") { + raise YamlError::YamlError(mark~, info="the handle wasn't declared") + } else { + Tag::{ handle, suffix } + } + } +} + +///| +fn Parser::register_anchor(self : Parser, name : String, _ : Marker) -> Int { + // anchors can be overridden/reused + let new_id = self.anchor_id + self.anchor_id += 1 + self.anchors[name] = new_id + new_id +} + +///| +fn Parser::fetch_token(self : Parser) -> Token { + let token = self.token + self.token = None + token.unwrap_or_else(() => { + println("fetch_token needs to be preceded by peek_token") + panic() + }) +} + +///| +fn Parser::pop_state(self : Parser) -> Unit { + self.state = self.states.pop().unwrap() +} + +///| +fn Parser::document_start( + self : Parser, + implicit : Bool, +) -> (Event, Marker) raise YamlError { + while self.peek_token().token_type is TokenType::DocumentEnd { + self.skip() + } + match self.peek_token() { + { marker, token_type: TokenType::StreamEnd } => { + self.state = State::End + self.skip() + (Event::StreamEnd, marker) + } + { + marker: _, + token_type: TokenType::VersionDirective(_) + | TokenType::TagDirective(_) + | TokenType::DocumentStart, + } => + // explicit document + self.explicit_document_start() + { marker, .. } if implicit => { + self.parser_process_directives() + self.push_state(State::DocumentEnd) + self.state = State::BlockNode + (Event::DocumentStart, marker) + } + _ => self.explicit_document_start() + } +} + +///| +fn Parser::push_state(self : Parser, state : State) -> Unit { + self.states.push(state) +} + +///| +fn Parser::explicit_document_start( + self : Parser, +) -> (Event, Marker) raise YamlError { + self.parser_process_directives() + match self.peek_token() { + { marker, token_type: TokenType::DocumentStart } => { + self.push_state(State::DocumentEnd) + self.state = State::DocumentContent + self.skip() + (Event::DocumentStart, marker) + } + { marker, .. 
} => + raise YamlError::YamlError( + mark=marker, + info="did not find expected ", + ) + } +} + +///| +fn Parser::parser_process_directives(self : Parser) -> Unit raise YamlError { + let mut version_directive_received = false + while true { + let tags = @hashmap.HashMap::new() + match self.peek_token() { + { marker, token_type: TokenType::VersionDirective(..) } => { + // XXX parsing with warning according to spec + if version_directive_received { + raise YamlError::YamlError( + mark=marker, + info="duplicate version directive", + ) + } + version_directive_received = true + } + { marker, token_type: TokenType::TagDirective(handle~, prefix~) } => { + if tags.contains(handle) { + raise YamlError::YamlError( + mark=marker, + info="the TAG directive must only be given at most once per handle in the same document", + ) + } + tags[handle] = prefix + } + _ => break + } + self.tags = tags + self.skip() + } +} + +///| +fn Parser::stream_start(self : Parser) -> (Event, Marker) raise YamlError { + match self.peek_token() { + { marker, token_type: TokenType::StreamStart } => { + self.state = State::ImplicitDocumentStart + self.skip() + (Event::StreamStart, marker) + } + { marker, .. } => + raise YamlError::YamlError( + mark=marker, + info="did not find expected ", + ) + } +} diff --git a/src/pkg.generated.mbti b/src/pkg.generated.mbti index 651fe8f..5cb2bb2 100644 --- a/src/pkg.generated.mbti +++ b/src/pkg.generated.mbti @@ -4,41 +4,39 @@ package "myfreess/yaml" // Values // Errors -pub suberror LexError { - LexError(mark~ : Marker, info~ : String) +pub suberror YamlError { + YamlError(mark~ : Marker, info~ : String) } -impl Show for LexError +impl Show for YamlError // Types and methods -pub enum Chomping { - Strip - Clip - Keep +pub enum Event { + StreamStart + StreamEnd + DocumentStart + DocumentEnd + Alias(id~ : Int) + Scalar(value~ : String, style~ : TScalarStyle, id~ : Int, tag~ : Tag?) + SequenceStart(id~ : Int, tag~ : Tag?) 
+ SequenceEnd + MappingStart(id~ : Int, tag~ : Tag?) + MappingEnd } -impl Eq for Chomping - -type Lexer -fn Lexer::fetch_more_tokens(Self) -> Unit raise LexError -fn Lexer::fetch_next_token(Self) -> Unit raise LexError -fn Lexer::new(StringView) -> Self -fn Lexer::next(Self) -> Token? -fn Lexer::next_token(Self) -> Token? raise LexError +impl Eq for Event +impl Show for Event pub struct Marker { index : Int line : Int col : Int } +impl Eq for Marker impl Show for Marker -type SimpleKey - -pub enum SkipTabs { - Yes - No - Result(encountered_tab~ : Bool, has_onemore_whitespace~ : Bool) -} -impl Eq for SkipTabs +type Parser +fn Parser::keep_tags(Self, Bool) -> Unit +fn[R : MarkedEventReceiver] Parser::load(Self, R, Bool) -> Unit raise YamlError +fn Parser::new(StringView) -> Self pub enum TScalarStyle { Plain @@ -50,53 +48,37 @@ pub enum TScalarStyle { impl Eq for TScalarStyle impl Show for TScalarStyle -pub struct Token { - marker : Marker - token_type : TokenType -} -impl Show for Token - -pub enum TokenType { - StreamStart - StreamEnd - VersionDirective(major~ : Int, minor~ : Int) - TagDirective(handle~ : String, prefix~ : String) - DocumentStart - DocumentEnd - BlockSequenceStart - BlockMappingStart - BlockEnd - FlowSequenceStart - FlowSequenceEnd - FlowMappingStart - FlowMappingEnd - BlockEntry - FlowEntry - Key - Value - Alias(String) - Anchor(String) - Tag(handle~ : String, suffix~ : String) - Scalar(TScalarStyle, String) +pub struct Tag { + handle : String + suffix : String } -impl Eq for TokenType -impl Show for TokenType +impl Eq for Tag +impl Show for Tag pub(all) enum Yaml { - Real(String) + Real(Double, repr~ : String) Integer(Int64) String(String) Boolean(Bool) Array(Array[Yaml]) Map(Map[String, Yaml]) - Alias(UInt) Null BadValue } +fn Yaml::load_from_parser(Parser) -> Array[Self] raise YamlError +fn Yaml::load_from_string(String) -> Array[Self] raise YamlError impl Eq for Yaml impl Show for Yaml +impl ToJson for Yaml // Type aliases // Traits +pub trait 
EventReceiver { + on_event(Self, Event) -> Unit +} + +pub trait MarkedEventReceiver { + on_event(Self, Event, Marker) -> Unit +} diff --git a/src/scalar_style.mbt b/src/scalar_style.mbt new file mode 100644 index 0000000..564b3eb --- /dev/null +++ b/src/scalar_style.mbt @@ -0,0 +1,13 @@ +///| +pub enum TScalarStyle { + /// A YAML plain scalar. + Plain + /// A YAML single quoted scalar. + SingleQuoted + /// A YAML double quoted scalar. + DoubleQuoted + /// A YAML literal block (`|` block). + Literal + /// A YAML folded block (`>` block). + Folded +} derive(Eq, Show) diff --git a/src/tag.mbt b/src/tag.mbt new file mode 100644 index 0000000..36087b9 --- /dev/null +++ b/src/tag.mbt @@ -0,0 +1,7 @@ +///| +pub struct Tag { + /// Handle of the tag (`!` included). + handle : String + /// The suffix of the tag. + suffix : String +} derive(Eq, Show) diff --git a/src/yaml.mbt b/src/yaml.mbt index f4e1b2a..06f67ba 100644 --- a/src/yaml.mbt +++ b/src/yaml.mbt @@ -1,9 +1,8 @@ ///| /// YAML data structure representation pub(all) enum Yaml { - /// Float types are stored as String and parsed on demand. - /// Note that `Double` can be problematic for equality comparisons. - Real(String) + /// Float types + Real(Double, repr~ : String) /// YAML int is stored as Int64. Integer(Int64) /// YAML scalar. @@ -18,8 +17,6 @@ pub(all) enum Yaml { /// /// Note that YAML keys can be of any type, but restrict them to String here for simplicity. Map(Map[String, Yaml]) - /// Alias, not fully supported yet. - Alias(UInt) /// YAML null, e.g. `null` or `~`. Null /// Accessing a nonexistent node via indexing returns `BadValue`. This @@ -27,3 +24,239 @@ pub(all) enum Yaml { /// returns `BadValue`. 
BadValue } derive(Eq, Show) + +///| +pub impl ToJson for Yaml with to_json(self) { + // assume input not contain complicate yaml feature + match self { + Null => Json::null() + Map(yaml_map) => Json::object(yaml_map.map((_, yaml) => yaml.to_json())) + Array(yaml_array) => Json::array(yaml_array.map(yaml => yaml.to_json())) + Boolean(bool) => Json::boolean(bool) + String(str) => Json::string(str) + Integer(i64) => Json::number(i64.to_double()) + Real(f64, repr~) => Json::number(f64, repr~) + BadValue => panic() + } +} + +///| +fn Yaml::from_str(v : String) -> Yaml { + match v { + ['0', 'x', .. number] if (try? @strconv.parse_int64(number, base=16)) + is Ok(i) => Yaml::Integer(i) + ['0', 'o', .. number] if (try? @strconv.parse_int64(number, base=8)) + is Ok(i) => Yaml::Integer(i) + ['+', .. number] if (try? @strconv.parse_int64(number)) is Ok(i) => + Yaml::Integer(i) + "" | "~" | "null" => Yaml::Null + "true" | "True" | "TRUE" => Yaml::Boolean(true) + "false" | "False" | "FALSE" => Yaml::Boolean(false) + v => + if (try? @strconv.parse_int64(v)) is Ok(integer) { + Yaml::Integer(integer) + } else if parse_double(v) is Some(d) { + Yaml::Real(d, repr=v) + } else { + Yaml::String(v) + } + } +} + +///| +test "simple string to yaml" { + inspect(Yaml::from_str("42"), content="Integer(42)") + inspect(Yaml::from_str("0x2A"), content="Integer(42)") + inspect(Yaml::from_str("0o52"), content="Integer(42)") + inspect(Yaml::from_str("~"), content="Null") + inspect(Yaml::from_str("true"), content="Boolean(true)") + inspect(Yaml::from_str("True"), content="Boolean(true)") + inspect(Yaml::from_str("TRUE"), content="Boolean(true)") + inspect(Yaml::from_str("false"), content="Boolean(false)") + inspect(Yaml::from_str("False"), content="Boolean(false)") + inspect(Yaml::from_str("FALSE"), content="Boolean(false)") + assert_true(Yaml::from_str("3.14") is Yaml::Real(_)) + assert_true(Yaml::from_str("hello") is Yaml::String(_)) +} + +///| +fn parse_double(v : StringView) -> Double? 
{ + match v { + ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => + Some(@double.infinity) + "-.inf" | "-.Inf" | "-.INF" => Some(@double.neg_infinity) + ".nan" | ".NaN" | ".NAN" => Some(@double.not_a_number) + v if v.iter().any(ch => ch.is_ascii_digit()) => + Some(@strconv.parse_double(v)) catch { + @strconv.StrConvError(_) => None + } + _ => None + } +} + +///| +priv struct YamlLoader { + /// The different YAML documents that are loaded. + docs : Array[Yaml] + /// stack of (current node, anchor_id) + doc_stack : Array[(Yaml, Int)] + key_stack : Array[String] + anchor_map : @sorted_map.SortedMap[Int, Yaml] + mut error : YamlError? +} + +///| +pub fn Yaml::load_from_parser(parser : Parser) -> Array[Yaml] raise YamlError { + let loader = YamlLoader::{ + docs: [], + doc_stack: [], + key_stack: [], + anchor_map: @sorted_map.new(), + error: None, + } + parser.load(loader, true) + if loader.error is Some(err) { + raise err + } else { + loader.docs + } +} + +///| +pub fn Yaml::load_from_string(source : String) -> Array[Yaml] raise YamlError { + let parser = Parser::new(source) + Yaml::load_from_parser(parser) +} + +///| +impl MarkedEventReceiver for YamlLoader with on_event(self, ev, mark) { + if self.error is Some(_) { + return + } + self.on_event_impl(ev, mark) catch { + YamlError(_) as e => self.error = Some(e) + } +} + +///| +fn YamlLoader::on_event_impl( + self : YamlLoader, + ev : Event, + mark : Marker, +) -> Unit raise YamlError { + match ev { + Event::DocumentStart | Event::StreamStart | Event::StreamEnd => () // do nothing + Event::DocumentEnd => + match self.doc_stack.length() { + // empty document + 0 => self.docs.push(Yaml::BadValue) + 1 => self.docs.push(self.doc_stack.pop().unwrap().0) + _ => panic() + } + Event::SequenceStart(id~, ..) => self.doc_stack.push((Yaml::Array([]), id)) + Event::SequenceEnd => { + let node = self.doc_stack.pop().unwrap() + self.insert_new_node(node, mark) + } + Event::MappingStart(id~, ..) 
=> { + self.doc_stack.push((Yaml::Map({}), id)) + self.key_stack.push("") // place holder + } + Event::MappingEnd => { + guard self.key_stack.pop() is Some(_) + let node = self.doc_stack.pop().unwrap() + self.insert_new_node(node, mark) + } + Event::Scalar(value~, style~, id~, tag~) => { + let node = if style != TScalarStyle::Plain { + Yaml::String(value) + } else if tag is Some({ handle, suffix }) { + if handle == "tag:yaml.org,2002:" { + match suffix { + "bool" => + match value { + "true" | "True" | "TRUE" => Yaml::Boolean(true) + "false" | "False" | "FALSE" => Yaml::Boolean(false) + _ => Yaml::BadValue + } + "int" => + Yaml::Integer(@strconv.parse_int64(value)) catch { + @strconv.StrConvError(_) => Yaml::BadValue + } + "float" => + match parse_double(value) { + Some(d) => Yaml::Real(d, repr=value) + None => Yaml::BadValue + } + "null" => + match value { + "~" | "null" => Yaml::Null + _ => Yaml::BadValue + } + _ => Yaml::String(value) + } + } else { + Yaml::String(value) + } + } else { + // Datatype is not specified, or unrecognized + Yaml::from_str(value) + } + self.insert_new_node((node, id), mark) + } + Event::Alias(id~) => { + let n = match self.anchor_map.get(id) { + Some(v) => v + None => Yaml::BadValue + } + self.insert_new_node((n, 0), mark) + } + } +} + +///| +fn YamlLoader::insert_new_node( + self : YamlLoader, + node : (Yaml, Int), + mark : Marker, +) -> Unit raise YamlError { + // valid anchor id starts from 1 + if node.1 > 0 { + self.anchor_map[node.1] = node.0 + } + if self.doc_stack.is_empty() { + self.doc_stack.push(node) + } else { + let parent = self.doc_stack.last().unwrap() + match parent { + (Array(v), _) => v.push(node.0) + (Map(m), _) => { + let cur_key = self.key_stack.last().unwrap() + if cur_key.is_empty() { + // current node is a key + if node.0 is String(key) { + self.key_stack[self.key_stack.length() - 1] = key + } else { + raise YamlError::YamlError( + mark~, + info="yaml.mbt doesn't support non-string key", + ) + } + } else { + // 
current node is a value + let new_key = cur_key + self.key_stack[self.key_stack.length() - 1] = "" + if m.contains(new_key) { + raise YamlError::YamlError( + mark~, + info="\{new_key}: duplicated key in mapping", + ) + } else { + m[new_key] = node.0 + } + } + } + _ => panic() + } + } +}