@@ -43,6 +43,14 @@ typedef struct {
4343 size_t mark_index ;
4444} psych_fy_parser_t ;
4545
46+ static const struct fy_parse_cfg psych_parse_cfg = {
47+ /* Keep libfyaml's strict YAML 1.2 flow-indentation checks. This backend
48+ * exists to follow the 1.2 spec, so we reject malformed flow indentation
49+ * (e.g. wrongly indented flow sequences) rather than relaxing to libyaml's
50+ * 1.1-era leniency with FYPCF_SLOPPY_FLOW_INDENTATION. */
51+ .flags = FYPCF_QUIET | FYPCF_DEFAULT_VERSION_AUTO ,
52+ };
53+
4654static ssize_t io_reader (void * user , void * buf , size_t count )
4755{
4856 VALUE io = (VALUE )user ;
@@ -84,30 +92,40 @@ static VALUE allocate(VALUE klass)
8492 psych_fy_parser_t * parser ;
8593 VALUE obj = TypedData_Make_Struct (klass , psych_fy_parser_t , & psych_parser_type , parser );
8694
87- static const struct fy_parse_cfg cfg = {
88- .flags = FYPCF_QUIET | FYPCF_COLLECT_DIAG | FYPCF_DEFAULT_VERSION_AUTO ,
89- };
90- parser -> fyp = fy_parser_create (& cfg );
91- if (!parser -> fyp ) {
92- rb_raise (rb_eNoMemError , "could not create libfyaml parser" );
93- }
95+ parser -> fyp = NULL ;
9496
9597 return obj ;
9698}
9799
98- /* TODO: libfyaml's diagnostics are collected via fy_diag; reconstructing the
99- * libyaml-style problem/context/offset is left for a later pass. For now we
100- * raise a Psych::SyntaxError with the best-effort mark we tracked . */
100+ /* Reconstruct a Psych::SyntaxError from libfyaml's collected diagnostics. The
101+ * parser is created with FYPCF_COLLECT_DIAG, so the first collected error gives
102+ * us the message and position . */
101103static VALUE make_exception (psych_fy_parser_t * parser , VALUE path )
102104{
103105 VALUE ePsychSyntaxError = rb_const_get (mPsych , rb_intern ("SyntaxError" ));
106+ VALUE problem = Qnil ;
107+ size_t line = parser -> mark_line ;
108+ size_t column = parser -> mark_column ;
109+
110+ struct fy_diag * diag = fy_parser_get_diag (parser -> fyp );
111+ if (diag ) {
112+ void * iter = NULL ;
113+ struct fy_diag_error * err = fy_diag_errors_iterate (diag , & iter );
114+ if (err ) {
115+ if (err -> msg ) problem = rb_usascii_str_new2 (err -> msg );
116+ if (err -> line >= 0 ) line = (size_t )err -> line ;
117+ if (err -> column >= 0 ) column = (size_t )err -> column ;
118+ }
119+ fy_diag_unref (diag );
120+ }
121+ if (NIL_P (problem )) problem = rb_usascii_str_new2 ("could not parse YAML" );
104122
105123 return rb_funcall (ePsychSyntaxError , rb_intern ("new" ), 6 ,
106124 path ,
107- SIZET2NUM (parser -> mark_line + 1 ),
108- SIZET2NUM (parser -> mark_column + 1 ),
125+ SIZET2NUM (line ),
126+ SIZET2NUM (column ),
109127 SIZET2NUM (parser -> mark_index ),
110- rb_usascii_str_new2 ( "could not parse YAML" ) ,
128+ problem ,
111129 Qnil );
112130}
113131
@@ -245,9 +263,31 @@ static VALUE parse(VALUE self, VALUE handler, VALUE yaml, VALUE path)
245263
246264 TypedData_Get_Struct (self , psych_fy_parser_t , & psych_parser_type , parser );
247265
248- fy_parser_reset (parser -> fyp );
266+ /* Use a pristine parser for each parse, like fy-tool does. Reusing a
267+ * parser across documents via fy_parser_reset() left the default tag
268+ * handles unset for bare (no "---") tag-led documents. */
269+ if (parser -> fyp ) {
270+ fy_parser_destroy (parser -> fyp );
271+ parser -> fyp = NULL ;
272+ }
273+ parser -> fyp = fy_parser_create (& psych_parse_cfg );
274+ if (!parser -> fyp ) {
275+ rb_raise (rb_eNoMemError , "could not create libfyaml parser" );
276+ }
249277 parser -> mark_line = parser -> mark_column = parser -> mark_index = 0 ;
250278
279+ /* Make the parser's own diagnostic object collect errors instead of
280+ * printing them to stderr, so make_exception() can recover the message.
281+ * Replacing the diag with a freshly created one crashes libfyaml 0.9.6,
282+ * so mutate the existing default diag in place. */
283+ {
284+ struct fy_diag * diag = fy_parser_get_diag (parser -> fyp );
285+ if (diag ) {
286+ fy_diag_set_collect_errors (diag , true);
287+ fy_diag_unref (diag );
288+ }
289+ }
290+
251291 if (rb_respond_to (yaml , id_read )) {
252292 if (fy_parser_set_input_callback (parser -> fyp , (void * )yaml , io_reader ) != 0 ) {
253293 rb_raise (rb_eRuntimeError , "could not set libfyaml input" );
@@ -315,8 +355,12 @@ static VALUE parse(VALUE self, VALUE handler, VALUE yaml, VALUE path)
315355 void * iter = NULL ;
316356 const struct fy_tag * tag ;
317357 while ((tag = fy_document_state_tag_directive_iterate (ds , & iter )) != NULL ) {
318- /* skip the implicit defaults ("!" and "!!") */
319- if (tag -> handle && tag -> prefix ) {
358+ /* skip the implicit defaults ("!", "!!" and the empty
359+ * primary handle libfyaml reports) */
360+ if (!tag -> handle || tag -> handle [0 ] == '\0' ) {
361+ continue ;
362+ }
363+ if (tag -> prefix ) {
320364 if ((strcmp (tag -> handle , "!" ) == 0 && strcmp (tag -> prefix , "!" ) == 0 ) ||
321365 (strcmp (tag -> handle , "!!" ) == 0 &&
322366 strcmp (tag -> prefix , "tag:yaml.org,2002:" ) == 0 )) {
0 commit comments