Skip to content

Commit d383361

Browse files
hsbtclaude
andcommitted
Improve libfyaml parser fidelity and error reporting
Create a fresh parser per parse instead of reusing one via fy_parser_reset(), which left default tag handles unset and rejected bare ("---"-less) tag-led documents. Recover the real message and position by switching the parser's own diagnostic object to collect mode; creating a replacement diag crashes libfyaml 0.9.6. Drop the spurious empty tag directive libfyaml reports. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent b374869 commit d383361

1 file changed

Lines changed: 60 additions & 16 deletions

File tree

ext/psych/psych_parser_fy.c

Lines changed: 60 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,14 @@ typedef struct {
4343
size_t mark_index;
4444
} psych_fy_parser_t;
4545

46+
static const struct fy_parse_cfg psych_parse_cfg = {
47+
/* Keep libfyaml's strict YAML 1.2 flow-indentation checks. This backend
48+
* exists to follow the 1.2 spec, so we reject malformed flow indentation
49+
* (e.g. wrongly indented flow sequences) rather than relaxing to libyaml's
50+
* 1.1-era leniency with FYPCF_SLOPPY_FLOW_INDENTATION. */
51+
.flags = FYPCF_QUIET | FYPCF_DEFAULT_VERSION_AUTO,
52+
};
53+
4654
static ssize_t io_reader(void *user, void *buf, size_t count)
4755
{
4856
VALUE io = (VALUE)user;
@@ -84,30 +92,40 @@ static VALUE allocate(VALUE klass)
8492
psych_fy_parser_t *parser;
8593
VALUE obj = TypedData_Make_Struct(klass, psych_fy_parser_t, &psych_parser_type, parser);
8694

87-
static const struct fy_parse_cfg cfg = {
88-
.flags = FYPCF_QUIET | FYPCF_COLLECT_DIAG | FYPCF_DEFAULT_VERSION_AUTO,
89-
};
90-
parser->fyp = fy_parser_create(&cfg);
91-
if (!parser->fyp) {
92-
rb_raise(rb_eNoMemError, "could not create libfyaml parser");
93-
}
95+
parser->fyp = NULL;
9496

9597
return obj;
9698
}
9799

98-
/* TODO: libfyaml's diagnostics are collected via fy_diag; reconstructing the
99-
* libyaml-style problem/context/offset is left for a later pass. For now we
100-
* raise a Psych::SyntaxError with the best-effort mark we tracked. */
100+
/* Reconstruct a Psych::SyntaxError from libfyaml's collected diagnostics. The
101+
* parser is created with FYPCF_COLLECT_DIAG, so the first collected error gives
102+
* us the message and position. */
101103
static VALUE make_exception(psych_fy_parser_t *parser, VALUE path)
102104
{
103105
VALUE ePsychSyntaxError = rb_const_get(mPsych, rb_intern("SyntaxError"));
106+
VALUE problem = Qnil;
107+
size_t line = parser->mark_line;
108+
size_t column = parser->mark_column;
109+
110+
struct fy_diag *diag = fy_parser_get_diag(parser->fyp);
111+
if (diag) {
112+
void *iter = NULL;
113+
struct fy_diag_error *err = fy_diag_errors_iterate(diag, &iter);
114+
if (err) {
115+
if (err->msg) problem = rb_usascii_str_new2(err->msg);
116+
if (err->line >= 0) line = (size_t)err->line;
117+
if (err->column >= 0) column = (size_t)err->column;
118+
}
119+
fy_diag_unref(diag);
120+
}
121+
if (NIL_P(problem)) problem = rb_usascii_str_new2("could not parse YAML");
104122

105123
return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6,
106124
path,
107-
SIZET2NUM(parser->mark_line + 1),
108-
SIZET2NUM(parser->mark_column + 1),
125+
SIZET2NUM(line),
126+
SIZET2NUM(column),
109127
SIZET2NUM(parser->mark_index),
110-
rb_usascii_str_new2("could not parse YAML"),
128+
problem,
111129
Qnil);
112130
}
113131

@@ -245,9 +263,31 @@ static VALUE parse(VALUE self, VALUE handler, VALUE yaml, VALUE path)
245263

246264
TypedData_Get_Struct(self, psych_fy_parser_t, &psych_parser_type, parser);
247265

248-
fy_parser_reset(parser->fyp);
266+
/* Use a pristine parser for each parse, like fy-tool does. Reusing a
267+
* parser across documents via fy_parser_reset() left the default tag
268+
* handles unset for bare (no "---") tag-led documents. */
269+
if (parser->fyp) {
270+
fy_parser_destroy(parser->fyp);
271+
parser->fyp = NULL;
272+
}
273+
parser->fyp = fy_parser_create(&psych_parse_cfg);
274+
if (!parser->fyp) {
275+
rb_raise(rb_eNoMemError, "could not create libfyaml parser");
276+
}
249277
parser->mark_line = parser->mark_column = parser->mark_index = 0;
250278

279+
/* Make the parser's own diagnostic object collect errors instead of
280+
* printing them to stderr, so make_exception() can recover the message.
281+
* Replacing the diag with a freshly created one crashes libfyaml 0.9.6,
282+
* so mutate the existing default diag in place. */
283+
{
284+
struct fy_diag *diag = fy_parser_get_diag(parser->fyp);
285+
if (diag) {
286+
fy_diag_set_collect_errors(diag, true);
287+
fy_diag_unref(diag);
288+
}
289+
}
290+
251291
if (rb_respond_to(yaml, id_read)) {
252292
if (fy_parser_set_input_callback(parser->fyp, (void *)yaml, io_reader) != 0) {
253293
rb_raise(rb_eRuntimeError, "could not set libfyaml input");
@@ -315,8 +355,12 @@ static VALUE parse(VALUE self, VALUE handler, VALUE yaml, VALUE path)
315355
void *iter = NULL;
316356
const struct fy_tag *tag;
317357
while ((tag = fy_document_state_tag_directive_iterate(ds, &iter)) != NULL) {
318-
/* skip the implicit defaults ("!" and "!!") */
319-
if (tag->handle && tag->prefix) {
358+
/* skip the implicit defaults ("!", "!!" and the empty
359+
* primary handle libfyaml reports) */
360+
if (!tag->handle || tag->handle[0] == '\0') {
361+
continue;
362+
}
363+
if (tag->prefix) {
320364
if ((strcmp(tag->handle, "!") == 0 && strcmp(tag->prefix, "!") == 0) ||
321365
(strcmp(tag->handle, "!!") == 0 &&
322366
strcmp(tag->prefix, "tag:yaml.org,2002:") == 0)) {

0 commit comments

Comments
 (0)