Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PRISM] Parse stdin on CLI with prism #10216

Merged
merged 1 commit into from Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
89 changes: 69 additions & 20 deletions prism_compile.c
Expand Up @@ -8100,7 +8100,7 @@ pm_parse_result_free(pm_parse_result_t *result)
* as well.
*/
static bool
pm_parse_input_error_utf8_p(const pm_parser_t *parser, const pm_location_t *location)
pm_parse_process_error_utf8_p(const pm_parser_t *parser, const pm_location_t *location)
{
const size_t start_line = pm_newline_list_line_column(&parser->newline_list, location->start, 1).line;
const size_t end_line = pm_newline_list_line_column(&parser->newline_list, location->end, 1).line;
Expand All @@ -8122,7 +8122,7 @@ pm_parse_input_error_utf8_p(const pm_parser_t *parser, const pm_location_t *loca
* information as possible about the errors that were encountered.
*/
static VALUE
pm_parse_input_error(const pm_parse_result_t *result)
pm_parse_process_error(const pm_parse_result_t *result)
{
const pm_diagnostic_t *head = (const pm_diagnostic_t *) result->parser.error_list.head;
bool valid_utf8 = true;
Expand All @@ -8140,7 +8140,7 @@ pm_parse_input_error(const pm_parse_result_t *result)
// contain invalid byte sequences. So if any source examples include
// invalid UTF-8 byte sequences, we will skip showing source examples
// entirely.
if (valid_utf8 && !pm_parse_input_error_utf8_p(&result->parser, &error->location)) {
if (valid_utf8 && !pm_parse_process_error_utf8_p(&result->parser, &error->location)) {
valid_utf8 = false;
}
}
Expand Down Expand Up @@ -8172,19 +8172,18 @@ pm_parse_input_error(const pm_parse_result_t *result)
* result object is zeroed out.
*/
static VALUE
pm_parse_input(pm_parse_result_t *result, VALUE filepath)
pm_parse_process(pm_parse_result_t *result, pm_node_t *node)
{
// Set up the parser and parse the input.
pm_options_filepath_set(&result->options, RSTRING_PTR(filepath));
RB_GC_GUARD(filepath);

pm_parser_t *parser = &result->parser;
pm_parser_init(parser, pm_string_source(&result->input), pm_string_length(&result->input), &result->options);
const pm_node_t *node = pm_parse(parser);

// First, set up the scope node so that the AST node is attached and can be
// freed regardless of whether or not we return an error.
pm_scope_node_t *scope_node = &result->node;
pm_scope_node_init(node, scope_node, NULL);

// If there are errors, raise an appropriate error and free the result.
if (result->parser.error_list.size > 0) {
VALUE error = pm_parse_input_error(result);
if (parser->error_list.size > 0) {
VALUE error = pm_parse_process_error(result);

// TODO: We need to set the backtrace.
// rb_funcallv(error, rb_intern("set_backtrace"), 1, &path);
Expand All @@ -8208,9 +8207,6 @@ pm_parse_input(pm_parse_result_t *result, VALUE filepath)

// Now set up the constant pool and intern all of the various constants into
// their corresponding IDs.
pm_scope_node_t *scope_node = &result->node;
pm_scope_node_init(node, scope_node, NULL);

scope_node->encoding = rb_enc_find(parser->encoding->name);
if (!scope_node->encoding) rb_bug("Encoding not found %s!", parser->encoding->name);

Expand Down Expand Up @@ -8300,7 +8296,13 @@ pm_load_file(pm_parse_result_t *result, VALUE filepath)
VALUE
pm_parse_file(pm_parse_result_t *result, VALUE filepath)
{
VALUE error = pm_parse_input(result, filepath);
pm_options_filepath_set(&result->options, RSTRING_PTR(filepath));
RB_GC_GUARD(filepath);

pm_parser_init(&result->parser, pm_string_source(&result->input), pm_string_length(&result->input), &result->options);
pm_node_t *node = pm_parse(&result->parser);

VALUE error = pm_parse_process(result, node);

// If we're parsing a filepath, then we need to potentially support the
// SCRIPT_LINES__ constant, which can be a hash that has an array of lines
Expand Down Expand Up @@ -8335,9 +8337,9 @@ pm_load_parse_file(pm_parse_result_t *result, VALUE filepath)

/**
* Parse the given source that corresponds to the given filepath and store the
* resulting scope node in the given parse result struct. This function could
* potentially raise a Ruby error. It is assumed that the parse result object is
* zeroed out.
* resulting scope node in the given parse result struct. It is assumed that the
* parse result object is zeroed out. If the string fails to parse, then a Ruby
* error is returned.
*/
VALUE
pm_parse_string(pm_parse_result_t *result, VALUE source, VALUE filepath)
Expand All @@ -8347,7 +8349,54 @@ pm_parse_string(pm_parse_result_t *result, VALUE source, VALUE filepath)
rb_encoding *encoding = rb_enc_get(source);
pm_options_encoding_set(&result->options, rb_enc_name(encoding));

return pm_parse_input(result, filepath);
pm_options_filepath_set(&result->options, RSTRING_PTR(filepath));
RB_GC_GUARD(filepath);

pm_parser_init(&result->parser, pm_string_source(&result->input), pm_string_length(&result->input), &result->options);
pm_node_t *node = pm_parse(&result->parser);

return pm_parse_process(result, node);
}

/**
 * An implementation of fgets that is suitable for use with Ruby IO objects.
 *
 * Calls `gets(size - 1)` on the Ruby object passed through `stream`, copies
 * the returned bytes into `string`, and appends a terminating NUL byte.
 *
 * @param string The destination buffer that receives the line.
 * @param size The size argument in the style of fgets; asserted to be greater
 *     than zero. At most `size - 1` bytes are requested from the stream.
 * @param stream A Ruby object (a VALUE cast to a void pointer) that responds
 *     to `gets`.
 * @return `string` if a line was read, or NULL if `gets` returned nil.
 */
static char *
pm_parse_stdin_fgets(char *string, int size, void *stream)
{
    RUBY_ASSERT(size > 0);

    VALUE line = rb_funcall((VALUE) stream, rb_intern("gets"), 1, INT2FIX(size - 1));
    if (NIL_P(line)) {
        return NULL;
    }

    // Take the pointer and the byte length directly from the string instead
    // of going through StringValueCStr + strlen. StringValueCStr raises an
    // ArgumentError when the string contains a NUL byte, which would abort
    // parsing of any stdin input that happens to include one. StringValuePtr
    // still coerces the value to a String first.
    const char *source = StringValuePtr(line);
    long length = RSTRING_LEN(line);

    // NOTE(review): this relies on `gets(limit)` never returning more than
    // `limit` bytes. IO#gets may exceed the limit slightly to avoid splitting
    // a multi-byte character -- confirm against the stream's encoding and the
    // caller's buffer size.
    memcpy(string, source, (size_t) length);
    string[length] = '\0';

    return string;
}

/**
 * Read source code from STDIN, parse it, and store the resulting scope node in
 * the given parse result struct. The parse result object is assumed to be
 * zeroed out on entry. If the stream fails to parse, then a Ruby error is
 * returned.
 */
VALUE
pm_parse_stdin(pm_parse_result_t *result)
{
    pm_parser_t *parser = &result->parser;

    // Stream the contents of STDIN through the parser, collecting everything
    // that was read into a local buffer.
    pm_buffer_t contents;
    pm_node_t *root = pm_parse_stream(parser, &contents, (void *) rb_stdin, pm_parse_stdin_fgets, &result->options);

    // Hand the bytes that the buffer accumulated over to the input string,
    // which takes ownership of them. Because of that, the buffer itself is
    // not freed here.
    uint8_t *bytes = (uint8_t *) pm_buffer_value(&contents);
    size_t length = pm_buffer_length(&contents);
    pm_string_owned_init(&result->input, bytes, length);

    return pm_parse_process(result, root);
}

#undef NEW_ISEQ
Expand Down
1 change: 1 addition & 0 deletions prism_compile.h
Expand Up @@ -51,6 +51,7 @@ VALUE pm_load_file(pm_parse_result_t *result, VALUE filepath);
VALUE pm_parse_file(pm_parse_result_t *result, VALUE filepath);
VALUE pm_load_parse_file(pm_parse_result_t *result, VALUE filepath);
VALUE pm_parse_string(pm_parse_result_t *result, VALUE source, VALUE filepath);
VALUE pm_parse_stdin(pm_parse_result_t *result);
void pm_parse_result_free(pm_parse_result_t *result);

rb_iseq_t *pm_iseq_new(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent, enum rb_iseq_type);
Expand Down
6 changes: 5 additions & 1 deletion ruby.c
Expand Up @@ -2122,7 +2122,11 @@ prism_script(ruby_cmdline_options_t *opt, pm_parse_result_t *result)

VALUE error;
if (strcmp(opt->script, "-") == 0) {
rb_raise(rb_eRuntimeError, "Prism support for streaming code from stdin is not currently supported");
pm_options_command_line_set(options, command_line);
pm_options_filepath_set(options, "-");

prism_opt_init(opt);
error = pm_parse_stdin(result);
}
else if (opt->e_script) {
command_line |= PM_OPTIONS_COMMAND_LINE_E;
Expand Down