Skip to content

Commit efdc2b7

Browse files
committed
Support parsing streams
1 parent f372c6f commit efdc2b7

File tree

9 files changed

+361
-34
lines changed

9 files changed

+361
-34
lines changed

docs/ruby_api.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ The full API is documented below.
2020
* `Prism.lex_file(filepath)` - parse the tokens corresponding to the given source file and return them as an array within a parse result
2121
* `Prism.parse(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result
2222
* `Prism.parse_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result
23+
* `Prism.parse_stream(io)` - parse the syntax tree corresponding to the source that is read out of the given IO object using the `#gets` method and return it within a parse result
2324
* `Prism.parse_lex(source)` - parse the syntax tree corresponding to the given source string and return it within a parse result, along with the tokens
2425
* `Prism.parse_lex_file(filepath)` - parse the syntax tree corresponding to the given source file and return it within a parse result, along with the tokens
2526
* `Prism.load(source, serialized)` - load the serialized syntax tree using the source as a reference into a syntax tree

ext/prism/extension.c

Lines changed: 78 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,24 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
504504
return warnings;
505505
}
506506

507+
/**
508+
* Create a new parse result from the given parser, value, encoding, and source.
509+
*/
510+
static VALUE
511+
parse_result_create(pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source) {
512+
VALUE result_argv[] = {
513+
value,
514+
parser_comments(parser, source),
515+
parser_magic_comments(parser, source),
516+
parser_data_loc(parser, source),
517+
parser_errors(parser, encoding, source),
518+
parser_warnings(parser, encoding, source),
519+
source
520+
};
521+
522+
return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
523+
}
524+
507525
/******************************************************************************/
508526
/* Lexing Ruby code */
509527
/******************************************************************************/
@@ -610,19 +628,11 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
610628
value = parse_lex_data.tokens;
611629
}
612630

613-
VALUE result_argv[] = {
614-
value,
615-
parser_comments(&parser, source),
616-
parser_magic_comments(&parser, source),
617-
parser_data_loc(&parser, source),
618-
parser_errors(&parser, parse_lex_data.encoding, source),
619-
parser_warnings(&parser, parse_lex_data.encoding, source),
620-
source
621-
};
622-
631+
VALUE result = parse_result_create(&parser, value, parse_lex_data.encoding, source);
623632
pm_node_destroy(&parser, node);
624633
pm_parser_free(&parser);
625-
return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
634+
635+
return result;
626636
}
627637

628638
/**
@@ -682,17 +692,8 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
682692
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
683693

684694
VALUE source = pm_source_new(&parser, encoding);
685-
VALUE result_argv[] = {
686-
pm_ast_new(&parser, node, encoding, source),
687-
parser_comments(&parser, source),
688-
parser_magic_comments(&parser, source),
689-
parser_data_loc(&parser, source),
690-
parser_errors(&parser, encoding, source),
691-
parser_warnings(&parser, encoding, source),
692-
source
693-
};
694-
695-
VALUE result = rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
695+
VALUE value = pm_ast_new(&parser, node, encoding, source);
696+
VALUE result = parse_result_create(&parser, value, encoding, source) ;
696697

697698
pm_node_destroy(&parser, node);
698699
pm_parser_free(&parser);
@@ -751,6 +752,60 @@ parse(int argc, VALUE *argv, VALUE self) {
751752
return value;
752753
}
753754

755+
/**
756+
* An implementation of fgets that is suitable for use with Ruby IO objects.
757+
*/
758+
static char *
759+
parse_stream_fgets(char *restrict string, int size, void *restrict stream) {
760+
RUBY_ASSERT(size > 0);
761+
762+
VALUE line = rb_funcall((VALUE) stream, rb_intern("gets"), 1, INT2FIX(size - 1));
763+
if (NIL_P(line)) {
764+
return NULL;
765+
}
766+
767+
const char *cstr = StringValueCStr(line);
768+
size_t length = strlen(cstr);
769+
770+
memcpy(string, cstr, length);
771+
string[length] = '\0';
772+
773+
return string;
774+
}
775+
776+
/**
777+
* call-seq:
778+
* Prism::parse_stream(stream, **options) -> ParseResult
779+
*
780+
* Parse the given object that responds to `gets` and return a ParseResult
781+
* instance. The options that are supported are the same as Prism::parse.
782+
*/
783+
static VALUE
784+
parse_stream(int argc, VALUE *argv, VALUE self) {
785+
VALUE stream;
786+
VALUE keywords;
787+
rb_scan_args(argc, argv, "1:", &stream, &keywords);
788+
789+
pm_options_t options = { 0 };
790+
extract_options(&options, Qnil, keywords);
791+
792+
pm_parser_t parser;
793+
pm_buffer_t buffer;
794+
795+
pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, &options);
796+
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
797+
798+
VALUE source = pm_source_new(&parser, encoding);
799+
VALUE value = pm_ast_new(&parser, node, encoding, source);
800+
VALUE result = parse_result_create(&parser, value, encoding, source);
801+
802+
pm_node_destroy(&parser, node);
803+
pm_buffer_free(&buffer);
804+
pm_parser_free(&parser);
805+
806+
return result;
807+
}
808+
754809
/**
755810
* call-seq:
756811
* Prism::parse_file(filepath, **options) -> ParseResult
@@ -1271,6 +1326,7 @@ Init_prism(void) {
12711326
rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
12721327
rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
12731328
rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
1329+
rb_define_singleton_method(rb_cPrism, "parse_stream", parse_stream, -1);
12741330
rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
12751331
rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
12761332
rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);

include/prism.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,36 @@ PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
7979
*/
8080
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
8181

82+
/**
83+
* This function is used in pm_parse_stream to retrieve a line of input from a
84+
* stream. Its signature closely mirrors that of fgets so that fgets can be used as the
85+
* default implementation.
86+
*/
87+
typedef char * (pm_parse_stream_fgets_t)(char *restrict string, int size, void *restrict stream);
88+
89+
/**
90+
* Parse a stream of Ruby source and return the tree.
91+
*
92+
* @param parser The parser to use.
93+
* @param buffer The buffer to use.
94+
* @param stream The stream to parse.
95+
* @param fgets The function to use to read from the stream.
96+
* @param options The optional options to use when parsing.
97+
* @return The AST representing the source.
98+
*/
99+
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options);
100+
101+
/**
102+
* Parse and serialize the AST represented by the source that is read out of the
103+
* given stream into the given buffer.
104+
*
105+
* @param buffer The buffer to serialize to.
106+
* @param stream The stream to parse.
107+
* @param fgets The function to use to read from the stream.
108+
* @param data The optional data to pass to the parser.
109+
*/
110+
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const char *data);
111+
82112
/**
83113
* Serialize the given list of comments to the given buffer.
84114
*

lib/prism/ffi.rb

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,21 @@ module LibRubyParser # :nodoc:
2323
# size_t -> :size_t
2424
# void -> :void
2525
#
26-
def self.resolve_type(type)
26+
def self.resolve_type(type, callbacks)
2727
type = type.strip
28-
type.end_with?("*") ? :pointer : type.delete_prefix("const ").to_sym
28+
29+
if !type.end_with?("*")
30+
type.delete_prefix("const ").to_sym
31+
else
32+
type = type.delete_suffix("*").rstrip
33+
callbacks.include?(type.to_sym) ? type.to_sym : :pointer
34+
end
2935
end
3036

3137
# Read through the given header file and find the declaration of each of the
3238
# given functions. For each one, define a function with the same name and
3339
# signature as the C function.
34-
def self.load_exported_functions_from(header, *functions)
40+
def self.load_exported_functions_from(header, *functions, callbacks)
3541
File.foreach(File.expand_path("../../include/#{header}", __dir__)) do |line|
3642
# We only want to attempt to load exported functions.
3743
next unless line.start_with?("PRISM_EXPORTED_FUNCTION ")
@@ -55,24 +61,28 @@ def self.load_exported_functions_from(header, *functions)
5561

5662
# Resolve the type of the argument by dropping the name of the argument
5763
# first if it is present.
58-
arg_types.map! { |type| resolve_type(type.sub(/\w+$/, "")) }
64+
arg_types.map! { |type| resolve_type(type.sub(/\w+$/, ""), callbacks) }
5965

6066
# Attach the function using the FFI library.
61-
attach_function name, arg_types, resolve_type(return_type)
67+
attach_function name, arg_types, resolve_type(return_type, [])
6268
end
6369

6470
# If we didn't find all of the functions, raise an error.
6571
raise "Could not find functions #{functions.inspect}" unless functions.empty?
6672
end
6773

74+
callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer
75+
6876
load_exported_functions_from(
6977
"prism.h",
7078
"pm_version",
7179
"pm_serialize_parse",
80+
"pm_serialize_parse_stream",
7281
"pm_serialize_parse_comments",
7382
"pm_serialize_lex",
7483
"pm_serialize_parse_lex",
75-
"pm_parse_success_p"
84+
"pm_parse_success_p",
85+
[:pm_parse_stream_fgets_t]
7686
)
7787

7888
load_exported_functions_from(
@@ -81,7 +91,8 @@ def self.load_exported_functions_from(header, *functions)
8191
"pm_buffer_init",
8292
"pm_buffer_value",
8393
"pm_buffer_length",
84-
"pm_buffer_free"
94+
"pm_buffer_free",
95+
[]
8596
)
8697

8798
load_exported_functions_from(
@@ -90,7 +101,8 @@ def self.load_exported_functions_from(header, *functions)
90101
"pm_string_free",
91102
"pm_string_source",
92103
"pm_string_length",
93-
"pm_string_sizeof"
104+
"pm_string_sizeof",
105+
[]
94106
)
95107

96108
# This object represents a pm_buffer_t. We only use it as an opaque pointer,
@@ -215,13 +227,36 @@ def parse(code, **options)
215227
end
216228

217229
# Mirror the Prism.parse_file API by using the serialization API. This uses
218-
# native strings instead of Ruby strings because it allows us to use mmap when
219-
# it is available.
230+
# native strings instead of Ruby strings because it allows us to use mmap
231+
# when it is available.
220232
def parse_file(filepath, **options)
221233
options[:filepath] = filepath
222234
LibRubyParser::PrismString.with_file(filepath) { |string| parse_common(string, string.read, options) }
223235
end
224236

237+
# Mirror the Prism.parse_stream API by using the serialization API. The given
# stream must respond to #gets; every line read from it is also accumulated
# into a Ruby string so that the serialized result can be loaded against the
# complete source afterwards.
def parse_stream(stream, **options)
  LibRubyParser::PrismBuffer.with do |buffer|
    source = +""
    callback = -> (string, size, _) {
      # One byte of the destination is reserved for the trailing NUL, so the
      # size has to be strictly positive.
      raise "Expected size to be > 0, got: #{size}" if size <= 0

      # When #gets returns nil nothing is written and the lambda itself
      # evaluates to nil.
      if !(line = stream.gets(size - 1)).nil?
        source << line
        string.write_string("#{line}\x00", line.bytesize + 1)
      end
    }

    # In the pm_serialize_parse_stream function it accepts a pointer to the
    # IO object as a void* and then passes it through to the callback as the
    # third argument, but it never touches it itself. As such, since we have
    # access to the IO object already through the closure of the lambda, we
    # can pass a null pointer here and not worry.
    LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, dump_options(options))
    Prism.load(source, buffer.read)
  end
end
259+
225260
# Mirror the Prism.parse_comments API by using the serialization API.
226261
def parse_comments(code, **options)
227262
LibRubyParser::PrismString.with_string(code) { |string| parse_comments_common(string, code, options) }

rakelib/test.rake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ require "rake/testtask"
55
config = lambda do |t|
66
t.libs << "test"
77
t.libs << "lib"
8-
t.test_files = FileList["test/**/*_test.rb"]
8+
t.test_files = FileList["test/prism/parse_stream_test.rb"]
99
end
1010

1111
Rake::TestTask.new(:test, &config)

rbi/prism.rbi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ module Prism
2828
sig { params(filepath: String, options: T::Hash[Symbol, T.untyped]).returns(Prism::ParseResult[Prism::ProgramNode]) }
2929
def self.parse_file(filepath, **options); end
3030

31+
sig { params(stream: T.any(IO, StringIO), options: T::Hash[Symbol, T.untyped]).returns(Prism::ParseResult[Prism::ProgramNode]) }
32+
def self.parse_stream(stream, **options); end
33+
3134
sig { params(source: String, options: T::Hash[Symbol, T.untyped]).returns(T::Array[Prism::Comment]) }
3235
def self.parse_comments(source, **options); end
3336

0 commit comments

Comments
 (0)