-
Notifications
You must be signed in to change notification settings - Fork 5.3k
/
parser.rb
196 lines (162 loc) · 6.02 KB
/
parser.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# frozen_string_literal: true
require "parser"
module Prism
module Translation
# This class is the entry-point for converting a prism syntax tree into the
# whitequark/parser gem's syntax tree. It inherits from the base parser for
# the parser gem, and overrides the parse* methods to parse with prism and
# then translate.
class Parser < ::Parser::Base
# The parser gem has a list of diagnostics with a hard-coded set of error
# messages. We create our own diagnostic class in order to set our own
# error messages.
#
# Besides the level and location, the parent initializer always receives
# the fixed symbol :prism_error (presumably its "reason"), an empty hash and
# an empty array; the text itself is carried by #message.
class Diagnostic < ::Parser::Diagnostic
# The message generated by prism.
attr_reader :message
# Initialize a new diagnostic with the given message and location.
#
# +message+ is the text produced by prism, +level+ is the severity (this
# file passes :error and :warning) and +location+ is the source range the
# diagnostic refers to.
def initialize(message, level, location)
# NOTE(review): @message is assigned before calling super, presumably
# because the parent initializer freezes the instance. Confirm against the
# parser gem before reordering these two statements.
@message = message
super(level, :prism_error, {}, location, [])
end
end
# NOTE(review): presumably consulted by the Racc runtime underneath
# ::Parser::Base to decide whether to trace parsing; parsing here is delegated
# to prism, so it stays false. Confirm against the parser gem.
Racc_debug_parser = false # :nodoc:
# The Ruby syntax version this parser targets, expressed as an integer.
# The parse methods hand it to convert_for_prism, which turns 34 into
# "3.4.0" for prism.
def version # :nodoc:
  34
end
# Returns UTF-8, which is the default encoding for Ruby source files.
def default_encoding
  Encoding::UTF_8
end
# Deliberately does nothing and returns nil.
# NOTE(review): presumably overrides the error callback that the Racc-based
# parent class would invoke; in this class problems are reported from prism's
# result in unwrap instead. Confirm against the parser gem.
def yyerror # :nodoc:
  nil
end
# Parses a source buffer with prism and returns the translated AST.
#
# The buffer is kept in @source_buffer only for the duration of the call;
# the ensure clause clears it again, including when an exception propagates.
def parse(source_buffer)
  @source_buffer = source_buffer
  code = source_buffer.source
  offsets = build_offset_cache(code)

  prism_result = Prism.parse(code, filepath: source_buffer.name, version: convert_for_prism(version))
  # unwrap reports prism's errors and warnings before the tree is translated.
  checked = unwrap(prism_result, offsets)

  build_ast(checked.value, offsets)
ensure
  @source_buffer = nil
end
# Parses a source buffer with prism and returns a two-element array holding
# the translated AST followed by the source code comments.
#
# The buffer is kept in @source_buffer only for the duration of the call;
# the ensure clause clears it again, including when an exception propagates.
def parse_with_comments(source_buffer)
  @source_buffer = source_buffer
  code = source_buffer.source
  offsets = build_offset_cache(code)

  prism_result = Prism.parse(code, filepath: source_buffer.name, version: convert_for_prism(version))
  # unwrap reports prism's errors and warnings before anything is translated.
  checked = unwrap(prism_result, offsets)

  ast = build_ast(checked.value, offsets)
  comments = build_comments(checked.comments, offsets)
  [ast, comments]
ensure
  @source_buffer = nil
end
# Parses a source buffer and returns the AST, the source code comments,
# and the tokens emitted by the lexer, as a three-element array.
#
# If reporting a diagnostic raises ::Parser::SyntaxError, the error is
# re-raised unless +recover+ is true. In recover mode the comments and
# tokens from prism's result are still returned, and the AST slot is nil
# whenever prism reports the parse as unsuccessful.
def tokenize(source_buffer, recover = false)
  @source_buffer = source_buffer
  source = source_buffer.source

  offset_cache = build_offset_cache(source)
  # Hold on to prism's result before reporting diagnostics: unwrap may raise,
  # and recover mode still needs the comments and tokens afterwards.
  lexed = Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version))
  result =
    begin
      unwrap(lexed, offset_cache)
    rescue ::Parser::SyntaxError
      raise unless recover

      # Recovering: fall back to the raw prism result instead of nil.
      lexed
    end

  program, tokens = result.value
  ast = build_ast(program, offset_cache) if result.success?

  [
    ast,
    build_comments(result.comments, offset_cache),
    build_tokens(tokens, offset_cache)
  ]
ensure
  @source_buffer = nil
end
# Prism already resolves numbered parameters, so nothing is declared here.
# The method only reports whether the first child of +node+ is named like a
# numbered parameter, i.e. an underscore followed by a single digit 1-9.
def try_declare_numparam(node)
  name = node.children[0]
  name.match?(/\A_[1-9]\z/)
end
private
# Hook for consumers: return false for a prism error that should not be
# reported as a diagnostic, so that it does not block creating the syntax
# tree. Every error is accepted by default.
def valid_error?(error)
  true
end
# Hook for consumers: return false for a prism warning that should not be
# reported as a diagnostic. Every warning is accepted by default.
def valid_warning?(warning)
  true
end

# Reports every accepted error and warning from the prism result to the
# diagnostics engine, errors first, and then returns the result unchanged.
#
# This method never raises by itself.
# NOTE(review): whether processing an error raises (presumably
# ::Parser::SyntaxError, which tokenize rescues) is decided by the
# diagnostics engine; confirm against the parser gem.
def unwrap(result, offset_cache)
  result.errors.each do |error|
    next unless valid_error?(error)

    location = build_range(error.location, offset_cache)
    diagnostics.process(Diagnostic.new(error.message, :error, location))
  end

  result.warnings.each do |warning|
    next unless valid_warning?(warning)

    location = build_range(warning.location, offset_cache)
    diagnostics.process(Diagnostic.new(warning.message, :warning, location))
  end

  result
end
# Builds the lookup used to turn prism's byte offsets into the character
# offsets that the parser gem works with. The return value answers
# `cache[byte_offset]` in both cases:
#
# * when the source has as many bytes as characters, a lambda that hands the
#   offset straight back;
# * otherwise an array indexed by byte offset whose values are character
#   offsets, with one trailing entry for the end-of-source position.
def build_offset_cache(source)
  return -> (offset) { offset } if source.bytesize == source.length

  cache = []
  source.each_char.with_index do |char, char_index|
    # Every byte of a multi-byte character maps to that character's index.
    cache.concat([char_index] * char.bytesize)
  end
  cache << source.length
end
# Translates the prism tree rooted at +program+ into the parser gem AST by
# having it accept a Compiler created for this parser and offset cache.
def build_ast(program, offset_cache)
  compiler = Compiler.new(self, offset_cache)
  program.accept(compiler)
end
# Converts prism comments into parser gem comments: each one becomes a
# ::Parser::Source::Comment wrapping the range built from its location.
def build_comments(comments, offset_cache)
  comments.map do |comment|
    range = build_range(comment.location, offset_cache)
    ::Parser::Source::Comment.new(range)
  end
end
# Converts prism tokens into parser gem tokens. Only the first element of
# each entry in +tokens+ is handed to the Lexer, together with the current
# source buffer and the offset cache; the Lexer's array form is returned.
def build_tokens(tokens, offset_cache)
  prism_tokens = tokens.map(&:first)
  Lexer.new(source_buffer, prism_tokens, offset_cache).to_a
end
# Builds a ::Parser::Source::Range over the current source buffer from a
# prism location, translating its start and end offsets through the offset
# cache.
def build_range(location, offset_cache)
  begin_pos = offset_cache[location.start_offset]
  end_pos = offset_cache[location.end_offset]

  ::Parser::Source::Range.new(source_buffer, begin_pos, end_pos)
end
# Maps the parser gem's integer version onto the version string that prism
# is given: 33 and 34 have explicit strings, anything else becomes "latest".
def convert_for_prism(version)
  case version
  when 33 then "3.3.0"
  when 34 then "3.4.0"
  else "latest"
  end
end
require_relative "parser/compiler"
require_relative "parser/lexer"
private_constant :Compiler
private_constant :Lexer
end
end
end