Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement named references #41

Merged
merged 7 commits into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 29 additions & 21 deletions lib/lrama/grammar.rb
Original file line number Diff line number Diff line change
Expand Up @@ -155,16 +155,16 @@ def translated_printer_code(tag)
last_column = ref.last_column

case
when ref.number == "$" && ref.type == :dollar # $$
when ref.value == "$" && ref.type == :dollar # $$
# Omit "<>"
member = tag.s_value[1..-2]
str = "((*yyvaluep).#{member})"
when ref.number == "$" && ref.type == :at # @$
when ref.value == "$" && ref.type == :at # @$
str = "(*yylocationp)"
when ref.type == :dollar # $n
raise "$#{ref.number} can not be used in %printer."
raise "$#{ref.value} can not be used in %printer."
when ref.type == :at # @n
raise "@#{ref.number} can not be used in %printer."
raise "@#{ref.value} can not be used in %printer."
else
raise "Unexpected. #{self}, #{ref}"
end
Expand All @@ -190,19 +190,19 @@ def translated_user_code
last_column = ref.last_column

case
when ref.number == "$" && ref.type == :dollar # $$
when ref.value == "$" && ref.type == :dollar # $$
# Omit "<>"
member = ref.tag.s_value[1..-2]
str = "(yyval.#{member})"
when ref.number == "$" && ref.type == :at # @$
when ref.value == "$" && ref.type == :at # @$
str = "(yyloc)"
when ref.type == :dollar # $n
i = -ref.position_in_rhs + ref.number
i = -ref.position_in_rhs + ref.value
# Omit "<>"
member = ref.tag.s_value[1..-2]
str = "(yyvsp[#{i}].#{member})"
when ref.type == :at # @n
i = -ref.position_in_rhs + ref.number
i = -ref.position_in_rhs + ref.value
str = "(yylsp[#{i}])"
else
raise "Unexpected. #{self}, #{ref}"
Expand All @@ -226,14 +226,14 @@ def translated_initial_action_code
last_column = ref.last_column

case
when ref.number == "$" && ref.type == :dollar # $$
when ref.value == "$" && ref.type == :dollar # $$
str = "yylval"
when ref.number == "$" && ref.type == :at # @$
when ref.value == "$" && ref.type == :at # @$
str = "yylloc"
when ref.type == :dollar # $n
raise "$#{ref.number} can not be used in initial_action."
raise "$#{ref.value} can not be used in initial_action."
when ref.type == :at # @n
raise "@#{ref.number} can not be used in initial_action."
raise "@#{ref.value} can not be used in initial_action."
else
raise "Unexpected. #{self}, #{ref}"
end
Expand All @@ -247,7 +247,7 @@ def translated_initial_action_code

# type: :dollar or :at
# ex_tag: "$<tag>1" (Optional)
Reference = Struct.new(:type, :number, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do
Reference = Struct.new(:type, :value, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do
def tag
if ex_tag
ex_tag
Expand Down Expand Up @@ -382,8 +382,8 @@ def add_rule(lhs:, rhs:, lineno:)
end

def build_references(token_code)
token_code.references.map! do |type, number, tag, first_column, last_column|
Reference.new(type: type, number: number, ex_tag: tag, first_column: first_column, last_column: last_column)
token_code.references.map! do |type, value, tag, first_column, last_column|
Reference.new(type: type, value: value, ex_tag: tag, first_column: first_column, last_column: last_column)
end

token_code
Expand Down Expand Up @@ -627,15 +627,23 @@ def normalize_rules
ref.position_in_rhs = i - 1
next if ref.type == :at
# $$, $n, @$, @n can be used in any actions
number = ref.number

if number == "$"
if ref.value == "$"
# TODO: Should be postponed after middle actions are extracted?
ref.referring_symbol = lhs
else
raise "Can not refer following component. #{number} >= #{i}. #{token}" if number >= i
rhs1[number - 1].referred = true
ref.referring_symbol = rhs1[number - 1]
elsif ref.value.is_a?(Integer)
raise "Can not refer following component. #{ref.value} >= #{i}. #{token}" if ref.value >= i
rhs1[ref.value - 1].referred = true
ref.referring_symbol = rhs1[ref.value - 1]
elsif ref.value.is_a?(String)
target_tokens = ([lhs] + rhs1 + [code]).compact.first(i)
referring_symbol_candidate = target_tokens.filter {|token| token.referred_by?(ref.value) }
raise "Referring symbol `#{ref.value}` is duplicated. #{token}" if referring_symbol_candidate.size >= 2
raise "Referring symbol `#{ref.value}` is not found. #{token}" if referring_symbol_candidate.count == 0

referring_symbol = referring_symbol_candidate.first
referring_symbol.referred = true
ref.referring_symbol = referring_symbol
end
end
end
Expand Down
36 changes: 35 additions & 1 deletion lib/lrama/lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class Lexer
include Lrama::Report::Duration

# s_value is semantic value
Token = Struct.new(:type, :s_value, keyword_init: true) do
Token = Struct.new(:type, :s_value, :alias, keyword_init: true) do
Type = Struct.new(:id, :name, keyword_init: true)

attr_accessor :line, :column, :referred
Expand All @@ -18,6 +18,31 @@ def to_s
"#{super} line: #{line}, column: #{column}"
end

# Tells whether +string+ names this token, i.e. whether it equals either the
# token's semantic value or its alias.
def referred_by?(string)
  candidate_names = [s_value, self.alias]
  candidate_names.any? { |candidate| candidate == string }
end

# Two tokens are equal when they have the same class, the same type and the
# same semantic value. The alias is not part of the comparison.
def ==(other)
  return false unless self.class == other.class
  return false unless type == other.type

  s_value == other.s_value
end

# Rewrites, in place, the named references recorded on this token into
# positional ones.
#
# lhs - token for the rule's left-hand side; a name that refers to it is
#       rewritten to "$" (the same value used for `$$`).
# rhs - array of right-hand-side tokens to search; a name that refers to one
#       of them is rewritten to that token's 1-based position.
#
# References whose value is not a String, or is already "$", are kept as is.
#
# Returns the mutated references array.
# Raises RuntimeError when a name refers to neither lhs nor any rhs token.
def numberize_references(lhs, rhs)
  self.references.map! do |ref|
    ref_name = ref[1]
    next ref unless ref_name.is_a?(String) && ref_name != '$'

    value =
      if lhs.referred_by?(ref_name)
        '$'
      else
        index = rhs.find_index { |token| token.referred_by?(ref_name) }
        # find_index returns nil on a miss; report it explicitly instead of
        # letting `nil + 1` surface as a NoMethodError.
        raise "Referring symbol `#{ref_name}` is not found. #{self}" if index.nil?

        index + 1
      end
    [ref[0], value, ref[2], ref[3], ref[4]]
  end
end

@i = 0
@types = []

Expand Down Expand Up @@ -47,6 +72,7 @@ def self.define_type(name)
define_type(:Number) # 0
define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
define_type(:Ident) # api.pure, tNUMBER
define_type(:Named_Ref) # [foo]
define_type(:Semicolon) # ;
define_type(:Bar) # |
define_type(:String) # "str"
Expand Down Expand Up @@ -166,10 +192,15 @@ def lex_common(lines, tokens)
tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column)
when ss.scan(/(<[a-zA-Z0-9_]+>)/)
tokens << create_token(Token::Tag, ss[0], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]\s*:/)
tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
tokens << create_token(Token::Named_Ref, ss[2], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/)
tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
tokens << create_token(Token::Ident, ss[0], line, ss.pos - column)
when ss.scan(/\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/)
tokens << create_token(Token::Named_Ref, ss[1], line, ss.pos - column)
when ss.scan(/%expect/)
tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column)
when ss.scan(/%define/)
Expand Down Expand Up @@ -257,6 +288,9 @@ def lex_user_code(ss, line, column, lines)
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1]
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_.][-a-zA-Z0-9_.]*)/) # $foo, $expr, $<long>program
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, ss[2], tag, str.length, str.length + ss[0].length - 1]
when ss.scan(/@\$/) # @$
references << [:at, "$", nil, str.length, str.length + ss[0].length - 1]
when ss.scan(/@(\d)+/) # @1
Expand Down
13 changes: 10 additions & 3 deletions lib/lrama/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,11 @@ def parse_grammar_rule(ts, grammar)
# LHS
lhs = ts.consume!(T::Ident_Colon) # class:
lhs.type = T::Ident
if named_ref = ts.consume(T::Named_Ref)
lhs.alias = named_ref.s_value
end

rhs = parse_grammar_rule_rhs(ts, grammar)
rhs = parse_grammar_rule_rhs(ts, grammar, lhs)

grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : lhs.line)

Expand All @@ -186,7 +189,7 @@ def parse_grammar_rule(ts, grammar)
# |
bar_lineno = ts.current_token.line
ts.next
rhs = parse_grammar_rule_rhs(ts, grammar)
rhs = parse_grammar_rule_rhs(ts, grammar, lhs)
grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : bar_lineno)
when T::Semicolon
# ;
Expand All @@ -205,7 +208,7 @@ def parse_grammar_rule(ts, grammar)
end
end

def parse_grammar_rule_rhs(ts, grammar)
def parse_grammar_rule_rhs(ts, grammar, lhs)
a = []
prec_seen = false
code_after_prec = false
Expand Down Expand Up @@ -244,9 +247,13 @@ def parse_grammar_rule_rhs(ts, grammar)
end

code = ts.current_token
code.numberize_references(lhs, a)
grammar.build_references(code)
a << code
ts.next
when T::Named_Ref
ts.previous_token.alias = ts.current_token.s_value
ts.next
when T::Bar
# |
break
Expand Down
4 changes: 4 additions & 0 deletions lib/lrama/parser/token_scanner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ def current_type
current_token && current_token.type
end

# Returns the token just before the current position, or nil when the
# scanner is at the first token.
#
# Index 0 is handled explicitly because @tokens[-1] would wrap around and
# return the last token instead.
def previous_token
  return nil unless @index.positive?

  @tokens[@index - 1]
end

def next
token = current_token
@index += 1
Expand Down
59 changes: 59 additions & 0 deletions spec/lrama/lexer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,65 @@ class : keyword_class tSTRING keyword_end %prec tPLUS
])
end

# Lexes a small grammar that uses named references and checks both the
# resulting token stream and the references collected from the user code.
it "named references" do
# The rules below name symbols with `[name]` and refer to them from the
# actions with `$name`.
y = <<~INPUT
%{
// Prologue
%}

%token NUM

%%

line: expr
{ printf("\t%.10g\n", $expr); }
;

expr[result]: NUM
| expr[left] expr[right] '+'
{ $result = $left + $right; }
;
%%
INPUT
lexer = Lrama::Lexer.new(y)

# Each `[name]` is expected as its own Named_Ref token right after the
# symbol token it is attached to.
expect(lexer.grammar_rules_tokens).to eq([
T.new(type: T::Ident_Colon, s_value: "line"),
T.new(type: T::Ident, s_value: "expr"),
T.new(type: T::User_code, s_value: %Q({ printf("\t%.10g\n", $expr); })),
T.new(type: T::Semicolon, s_value: ";"),

T.new(type: T::Ident_Colon, s_value: "expr"),
T.new(type: T::Named_Ref, s_value: "result"),

T.new(type: T::Ident, s_value: "NUM"),

T.new(type: T::Bar, s_value: "|"),
T.new(type: T::Ident, s_value: "expr"),
T.new(type: T::Named_Ref, s_value: "left"),
T.new(type: T::Ident, s_value: "expr"),
T.new(type: T::Named_Ref, s_value: "right"),
T.new(type: T::Char, s_value: "'+'"),
T.new(type: T::User_code, s_value: "{ $result = $left + $right; }"),
T.new(type: T::Semicolon, s_value: ";"),
])

# Only the user-code tokens are inspected for references.
user_codes = lexer.grammar_rules_tokens.select do |t|
t.type == T::User_code
end

# Each expected reference is [type, name, tag, first_column, last_column];
# the names are still strings at the lexer stage.
expect(user_codes.map(&:references)).to eq([
[
[:dollar, "expr", nil, 20, 24],
],
[
[:dollar, "result", nil, 2, 8],
[:dollar, "left", nil, 12, 16],
[:dollar, "right", nil, 20, 25],
]
])
end

describe "user codes" do
it "parses comments correctly" do
y = <<~INPUT
Expand Down
Loading