Merge pull request #41 from junk0612/implement-named-references
Implement named references
yui-knk committed Jun 13, 2023
2 parents 9f3bc6b + faa4225 commit dd57055
Showing 6 changed files with 270 additions and 25 deletions.
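For context: named references are the Bison feature that lets a grammar action refer to a symbol by an alias written in square brackets (e.g. expr[left]) or by its own name, instead of by position ($1, $2). The sketch below drives the new lexer support the same way the spec added in this commit does; it is a trimmed illustration, the require path is an assumption, and the printed tuples mirror the expectations in the spec at the end of this diff.

require "lrama"

# [result] aliases the LHS (so $result stands for $$), while $left and $right
# refer to the aliased RHS symbols instead of $1 and $2.
grammar = <<~INPUT
  %token NUM
  %%
  expr[result]: NUM
              | expr[left] expr[right] '+'
                  { $result = $left + $right; }
              ;
  %%
INPUT

lexer = Lrama::Lexer.new(grammar)
user_code = lexer.grammar_rules_tokens.find { |t| t.type == Lrama::Lexer::Token::User_code }
# Named references are lexed as strings; the parser later rewrites them into
# positional form (see numberize_references further down).
p user_code.references
# => [[:dollar, "result", nil, 2, 8], [:dollar, "left", nil, 12, 16], [:dollar, "right", nil, 20, 25]]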
50 changes: 29 additions & 21 deletions lib/lrama/grammar.rb
@@ -155,16 +155,16 @@ def translated_printer_code(tag)
last_column = ref.last_column

case
- when ref.number == "$" && ref.type == :dollar # $$
+ when ref.value == "$" && ref.type == :dollar # $$
# Omit "<>"
member = tag.s_value[1..-2]
str = "((*yyvaluep).#{member})"
- when ref.number == "$" && ref.type == :at # @$
+ when ref.value == "$" && ref.type == :at # @$
str = "(*yylocationp)"
when ref.type == :dollar # $n
raise "$#{ref.number} can not be used in %printer."
raise "$#{ref.value} can not be used in %printer."
when ref.type == :at # @n
raise "@#{ref.number} can not be used in %printer."
raise "@#{ref.value} can not be used in %printer."
else
raise "Unexpected. #{self}, #{ref}"
end
@@ -190,19 +190,19 @@ def translated_user_code
last_column = ref.last_column

case
- when ref.number == "$" && ref.type == :dollar # $$
+ when ref.value == "$" && ref.type == :dollar # $$
# Omit "<>"
member = ref.tag.s_value[1..-2]
str = "(yyval.#{member})"
- when ref.number == "$" && ref.type == :at # @$
+ when ref.value == "$" && ref.type == :at # @$
str = "(yyloc)"
when ref.type == :dollar # $n
- i = -ref.position_in_rhs + ref.number
+ i = -ref.position_in_rhs + ref.value
# Omit "<>"
member = ref.tag.s_value[1..-2]
str = "(yyvsp[#{i}].#{member})"
when ref.type == :at # @n
- i = -ref.position_in_rhs + ref.number
+ i = -ref.position_in_rhs + ref.value
str = "(yylsp[#{i}])"
else
raise "Unexpected. #{self}, #{ref}"
@@ -226,14 +226,14 @@ def translated_initial_action_code
last_column = ref.last_column

case
- when ref.number == "$" && ref.type == :dollar # $$
+ when ref.value == "$" && ref.type == :dollar # $$
str = "yylval"
- when ref.number == "$" && ref.type == :at # @$
+ when ref.value == "$" && ref.type == :at # @$
str = "yylloc"
when ref.type == :dollar # $n
raise "$#{ref.number} can not be used in initial_action."
raise "$#{ref.value} can not be used in initial_action."
when ref.type == :at # @n
raise "@#{ref.number} can not be used in initial_action."
raise "@#{ref.value} can not be used in initial_action."
else
raise "Unexpected. #{self}, #{ref}"
end
@@ -247,7 +247,7 @@ def translated_initial_action_code

# type: :dollar or :at
# ex_tag: "$<tag>1" (Optional)
- Reference = Struct.new(:type, :number, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do
+ Reference = Struct.new(:type, :value, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do
def tag
if ex_tag
ex_tag
@@ -382,8 +382,8 @@ def add_rule(lhs:, rhs:, lineno:)
end

def build_references(token_code)
- token_code.references.map! do |type, number, tag, first_column, last_column|
- Reference.new(type: type, number: number, ex_tag: tag, first_column: first_column, last_column: last_column)
+ token_code.references.map! do |type, value, tag, first_column, last_column|
+ Reference.new(type: type, value: value, ex_tag: tag, first_column: first_column, last_column: last_column)
end

token_code
@@ -627,15 +627,23 @@ def normalize_rules
ref.position_in_rhs = i - 1
next if ref.type == :at
# $$, $n, @$, @n can be used in any actions
- number = ref.number

if number == "$"
if ref.value == "$"
# TODO: Should be postponed after middle actions are extracted?
ref.referring_symbol = lhs
- else
- raise "Can not refer following component. #{number} >= #{i}. #{token}" if number >= i
- rhs1[number - 1].referred = true
- ref.referring_symbol = rhs1[number - 1]
+ elsif ref.value.is_a?(Integer)
+ raise "Can not refer following component. #{ref.value} >= #{i}. #{token}" if ref.value >= i
+ rhs1[ref.value - 1].referred = true
+ ref.referring_symbol = rhs1[ref.value - 1]
+ elsif ref.value.is_a?(String)
+ target_tokens = ([lhs] + rhs1 + [code]).compact.first(i)
+ referring_symbol_candidate = target_tokens.filter {|token| token.referred_by?(ref.value) }
+ raise "Referring symbol `#{ref.value}` is duplicated. #{token}" if referring_symbol_candidate.size >= 2
+ raise "Referring symbol `#{ref.value}` is not found. #{token}" if referring_symbol_candidate.count == 0
+
+ referring_symbol = referring_symbol_candidate.first
+ referring_symbol.referred = true
+ ref.referring_symbol = referring_symbol
end
end
end
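In short, a string-valued (named) reference is resolved by collecting the LHS, the RHS symbols seen so far, and the action token, then picking the unique one whose name or alias matches; duplicates and misses raise. A standalone sketch of that lookup rule, using a hypothetical Tok struct rather than lrama's own token class:

# Minimal stand-in for a grammar token with an optional [alias].
Tok = Struct.new(:s_value, :alias) do
  def referred_by?(name)
    [s_value, self.alias].include?(name)
  end
end

def resolve_named_ref(name, candidates)
  found = candidates.select { |t| t.referred_by?(name) }
  raise "Referring symbol `#{name}` is duplicated." if found.size >= 2
  raise "Referring symbol `#{name}` is not found." if found.empty?
  found.first
end

tokens = [Tok.new("expr", "left"), Tok.new("expr", "right"), Tok.new("'+'", nil)]
resolve_named_ref("right", tokens)   # => the second expr token
# resolve_named_ref("expr", tokens)  # would raise: both RHS tokens are named expr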
36 changes: 35 additions & 1 deletion lib/lrama/lexer.rb
@@ -7,7 +7,7 @@ class Lexer
include Lrama::Report::Duration

# s_value is semantic value
- Token = Struct.new(:type, :s_value, keyword_init: true) do
+ Token = Struct.new(:type, :s_value, :alias, keyword_init: true) do
Type = Struct.new(:id, :name, keyword_init: true)

attr_accessor :line, :column, :referred
@@ -18,6 +18,31 @@ def to_s
"#{super} line: #{line}, column: #{column}"
end

def referred_by?(string)
[self.s_value, self.alias].include?(string)
end

def ==(other)
self.class == other.class && self.type == other.type && self.s_value == other.s_value
end

def numberize_references(lhs, rhs)
self.references.map! {|ref|
ref_name = ref[1]
if ref_name.is_a?(String) && ref_name != '$'
value =
if lhs.referred_by?(ref_name)
'$'
else
rhs.find_index {|token| token.referred_by?(ref_name) } + 1
end
[ref[0], value, ref[2], ref[3], ref[4]]
else
ref
end
}
end

@i = 0
@types = []
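Concretely, numberize_references (defined above) rewrites each string-named reference into the positional form the rest of the pipeline already understands: a name matching the LHS alias becomes "$", and a name matching an RHS symbol becomes that symbol's 1-based index. A hypothetical before/after for the rule expr[result]: expr[left] expr[right] '+' (the "before" tuples mirror the spec at the end of this diff; the rewritten values are inferred from the code above):

# Before numberize_references(lhs, rhs):
before = [
  [:dollar, "result", nil, 2, 8],
  [:dollar, "left",   nil, 12, 16],
  [:dollar, "right",  nil, 20, 25],
]

# After: "result" names the LHS, "left"/"right" are RHS positions 1 and 2.
after = [
  [:dollar, "$", nil, 2, 8],
  [:dollar, 1,   nil, 12, 16],
  [:dollar, 2,   nil, 20, 25],
]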

@@ -47,6 +72,7 @@ def self.define_type(name)
define_type(:Number) # 0
define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
define_type(:Ident) # api.pure, tNUMBER
define_type(:Named_Ref) # [foo]
define_type(:Semicolon) # ;
define_type(:Bar) # |
define_type(:String) # "str"
@@ -166,10 +192,15 @@ def lex_common(lines, tokens)
tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column)
when ss.scan(/(<[a-zA-Z0-9_]+>)/)
tokens << create_token(Token::Tag, ss[0], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]\s*:/)
tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
tokens << create_token(Token::Named_Ref, ss[2], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/)
tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
tokens << create_token(Token::Ident, ss[0], line, ss.pos - column)
when ss.scan(/\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/)
tokens << create_token(Token::Named_Ref, ss[1], line, ss.pos - column)
when ss.scan(/%expect/)
tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column)
when ss.scan(/%define/)
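A quick way to see what the first new pattern above captures for a rule head such as expr[result]: (plain StringScanner, outside lrama; the regex is copied verbatim from the branch above):

require "strscan"

ss = StringScanner.new("expr[result]: NUM")
ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]\s*:/)
ss[1]  # => "expr"   -- emitted as an Ident_Colon token
ss[2]  # => "result" -- emitted as a Named_Ref token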
@@ -257,6 +288,9 @@ def lex_user_code(ss, line, column, lines)
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1]
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_.][-a-zA-Z0-9_.]*)/) # $foo, $expr, $<long>program
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, ss[2], tag, str.length, str.length + ss[0].length - 1]
when ss.scan(/@\$/) # @$
references << [:at, "$", nil, str.length, str.length + ss[0].length - 1]
when ss.scan(/@(\d)+/) # @1
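The new $name pattern in lex_user_code works the same way: an optional <tag> capture followed by the identifier, recorded as a string-valued reference. For example (again outside lrama, regex copied from above):

require "strscan"

ss = StringScanner.new("$<long>left + $right")
ss.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
ss[1]  # => "<long>"  -- optional Tag
ss[2]  # => "left"    -- recorded as [:dollar, "left", tag, ...]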
13 changes: 10 additions & 3 deletions lib/lrama/parser.rb
@@ -175,8 +175,11 @@ def parse_grammar_rule(ts, grammar)
# LHS
lhs = ts.consume!(T::Ident_Colon) # class:
lhs.type = T::Ident
if named_ref = ts.consume(T::Named_Ref)
lhs.alias = named_ref.s_value
end

- rhs = parse_grammar_rule_rhs(ts, grammar)
+ rhs = parse_grammar_rule_rhs(ts, grammar, lhs)

grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : lhs.line)

@@ -186,7 +189,7 @@ def parse_grammar_rule(ts, grammar)
# |
bar_lineno = ts.current_token.line
ts.next
- rhs = parse_grammar_rule_rhs(ts, grammar)
+ rhs = parse_grammar_rule_rhs(ts, grammar, lhs)
grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : bar_lineno)
when T::Semicolon
# ;
@@ -205,7 +208,7 @@ def parse_grammar_rule(ts, grammar)
end
end

- def parse_grammar_rule_rhs(ts, grammar)
+ def parse_grammar_rule_rhs(ts, grammar, lhs)
a = []
prec_seen = false
code_after_prec = false
@@ -244,9 +247,13 @@ def parse_grammar_rule_rhs(ts, grammar)
end

code = ts.current_token
code.numberize_references(lhs, a)
grammar.build_references(code)
a << code
ts.next
when T::Named_Ref
ts.previous_token.alias = ts.current_token.s_value
ts.next
when T::Bar
# |
break
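Tying the parser changes together: when a Named_Ref token follows an RHS symbol, its value is copied onto the token just consumed (via the previous_token helper added to the token scanner below), so that token can later answer referred_by? for the alias. A hypothetical trace for the RHS expr[left] expr[right] '+':

# token stream: Ident("expr"), Named_Ref("left"), Ident("expr"), Named_Ref("right"), Char("'+'")
# on each Named_Ref the parser runs: ts.previous_token.alias = ts.current_token.s_value
#   -> the first  expr token gets alias "left"
#   -> the second expr token gets alias "right"
# afterwards: first_expr.referred_by?("left")    # => true
#             second_expr.referred_by?("right")  # => true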
4 changes: 4 additions & 0 deletions lib/lrama/parser/token_scanner.rb
@@ -14,6 +14,10 @@ def current_type
current_token && current_token.type
end

def previous_token
@tokens[@index - 1]
end

def next
token = current_token
@index += 1
59 changes: 59 additions & 0 deletions spec/lrama/lexer_spec.rb
@@ -367,6 +367,65 @@ class : keyword_class tSTRING keyword_end %prec tPLUS
])
end

it "named references" do
y = <<~INPUT
%{
// Prologue
%}
%token NUM
%%
line: expr
{ printf("\t%.10g\n", $expr); }
;
expr[result]: NUM
| expr[left] expr[right] '+'
{ $result = $left + $right; }
;
%%
INPUT
lexer = Lrama::Lexer.new(y)

expect(lexer.grammar_rules_tokens).to eq([
T.new(type: T::Ident_Colon, s_value: "line"),
T.new(type: T::Ident, s_value: "expr"),
T.new(type: T::User_code, s_value: %Q({ printf("\t%.10g\n", $expr); })),
T.new(type: T::Semicolon, s_value: ";"),

T.new(type: T::Ident_Colon, s_value: "expr"),
T.new(type: T::Named_Ref, s_value: "result"),

T.new(type: T::Ident, s_value: "NUM"),

T.new(type: T::Bar, s_value: "|"),
T.new(type: T::Ident, s_value: "expr"),
T.new(type: T::Named_Ref, s_value: "left"),
T.new(type: T::Ident, s_value: "expr"),
T.new(type: T::Named_Ref, s_value: "right"),
T.new(type: T::Char, s_value: "'+'"),
T.new(type: T::User_code, s_value: "{ $result = $left + $right; }"),
T.new(type: T::Semicolon, s_value: ";"),
])

user_codes = lexer.grammar_rules_tokens.select do |t|
t.type == T::User_code
end

expect(user_codes.map(&:references)).to eq([
[
[:dollar, "expr", nil, 20, 24],
],
[
[:dollar, "result", nil, 2, 8],
[:dollar, "left", nil, 12, 16],
[:dollar, "right", nil, 20, 25],
]
])
end

describe "user codes" do
it "parses comments correctly" do
y = <<~INPUT
(The remaining changed file of the six was not rendered in this view.)
