From 5fcd3cce2f1b051a3af07569cabb3589774d5c68 Mon Sep 17 00:00:00 2001 From: Junichi Kobayashi Date: Mon, 1 Apr 2024 23:16:00 +0900 Subject: [PATCH 1/6] Set %define when parsing grammar files --- lib/lrama/grammar.rb | 4 +++- lib/lrama/parser.rb | 9 +++++++-- parser.y | 2 +- spec/lrama/parser_spec.rb | 1 + 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb index a816b826..cb178188 100644 --- a/lib/lrama/grammar.rb +++ b/lib/lrama/grammar.rb @@ -30,7 +30,8 @@ class Grammar :after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack, :symbols_resolver, :types, :rules, :rule_builders, - :sym_to_rules, :no_stdlib + :sym_to_rules, :no_stdlib, + :define def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term, :find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol, @@ -59,6 +60,7 @@ def initialize(rule_counter) @accept_symbol = nil @aux = Auxiliary.new @no_stdlib = false + @define = {} append_special_symbols end diff --git a/lib/lrama/parser.rb b/lib/lrama/parser.rb index 04603105..d0ad540d 100644 --- a/lib/lrama/parser.rb +++ b/lib/lrama/parser.rb @@ -920,7 +920,7 @@ def raise_parse_error(error_message, location) 1, 63, :_reduce_none, 1, 63, :_reduce_none, 2, 63, :_reduce_13, - 3, 63, :_reduce_none, + 3, 63, :_reduce_14, 2, 63, :_reduce_none, 2, 63, :_reduce_16, 2, 63, :_reduce_17, @@ -1309,7 +1309,12 @@ def _reduce_13(val, _values, result) end .,., -# reduce 14 omitted +module_eval(<<'.,.,', 'parser.y', 34) + def _reduce_14(val, _values, result) + @grammar.define[val[1].s_value] = val[2]&.s_value + result + end +.,., # reduce 15 omitted diff --git a/parser.y b/parser.y index bc507084..75a55c46 100644 --- a/parser.y +++ b/parser.y @@ -32,7 +32,7 @@ rule | rule_declaration | inline_declaration | "%expect" INTEGER { @grammar.expect = val[1] } - | "%define" variable value + | "%define" variable value { @grammar.define[val[1].s_value] = val[2]&.s_value } | "%param" params | "%lex-param" params { diff --git a/spec/lrama/parser_spec.rb b/spec/lrama/parser_spec.rb index 2fddc68b..a84580c1 100644 --- a/spec/lrama/parser_spec.rb +++ b/spec/lrama/parser_spec.rb @@ -57,6 +57,7 @@ CODE expect(grammar.expect).to eq(0) + expect(grammar.define).to eq({'api.pure' => nil, 'parse.error' => 'verbose'}) expect(grammar.printers).to eq([ Printer.new( ident_or_tags: [T::Tag.new(s_value: "")], From a8b6ff474540d14c9dea7ec402bdb8af5292069c Mon Sep 17 00:00:00 2001 From: Junichi Kobayashi Date: Tue, 2 Apr 2024 00:58:02 +0900 Subject: [PATCH 2/6] Add compute_ielr --- lib/lrama/command.rb | 1 + lib/lrama/grammar.rb | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/lib/lrama/command.rb b/lib/lrama/command.rb index 12fc4fc7..bf1a567b 100644 --- a/lib/lrama/command.rb +++ b/lib/lrama/command.rb @@ -33,6 +33,7 @@ def run(argv) end states = Lrama::States.new(grammar, warning, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure])) states.compute + states.compute_ielr if grammar.ielr_defined? context = Lrama::Context.new(states) if options.report_file diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb index cb178188..ba11dc24 100644 --- a/lib/lrama/grammar.rb +++ b/lib/lrama/grammar.rb @@ -169,6 +169,10 @@ def find_rules_by_symbol(sym) @sym_to_rules[sym.number] end + def ielr_defined? + @define.key?('lr.type') && @define['lr.type'] == 'ielr' + end + private def compute_nullable From 84737cf8551ac6d97d5fcb67876e46cfb9b1be14 Mon Sep 17 00:00:00 2001 From: Junichi Kobayashi Date: Tue, 23 Apr 2024 14:25:54 +0900 Subject: [PATCH 3/6] Support IELR(1) parser generation --- lib/lrama/state.rb | 45 +++++++++ lib/lrama/states.rb | 224 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 269 insertions(+) diff --git a/lib/lrama/state.rb b/lib/lrama/state.rb index ceb74d85..f6063dec 100644 --- a/lib/lrama/state.rb +++ b/lib/lrama/state.rb @@ -82,6 +82,11 @@ def transitions @transitions ||= shifts.map {|shift| [shift, @items_to_state[shift.next_items]] } end + def update_transition(shift, next_state) + set_items_to_state(shift.next_items, next_state) + @transitions = shifts.map {|sh| [sh, @items_to_state[sh.next_items]] } + end + def selected_term_transitions term_transitions.reject do |shift, next_state| shift.not_selected @@ -140,5 +145,45 @@ def rr_conflicts conflict.type == :reduce_reduce end end + + def always_follows(shift, next_state) + internal_dependencies(shift, next_state).union(successor_dependencies(shift, next_state)).reduce([]) {|result, transition| result += transition[1].term_transitions.map {|shift, _| shift.next_sym } } + end + + def internal_dependencies(shift, next_state) + nterm_transitions.select {|other_shift, _| + @items.find {|item| item.next_sym == shift.next_sym && item.lhs == other_shift.next_sym && item.symbols_after_dot.all?(&:nullable) } + }.reduce([[shift, next_state]]) {|result, transition| + result += internal_follows(*transition) + } + end + + def successor_dependencies(shift, next_state) + next_state.nterm_transitions.select {|other_shift, _| + other_shift.next_sym.nullable + }.reduce([[shift, next_state]]) {|result, transition| + result += successor_dependencies(*transition) + } + end + + def inadequacy_list + return @inadequacy_list if @inadequacy_list + + list = shifts.to_h {|shift| [shift.next_sym, [[shift, nil]]] } + reduces.each do |reduce| + reduce_list = (reduce.look_ahead || []).to_h {|sym| [sym, [[reduce, reduce.item]]] } + list.merge!(reduce_list) {|_, list_value, reduce_value| list_value + reduce_value } + end + + @inadequacy_list = {self => list.select {|_, actions| actions.size > 1 }} + end + + def follow_kernel?(item) + item.symbols_after_dot.all?(&:nullable) + end + + def follow_kernel_items(shift, next_state, item) + internal_dependencies(shift, next_state).any? {|shift, _| shift.next_sym == item.next_sym } && item.symbols_after_dot.all?(&:nullable) + end end end diff --git a/lib/lrama/states.rb b/lib/lrama/states.rb index 290e996b..99cd2df8 100644 --- a/lib/lrama/states.rb +++ b/lib/lrama/states.rb @@ -93,6 +93,10 @@ def compute check_conflicts end + def compute_ielr + report_duration(:split_states) { split_states } + end + def reporter StatesReporter.new(self) end @@ -552,5 +556,225 @@ def check_conflicts end end end + + def split_states + @item_lookahead_set = {} + @lalr_isocores = Hash.new {|hash, key| hash[key] = key } + @ielr_isocores = Hash.new {|hash, key| hash[key] = [key] } + @lookaheads_recomputed = Hash.new {|hash, key| hash[key] = false } + @predecessors = {} + @states.each do |state| + state.transitions.each do |shift, next_state| + compute_state(state, shift, next_state) + end + end + end + + def merge_lookaheads(state, k) + return if state.kernels.all? {|item| (k[item] - item_lookahead_set(state)[item]).empty? } + + state.transitions.each do |shift, next_state| + next if @lookaheads_recomputed[next_state] + compute_state(state, shift, next_state) + end + end + + def compute_state(state, shift, next_state) + k = propagate_lookaheads(state, next_state) + s = @ielr_isocores[next_state].find {|st| is_compatible(st, k) } + + if s.nil? + s = @ielr_isocores[next_state].last + new_state = State.new(@states.count, s.accessing_symbol, s.kernels) + new_state.closure = s.closure + new_state.compute_shifts_reduces + s.transitions.each do |sh, next_state| + new_state.set_items_to_state(sh.next_items, next_state) + end + @states << new_state + @lalr_isocores[new_state] = s + @ielr_isocores[s] << new_state + @ielr_isocores[s].each do |st| + @ielr_isocores[st] = @ielr_isocores[s] + end + @lookaheads_recomputed[new_state] = true + @item_lookahead_set[new_state] = k + state.update_transition(shift, new_state) + elsif(!@lookaheads_recomputed[s]) + @item_lookahead_set[s] = k + @lookaheads_recomputed[s] = true + else + merge_lookaheads(s, k) + end + end + + def propagate_lookaheads(state, next_state) + next_state.kernels.to_h {|item| + lookahead_sets = + if item.position == 1 + compute_goto_follow_set(@lalr_isocores[state], item.lhs) + else + kernel = state.kernels.find {|k| k.rule == item.rule && k.position == item.position - 1 } + item_lookahead_set(state)[kernel] + end + + [item, lookahead_sets & lookahead_set_filters(next_state)[item]] + } + end + + def lookahead_set_filters(state) + state.kernels.to_h {|kernel| + [kernel, + annotation_list(@lalr_isocores[state])[@lalr_isocores[state]].filter_map {|token, actions| + token if token.term? && actions.any? {|item, _| item == kernel } + }] + } + end + + def is_compatible(state, k) + @lookaheads_recomputed[state] || + annotation_list(@lalr_isocores[state])[@lalr_isocores[state]].all? {|token, actions| + a = dominant_contribution(state, token, actions, item_lookahead_set(state)) + b = dominant_contribution(state, token, actions, k) + a.empty? || b.empty? || a == b + } + end + + def dominant_contribution(state, token, actions, lookaheads) + actions.filter_map {|action, items| + action if items.empty? || items.any? {|item, bool| bool && lookaheads[item].include?(token) } + }.reject {|action| + if action.is_a?(State::Shift) + action.not_selected + elsif action.is_a?(State::Reduce) + action.not_selected_symbols.include?(token) + end + } + end + + def compute_goto_follow_set(state, nterm_token) + shift, next_state = state.nterm_transitions.find {|shift, _| shift.next_sym == nterm_token } + return [] if shift.nil? && nterm_token.id.s_value == '$accept' + state.always_follows(shift, next_state).union(state.kernels.select {|item| + state.follow_kernel_items(shift, next_state, item) + }.reduce([]) {|result, item| + result.union(item_lookahead_set(state)[item]) + }) + end + + def item_lookahead_set(state) + @item_lookahead_set[state] ||= + state.kernels.to_h {|item| + value = + if item.position > 1 + prev_state, prev_item = predecessor_with_item(state, item) + item_lookahead_set(prev_state)[prev_item] + elsif item.position == 1 + prev_state = predecessors(state).find {|p| p.shifts.any? {|shift| shift.next_sym == item.lhs } } + shift, next_state = prev_state.nterm_transitions.find {|shift, _| shift.next_sym == item.lhs } + goto_follows(prev_state, shift, next_state) + else + [] + end + [item, value] + } + end + + def predecessors(state) + @predecessors[state] ||= @states.select {|prev| prev.transitions.any? {|_, to_state| to_state == state } } + end + + def predecessor_with_item(state, item) + predecessors(state).each do |state| + state.kernels.each do |kernel| + return [state, kernel] if kernel.rule == item.rule && kernel.position == item.position - 1 + end + end + end + + def goto_follows(state, shift, next_state) + compute_include_dependencies(state, shift, next_state).reduce([]) {|result, goto| + st, sh, next_st = goto + result.union(st.always_follows(sh, next_st)) + } + end + + def compute_include_dependencies(state, shift, next_state) + internal = state.internal_dependencies(shift, next_state).map {|sh, next_st| [state, sh, next_st] } + + item = state.kernels.find {|kernel| kernel.next_sym == shift.next_sym } + return internal unless item.symbols_after_dot.all?(&:nullable) + + s, i = state, item + while i.position > 0 + s = predecessors(s).find {|p| p.kernels.find {|item| item.rule == i.rule && item.position == i.position - 1 } } + i = s.kernels.find {|item| item.rule == i.rule && item.position == i.position - 1 } + end + + p_shift, p_next_state = s.transitions.find {|sh, _| sh.next_sym == item.lhs } + internal.union([[s, p_shift, p_next_state]]) + end + + def annotation_list(state) + manifestations = state.inadequacy_list.transform_values {|hash| hash.to_h {|token, actions| [token, annotate_manifestation(state, token, actions)] } } + state.transitions.reduce(manifestations) {|item, transition| + item.merge(annotate_predecessor(state, transition[1], annotation_list(transition[1])[transition[1]])) {|state, annotations, other_annotations| + annotations.merge(other_annotations) {|token, actions, other_actions| + actions.merge(other_actions) {|action, items, other_items| + items.merge(other_items) {|item, bool, other_bool| + raise if bool != other_bool + bool + } + } + } + } + } + end + + def annotate_manifestation(state, token, actions) + actions.to_h {|action, item| + [action, + if action.is_a?(State::Shift) + {} + elsif action.is_a?(State::Reduce) + if item.empty_rule? + compute_lhs_contributions(state, item.lhs, token) + else + state.kernels.to_h {|kernel| [kernel, kernel.rule == item.rule && kernel.end_of_rule?] } + end + end + ] + } + end + + def compute_lhs_contributions(state, sym, token) + shift, next_state = state.nterm_transitions.find {|sh, _| sh.next_sym == sym } + if state.always_follows(shift, next_state).include?(token) + {} + else + state.kernels.to_h {|kernel| [kernel, state.follow_kernel?(kernel) && item_lookahead_set(state)[kernel].include?(token)] } + end + end + + def annotate_predecessor(state, next_state, annotation_list) + {state => annotation_list.to_h {|token, actions| + next [token, {}] if actions.empty? || actions.any? {|action, hash| + hash.keys.any? {|item| hash[item] && item.position == 1 && compute_lhs_contributions(state, item.lhs, token).empty? } + } + [token, actions.to_h {|action, hash| + [action, hash.to_h {|item, _| + kernel = state.kernels.find {|k| k.rule == item.rule && k.position == item.position - 1 } + [kernel, + hash[item] && + ( + !kernel.nil? && (item_lookahead_set(state)[kernel].include?(token)) || + (item.position == 1 && compute_lhs_contributions(state, item.lhs, token)[item]) + ) + ] + }] + }] + } + } + end end end From 9ae33e96650f84ed5b030ef0d5f0d9323613b118 Mon Sep 17 00:00:00 2001 From: Junichi Kobayashi Date: Fri, 3 May 2024 01:09:19 +0900 Subject: [PATCH 4/6] Parse --define options --- lib/lrama/command.rb | 2 +- lib/lrama/grammar.rb | 4 ++-- lib/lrama/option_parser.rb | 1 + lib/lrama/options.rb | 9 ++++++++- lib/lrama/parser.rb | 5 +++-- parser.y | 5 +++-- 6 files changed, 18 insertions(+), 8 deletions(-) diff --git a/lib/lrama/command.rb b/lib/lrama/command.rb index bf1a567b..7d41eef8 100644 --- a/lib/lrama/command.rb +++ b/lib/lrama/command.rb @@ -18,7 +18,7 @@ def run(argv) text = options.y.read options.y.close if options.y != STDIN begin - grammar = Lrama::Parser.new(text, options.grammar_file, options.debug).parse + grammar = Lrama::Parser.new(text, options.grammar_file, options.debug, options.define).parse unless grammar.no_stdlib stdlib_grammar = Lrama::Parser.new(File.read(STDLIB_FILE_PATH), STDLIB_FILE_PATH, options.debug).parse grammar.insert_before_parameterizing_rules(stdlib_grammar.parameterizing_rules) diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb index ba11dc24..e4f764c1 100644 --- a/lib/lrama/grammar.rb +++ b/lib/lrama/grammar.rb @@ -39,7 +39,7 @@ class Grammar :fill_printer, :fill_destructor, :fill_error_token, :sort_by_number! - def initialize(rule_counter) + def initialize(rule_counter, define = {}) @rule_counter = rule_counter # Code defined by "%code" @@ -60,7 +60,7 @@ def initialize(rule_counter) @accept_symbol = nil @aux = Auxiliary.new @no_stdlib = false - @define = {} + @define = define append_special_symbols end diff --git a/lib/lrama/option_parser.rb b/lib/lrama/option_parser.rb index 1e4d448f..5dc4a25d 100644 --- a/lib/lrama/option_parser.rb +++ b/lib/lrama/option_parser.rb @@ -59,6 +59,7 @@ def parse_by_option_parser(argv) o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v } o.on('-t', 'reserved, do nothing') { } o.on('--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true } + o.on('-D', '--define=NAME[=VALUE]', "similar to '%define NAME VALUE'") {|v| @options.define = v } o.separator '' o.separator 'Output:' o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v } diff --git a/lib/lrama/options.rb b/lib/lrama/options.rb index 739ca16f..84414021 100644 --- a/lib/lrama/options.rb +++ b/lib/lrama/options.rb @@ -5,10 +5,11 @@ class Options :report_file, :outfile, :error_recovery, :grammar_file, :trace_opts, :report_opts, :y, - :debug + :debug, :define def initialize @skeleton = "bison/yacc.c" + @define = {} @header = false @header_file = nil @report_file = nil @@ -20,5 +21,11 @@ def initialize @y = STDIN @debug = false end + + def define=(v) + v.split(',').each do |p_define| + @define.store *p_define.split('=') + end + end end end diff --git a/lib/lrama/parser.rb b/lib/lrama/parser.rb index d0ad540d..2c9f5619 100644 --- a/lib/lrama/parser.rb +++ b/lib/lrama/parser.rb @@ -662,17 +662,18 @@ class Parser < Racc::Parser include Lrama::Report::Duration -def initialize(text, path, debug = false) +def initialize(text, path, debug = false, define = {}) @grammar_file = Lrama::Lexer::GrammarFile.new(path, text) @yydebug = debug @rule_counter = Lrama::Grammar::Counter.new(0) @midrule_action_counter = Lrama::Grammar::Counter.new(1) + @define = define end def parse report_duration(:parse) do @lexer = Lrama::Lexer.new(@grammar_file) - @grammar = Lrama::Grammar.new(@rule_counter) + @grammar = Lrama::Grammar.new(@rule_counter, @define) @precedence_number = 0 reset_precs do_parse diff --git a/parser.y b/parser.y index 75a55c46..aaf6c951 100644 --- a/parser.y +++ b/parser.y @@ -536,17 +536,18 @@ end include Lrama::Report::Duration -def initialize(text, path, debug = false) +def initialize(text, path, debug = false, define = {}) @grammar_file = Lrama::Lexer::GrammarFile.new(path, text) @yydebug = debug @rule_counter = Lrama::Grammar::Counter.new(0) @midrule_action_counter = Lrama::Grammar::Counter.new(1) + @define = define end def parse report_duration(:parse) do @lexer = Lrama::Lexer.new(@grammar_file) - @grammar = Lrama::Grammar.new(@rule_counter) + @grammar = Lrama::Grammar.new(@rule_counter, @define) @precedence_number = 0 reset_precs do_parse From 7ac228cf7e1604f4f55dc9726e54b145a46a12b7 Mon Sep 17 00:00:00 2001 From: Junichi Kobayashi Date: Thu, 23 May 2024 01:53:44 +0900 Subject: [PATCH 5/6] Refactor predecessors --- lib/lrama/state.rb | 7 ++++++- lib/lrama/states.rb | 21 ++++++++++++++------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/lib/lrama/state.rb b/lib/lrama/state.rb index f6063dec..a68b5b73 100644 --- a/lib/lrama/state.rb +++ b/lib/lrama/state.rb @@ -7,7 +7,7 @@ module Lrama class State attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts, - :default_reduction_rule, :closure, :items + :default_reduction_rule, :closure, :items, :predecessors attr_accessor :shifts, :reduces def initialize(id, accessing_symbol, kernels) @@ -21,6 +21,7 @@ def initialize(id, accessing_symbol, kernels) @conflicts = [] @resolved_conflicts = [] @default_reduction_rule = nil + @predecessors = [] end def closure=(closure) @@ -185,5 +186,9 @@ def follow_kernel?(item) def follow_kernel_items(shift, next_state, item) internal_dependencies(shift, next_state).any? {|shift, _| shift.next_sym == item.next_sym } && item.symbols_after_dot.all?(&:nullable) end + + def append_predecessor(prev_state) + @predecessors << prev_state + end end end diff --git a/lib/lrama/states.rb b/lib/lrama/states.rb index 99cd2df8..656a3e22 100644 --- a/lib/lrama/states.rb +++ b/lib/lrama/states.rb @@ -94,6 +94,7 @@ def compute end def compute_ielr + report_duration(:compute_predecessors) { compute_predecessors } report_duration(:split_states) { split_states } end @@ -557,12 +558,22 @@ def check_conflicts end end + def compute_predecessors + queue = [@states.first] + until queue.empty? + state = queue.shift + state.transitions.each do |_, next_state| + next_state.append_predecessor(state) + queue << next_state + end + end + end + def split_states @item_lookahead_set = {} @lalr_isocores = Hash.new {|hash, key| hash[key] = key } @ielr_isocores = Hash.new {|hash, key| hash[key] = [key] } @lookaheads_recomputed = Hash.new {|hash, key| hash[key] = false } - @predecessors = {} @states.each do |state| state.transitions.each do |shift, next_state| compute_state(state, shift, next_state) @@ -670,7 +681,7 @@ def item_lookahead_set(state) prev_state, prev_item = predecessor_with_item(state, item) item_lookahead_set(prev_state)[prev_item] elsif item.position == 1 - prev_state = predecessors(state).find {|p| p.shifts.any? {|shift| shift.next_sym == item.lhs } } + prev_state = state.predecessors.find {|p| p.shifts.any? {|shift| shift.next_sym == item.lhs } } shift, next_state = prev_state.nterm_transitions.find {|shift, _| shift.next_sym == item.lhs } goto_follows(prev_state, shift, next_state) else @@ -680,12 +691,8 @@ def item_lookahead_set(state) } end - def predecessors(state) - @predecessors[state] ||= @states.select {|prev| prev.transitions.any? {|_, to_state| to_state == state } } - end - def predecessor_with_item(state, item) - predecessors(state).each do |state| + state.predecessors.each do |state| state.kernels.each do |kernel| return [state, kernel] if kernel.rule == item.rule && kernel.position == item.position - 1 end From a49fe821c5889ef40598aa63753f0e20eb351d2e Mon Sep 17 00:00:00 2001 From: Junichi Kobayashi Date: Wed, 26 Jun 2024 02:15:30 +0900 Subject: [PATCH 6/6] wip --- lib/lrama/option_parser.rb | 2 +- lib/lrama/options.rb | 6 - lib/lrama/state.rb | 142 +++++++++++++++++++- lib/lrama/state/inadequacy_annotation.rb | 9 ++ lib/lrama/states.rb | 161 ++++++----------------- lib/lrama/states/item.rb | 4 + sample/calc.y | 1 + spec/fixtures/integration/calculator.y | 1 + spec/fixtures/integration/ielr.y | 62 +++++++++ spec/lrama/state_spec.rb | 29 ++++ spec/lrama/states_spec.rb | 13 ++ 11 files changed, 292 insertions(+), 138 deletions(-) create mode 100644 lib/lrama/state/inadequacy_annotation.rb create mode 100644 spec/fixtures/integration/ielr.y create mode 100644 spec/lrama/state_spec.rb diff --git a/lib/lrama/option_parser.rb b/lib/lrama/option_parser.rb index 5dc4a25d..71c55558 100644 --- a/lib/lrama/option_parser.rb +++ b/lib/lrama/option_parser.rb @@ -59,7 +59,7 @@ def parse_by_option_parser(argv) o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v } o.on('-t', 'reserved, do nothing') { } o.on('--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true } - o.on('-D', '--define=NAME[=VALUE]', "similar to '%define NAME VALUE'") {|v| @options.define = v } + o.on('-D', '--define=NAME[=VALUE]', Array, "similar to '%define NAME VALUE'") {|v| @options.define = v } o.separator '' o.separator 'Output:' o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v } diff --git a/lib/lrama/options.rb b/lib/lrama/options.rb index 84414021..7d83420b 100644 --- a/lib/lrama/options.rb +++ b/lib/lrama/options.rb @@ -21,11 +21,5 @@ def initialize @y = STDIN @debug = false end - - def define=(v) - v.split(',').each do |p_define| - @define.store *p_define.split('=') - end - end end end diff --git a/lib/lrama/state.rb b/lib/lrama/state.rb index a68b5b73..8f3e95ad 100644 --- a/lib/lrama/state.rb +++ b/lib/lrama/state.rb @@ -3,12 +3,13 @@ require "lrama/state/resolved_conflict" require "lrama/state/shift" require "lrama/state/shift_reduce_conflict" +require "lrama/state/inadequacy_annotation" module Lrama class State attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts, :default_reduction_rule, :closure, :items, :predecessors - attr_accessor :shifts, :reduces + attr_accessor :shifts, :reduces, :lalr_isocore def initialize(id, accessing_symbol, kernels) @id = id @@ -22,6 +23,7 @@ def initialize(id, accessing_symbol, kernels) @resolved_conflicts = [] @default_reduction_rule = nil @predecessors = [] + @lalr_isocore = self end def closure=(closure) @@ -155,7 +157,7 @@ def internal_dependencies(shift, next_state) nterm_transitions.select {|other_shift, _| @items.find {|item| item.next_sym == shift.next_sym && item.lhs == other_shift.next_sym && item.symbols_after_dot.all?(&:nullable) } }.reduce([[shift, next_state]]) {|result, transition| - result += internal_follows(*transition) + result += internal_dependencies(*transition) } end @@ -167,16 +169,111 @@ def successor_dependencies(shift, next_state) } end + def inspect + "#{id} -> #{@kernels.map(&:to_s).join(', ')}" + end + def inadequacy_list return @inadequacy_list if @inadequacy_list - list = shifts.to_h {|shift| [shift.next_sym, [[shift, nil]]] } - reduces.each do |reduce| - reduce_list = (reduce.look_ahead || []).to_h {|sym| [sym, [[reduce, reduce.item]]] } - list.merge!(reduce_list) {|_, list_value, reduce_value| list_value + reduce_value } + shift_contributions = shifts.to_h {|shift| + [shift.next_sym, [shift]] + } + reduce_contributions = reduces.map {|reduce| + (reduce.look_ahead || []).to_h {|sym| + [sym, [reduce]] + } + }.reduce(Hash.new([])) {|hash, cont| + hash.merge(cont) {|_, a, b| a.union(b) } + } + + list = shift_contributions.merge(reduce_contributions) {|_, a, b| a.union(b) } + @inadequacy_list = list.select {|token, actions| token.term? && actions.size > 1 } + end + + def annotate_manifestation + inadequacy_list.map {|token, actions| + actions.map {|action| + if action.is_a?(Shift) + [InadequacyAnnotation.new(token: token, action: action, item: nil, contributed: false)] + elsif action.is_a?(Reduce) + if action.rule.empty_rule? + lhs_contributions(action.rule.lhs, token).map {|kernel, contributed| + InadequacyAnnotation.new(token: token, action: action, item: kernel, contributed: contributed) + } + else + kernels.map {|kernel| + contributed = kernel.rule == action.rule && kernel.end_of_rule? + InadequacyAnnotation.new(token: token, action: action, item: kernel, contributed: contributed) + } + end + end + } + } + end + + def annotate_predecessor(annotation_list) + annotation_list.reduce([]) {|annotation| + next [token, {}] if annotation.no_contributions? || actions.any? {|action, hash| + p action, hash + hash.keys.any? {|item| hash[item] && item.position == 1 && compute_lhs_contributions(state, item.lhs, token).empty? } + } + [ + token, actions.to_h {|action, hash| + [ + action, hash.to_h {|item, _| + kernel = state.kernels.find {|k| k.rule == item.rule && k.position == item.position - 1 } + [kernel, + hash[item] && + ( + !kernel.nil? && (state.item_lookahead_set[kernel].include?(token)) || + (item.position == 1 && compute_lhs_contributions(state, item.lhs, token)[item]) + ) + ] + } + ] + } + ] + } + end + + def item_lookahead_set + @item_lookahead_set ||= + kernels.to_h {|item| + value = + if item.position > 1 + prev_state, prev_item = predecessor_with_item(item) + prev_state.item_lookahead_set[prev_item] + elsif item.position == 1 + prev_state = predecessors.find {|p| p.shifts.any? {|shift| shift.next_sym == item.lhs } } + shift, next_state = prev_state.nterm_transitions.find {|shift, _| shift.next_sym == item.lhs } + prev_state.goto_follows(shift, next_state) + else + [] + end + [item, value] + } + end + + def item_lookahead_set=(k) + @item_lookahead_set = k + end + + def predecessor_with_item(item) + predecessors.each do |state| + state.kernels.each do |kernel| + return [state, kernel] if kernel.rule == item.rule && kernel.position == item.position - 1 + end end + end - @inadequacy_list = {self => list.select {|_, actions| actions.size > 1 }} + def lhs_contributions(sym, token) + shift, next_state = nterm_transitions.find {|sh, _| sh.next_sym == sym } + if always_follows(shift, next_state).include?(token) + [] + else + kernels.map {|kernel| [kernel, follow_kernel?(kernel) && item_lookahead_set[kernel].include?(token)] } + end end def follow_kernel?(item) @@ -187,8 +284,39 @@ def follow_kernel_items(shift, next_state, item) internal_dependencies(shift, next_state).any? {|shift, _| shift.next_sym == item.next_sym } && item.symbols_after_dot.all?(&:nullable) end + def next_terms + shifts.filter_map {|shift| shift.next_sym.term? && shift.next_sym } + end + def append_predecessor(prev_state) @predecessors << prev_state + @predecessors.uniq! + end + + def goto_follows(shift, next_state) + include_dependencies(shift, next_state).reduce([]) {|result, goto| + st, sh, next_st = goto + result.union(st.always_follows(sh, next_st)) + } + end + + def include_dependencies(shift, next_state) + internal = internal_dependencies(shift, next_state).map {|sh, next_st| [self, sh, next_st] } + pred = predecessor_dependencies(shift, next_state) + + return internal if pred.empty? + dependency = internal.union(pred) + + dependency.reduce(dependency) {|result, goto| result.union(compute_include_dependencies(*goto)) } + end + + def predecessor_dependencies(shift, next_state) + item = kernels.find {|kernel| kernel.next_sym == shift.next_sym } + return [] unless item.symbols_after_transition.all?(&:nullable) + + st = @predecessors.find {|p| p.items.find {|i| i.rule == item.rule && i.position == item.position - 1 } } + sh, next_st = s.nterm_transitions.find {|shift, _| shift.next_token == item.lhs } + [[s, sh, next_st]] end end end diff --git a/lib/lrama/state/inadequacy_annotation.rb b/lib/lrama/state/inadequacy_annotation.rb new file mode 100644 index 00000000..7a1f518d --- /dev/null +++ b/lib/lrama/state/inadequacy_annotation.rb @@ -0,0 +1,9 @@ +module Lrama + class State + class InadequacyAnnotation < Struct.new(:token, :action, :item, :contributed, keyword_init: true) + def no_contributions? + item.nil? && !contributed + end + end + end +end diff --git a/lib/lrama/states.rb b/lib/lrama/states.rb index 656a3e22..7ca627f7 100644 --- a/lib/lrama/states.rb +++ b/lib/lrama/states.rb @@ -96,6 +96,14 @@ def compute def compute_ielr report_duration(:compute_predecessors) { compute_predecessors } report_duration(:split_states) { split_states } + @states.each {|state| p state, state.transitions, state.item_lookahead_set } + report_duration(:compute_direct_read_sets) { compute_direct_read_sets } + report_duration(:compute_reads_relation) { compute_reads_relation } + report_duration(:compute_read_sets) { compute_read_sets } + report_duration(:compute_includes_relation) { compute_includes_relation } + report_duration(:compute_lookback_relation) { compute_lookback_relation } + report_duration(:compute_follow_sets) { compute_follow_sets } + report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets } end def reporter @@ -570,19 +578,23 @@ def compute_predecessors end def split_states - @item_lookahead_set = {} - @lalr_isocores = Hash.new {|hash, key| hash[key] = key } @ielr_isocores = Hash.new {|hash, key| hash[key] = [key] } @lookaheads_recomputed = Hash.new {|hash, key| hash[key] = false } - @states.each do |state| - state.transitions.each do |shift, next_state| - compute_state(state, shift, next_state) + transition_queue = [] + @states.first.transitions.each do |shift, next_state| + transition_queue << [@states.first, shift, next_state] + end + until transition_queue.empty? + state, shift, next_state = transition_queue.shift + compute_state(state, shift, next_state) + next_state.transitions.each do |sh, next_st| + transition_queue << [next_state, sh, next_st] end end end def merge_lookaheads(state, k) - return if state.kernels.all? {|item| (k[item] - item_lookahead_set(state)[item]).empty? } + return if state.kernels.all? {|item| (k[item] - state.item_lookahead_set[item]).empty? } state.transitions.each do |shift, next_state| next if @lookaheads_recomputed[next_state] @@ -603,18 +615,19 @@ def compute_state(state, shift, next_state) new_state.set_items_to_state(sh.next_items, next_state) end @states << new_state - @lalr_isocores[new_state] = s + new_state.lalr_isocore = s @ielr_isocores[s] << new_state @ielr_isocores[s].each do |st| @ielr_isocores[st] = @ielr_isocores[s] end @lookaheads_recomputed[new_state] = true - @item_lookahead_set[new_state] = k + new_state.item_lookahead_set = k state.update_transition(shift, new_state) elsif(!@lookaheads_recomputed[s]) - @item_lookahead_set[s] = k + s.item_lookahead_set = k @lookaheads_recomputed[s] = true else + state.update_transition(shift, s) merge_lookaheads(s, k) end end @@ -623,29 +636,33 @@ def propagate_lookaheads(state, next_state) next_state.kernels.to_h {|item| lookahead_sets = if item.position == 1 - compute_goto_follow_set(@lalr_isocores[state], item.lhs) + compute_goto_follow_set(state.lalr_isocore, item.lhs) else kernel = state.kernels.find {|k| k.rule == item.rule && k.position == item.position - 1 } - item_lookahead_set(state)[kernel] + state.item_lookahead_set[kernel] end + # p [state, lookahead_sets, lookahead_set_filters(next_state)[item]] + [item, lookahead_sets & lookahead_set_filters(next_state)[item]] } end def lookahead_set_filters(state) + p state state.kernels.to_h {|kernel| + # p [state, kernel, annotation_list(@lalr_isocores[state])] [kernel, - annotation_list(@lalr_isocores[state])[@lalr_isocores[state]].filter_map {|token, actions| + annotation_list(state.lalr_isocore).filter_map {|token, actions| token if token.term? && actions.any? {|item, _| item == kernel } }] } end def is_compatible(state, k) - @lookaheads_recomputed[state] || - annotation_list(@lalr_isocores[state])[@lalr_isocores[state]].all? {|token, actions| - a = dominant_contribution(state, token, actions, item_lookahead_set(state)) + !@lookaheads_recomputed[state] || + annotation_list(state.lalr_isocores).all? {|token, actions| + a = dominant_contribution(state, token, actions, state.item_lookahead_set) b = dominant_contribution(state, token, actions, k) a.empty? || b.empty? || a == b } @@ -669,119 +686,15 @@ def compute_goto_follow_set(state, nterm_token) state.always_follows(shift, next_state).union(state.kernels.select {|item| state.follow_kernel_items(shift, next_state, item) }.reduce([]) {|result, item| - result.union(item_lookahead_set(state)[item]) + result.union(state.item_lookahead_set[item]) }) end - def item_lookahead_set(state) - @item_lookahead_set[state] ||= - state.kernels.to_h {|item| - value = - if item.position > 1 - prev_state, prev_item = predecessor_with_item(state, item) - item_lookahead_set(prev_state)[prev_item] - elsif item.position == 1 - prev_state = state.predecessors.find {|p| p.shifts.any? {|shift| shift.next_sym == item.lhs } } - shift, next_state = prev_state.nterm_transitions.find {|shift, _| shift.next_sym == item.lhs } - goto_follows(prev_state, shift, next_state) - else - [] - end - [item, value] - } - end - - def predecessor_with_item(state, item) - state.predecessors.each do |state| - state.kernels.each do |kernel| - return [state, kernel] if kernel.rule == item.rule && kernel.position == item.position - 1 - end - end - end - - def goto_follows(state, shift, next_state) - compute_include_dependencies(state, shift, next_state).reduce([]) {|result, goto| - st, sh, next_st = goto - result.union(st.always_follows(sh, next_st)) - } - end - - def compute_include_dependencies(state, shift, next_state) - internal = state.internal_dependencies(shift, next_state).map {|sh, next_st| [state, sh, next_st] } - - item = state.kernels.find {|kernel| kernel.next_sym == shift.next_sym } - return internal unless item.symbols_after_dot.all?(&:nullable) - - s, i = state, item - while i.position > 0 - s = predecessors(s).find {|p| p.kernels.find {|item| item.rule == i.rule && item.position == i.position - 1 } } - i = s.kernels.find {|item| item.rule == i.rule && item.position == i.position - 1 } - end - - p_shift, p_next_state = s.transitions.find {|sh, _| sh.next_sym == item.lhs } - internal.union([[s, p_shift, p_next_state]]) - end - def annotation_list(state) - manifestations = state.inadequacy_list.transform_values {|hash| hash.to_h {|token, actions| [token, annotate_manifestation(state, token, actions)] } } - state.transitions.reduce(manifestations) {|item, transition| - item.merge(annotate_predecessor(state, transition[1], annotation_list(transition[1])[transition[1]])) {|state, annotations, other_annotations| - annotations.merge(other_annotations) {|token, actions, other_actions| - actions.merge(other_actions) {|action, items, other_items| - items.merge(other_items) {|item, bool, other_bool| - raise if bool != other_bool - bool - } - } - } - } - } - end - - def annotate_manifestation(state, token, actions) - actions.to_h {|action, item| - [action, - if action.is_a?(State::Shift) - {} - elsif action.is_a?(State::Reduce) - if item.empty_rule? - compute_lhs_contributions(state, item.lhs, token) - else - state.kernels.to_h {|kernel| [kernel, kernel.rule == item.rule && kernel.end_of_rule?] } - end - end - ] - } - end - - def compute_lhs_contributions(state, sym, token) - shift, next_state = state.nterm_transitions.find {|sh, _| sh.next_sym == sym } - if state.always_follows(shift, next_state).include?(token) - {} - else - state.kernels.to_h {|kernel| [kernel, state.follow_kernel?(kernel) && item_lookahead_set(state)[kernel].include?(token)] } - end - end - - def annotate_predecessor(state, next_state, annotation_list) - {state => annotation_list.to_h {|token, actions| - next [token, {}] if actions.empty? || actions.any? {|action, hash| - hash.keys.any? {|item| hash[item] && item.position == 1 && compute_lhs_contributions(state, item.lhs, token).empty? } - } - [token, actions.to_h {|action, hash| - [action, hash.to_h {|item, _| - kernel = state.kernels.find {|k| k.rule == item.rule && k.position == item.position - 1 } - [kernel, - hash[item] && - ( - !kernel.nil? && (item_lookahead_set(state)[kernel].include?(token)) || - (item.position == 1 && compute_lhs_contributions(state, item.lhs, token)[item]) - ) - ] - }] - }] - } - } + manifestations = state.annotate_manifestation + predecessors = state.transitions.map {|_, next_state| state.annotate_predecessor(annotation_list(next_state)) } + p state, state.inadequacy_list, manifestations, predecessors + manifestations + predecessors end end end diff --git a/lib/lrama/states/item.rb b/lib/lrama/states/item.rb index 31b74b9d..8a36bc1e 100644 --- a/lib/lrama/states/item.rb +++ b/lib/lrama/states/item.rb @@ -62,6 +62,10 @@ def symbols_after_dot rhs[position..-1] end + def symbols_after_transition + rhs[position+1..-1] + end + def to_s "#{lhs.id.s_value}: #{display_name}" end diff --git a/sample/calc.y b/sample/calc.y index 5c291105..10da33b2 100644 --- a/sample/calc.y +++ b/sample/calc.y @@ -25,6 +25,7 @@ static int yyerror(YYLTYPE *loc, const char *str); %union { int val; } +%define lr.type ielr %token LF %token NUM %type expr diff --git a/spec/fixtures/integration/calculator.y b/spec/fixtures/integration/calculator.y index 21e38ea8..6dffb307 100644 --- a/spec/fixtures/integration/calculator.y +++ b/spec/fixtures/integration/calculator.y @@ -16,6 +16,7 @@ static int yyerror(YYLTYPE *loc, const char *str); %type expr %left '+' '-' %left '*' '/' +%define lr.type ielr %% diff --git a/spec/fixtures/integration/ielr.y b/spec/fixtures/integration/ielr.y new file mode 100644 index 00000000..d8680a30 --- /dev/null +++ b/spec/fixtures/integration/ielr.y @@ -0,0 +1,62 @@ +%{ +#include +#include +#include "y.tab.h" +#define YYDEBUG 1 +static int yylex(YYSTYPE *val, YYLTYPE *loc); +static int yyerror(YYLTYPE *loc, const char *str); +%} + +%union { + int val; +} + +%token a +%token b +%token c +%define lr.type ielr + +%% +S: a A B a + | b A B b +A: a C D E +B: c + | // empty +C: D +D: a +E: a + | // empty + +%% + +static int yylex(YYSTYPE *yylval, YYLTYPE *loc) { + int c = getchar(); + printf("%c\n", c); + int val; + + switch (c) { + case ' ': case '\t': + return yylex(yylval, loc); + + case 'a': case 'b': case 'c': + return c; + + case '\n': + exit(0); + + default: + fprintf(stderr, "unknown character: %c\n", c); + exit(1); + } +} + +static int yyerror(YYLTYPE *loc, const char *str) { + fprintf(stderr, "parse error: %s\n", str); + return 0; +} + +int main() { + printf("Enter the formula:\n"); + yyparse(); + return 0; +} diff --git a/spec/lrama/state_spec.rb b/spec/lrama/state_spec.rb new file mode 100644 index 00000000..df20b1df --- /dev/null +++ b/spec/lrama/state_spec.rb @@ -0,0 +1,29 @@ +RSpec.describe Lrama::State do + let(:grammar) { <<-FILE } + %union { + int val; + } + + %token a + %token b + %token c + %define lr.type ielr + + %% + S: a A B a + | b A B b + A: a C D E + B: c + | // empty + C: D + D: a + E: a + | // empty + %% + FILE + + + describe '#internal_dependencies' do + + end +end diff --git a/spec/lrama/states_spec.rb b/spec/lrama/states_spec.rb index e611c22c..22825321 100644 --- a/spec/lrama/states_spec.rb +++ b/spec/lrama/states_spec.rb @@ -1910,4 +1910,17 @@ class : keyword_class tSTRING keyword_end %prec tPLUS end end end + + describe '#compute_ielr' do + it 'recompute states' do + path = "integration/ielr.y" + y = File.read(fixture_path(path)) + grammar = Lrama::Parser.new(y, path).parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, warning) + states.compute + states.compute_ielr + end + end end