diff --git a/lib/lrama/command.rb b/lib/lrama/command.rb index 0095c1a1..3ff39d57 100644 --- a/lib/lrama/command.rb +++ b/lib/lrama/command.rb @@ -19,7 +19,7 @@ def run(argv) text = options.y.read options.y.close if options.y != STDIN begin - grammar = Lrama::Parser.new(text, options.grammar_file, options.debug).parse + grammar = Lrama::Parser.new(text, options.grammar_file, options.debug, options.define).parse unless grammar.no_stdlib stdlib_grammar = Lrama::Parser.new(File.read(STDLIB_FILE_PATH), STDLIB_FILE_PATH, options.debug).parse grammar.insert_before_parameterizing_rules(stdlib_grammar.parameterizing_rules) @@ -34,6 +34,7 @@ def run(argv) end states = Lrama::States.new(grammar, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure])) states.compute + states.compute_ielr if grammar.ielr_defined? context = Lrama::Context.new(states) if options.report_file diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb index 3724f828..955d47dd 100644 --- a/lib/lrama/grammar.rb +++ b/lib/lrama/grammar.rb @@ -28,14 +28,14 @@ class Grammar attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux, :parameterizing_rule_resolver attr_accessor :union, :expect, :printers, :error_tokens, :lex_param, :parse_param, :initial_action, :after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack, - :symbols_resolver, :types, :rules, :rule_builders, :sym_to_rules, :no_stdlib, :locations + :symbols_resolver, :types, :rules, :rule_builders, :sym_to_rules, :no_stdlib, :locations, :define def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term, :find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol, :find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type, :fill_printer, :fill_destructor, :fill_error_token, :sort_by_number! - def initialize(rule_counter) + def initialize(rule_counter, define = {}) @rule_counter = rule_counter # Code defined by "%code" @@ -57,6 +57,7 @@ def initialize(rule_counter) @aux = Auxiliary.new @no_stdlib = false @locations = false + @define = define append_special_symbols end @@ -171,6 +172,10 @@ def find_rules_by_symbol(sym) @sym_to_rules[sym.number] end + def ielr_defined? + @define.key?('lr.type') && @define['lr.type'] == 'ielr' + end + private def compute_nullable diff --git a/lib/lrama/option_parser.rb b/lib/lrama/option_parser.rb index 0727d1b3..d6f4b32b 100644 --- a/lib/lrama/option_parser.rb +++ b/lib/lrama/option_parser.rb @@ -61,6 +61,7 @@ def parse_by_option_parser(argv) o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v } o.on('-t', 'reserved, do nothing') { } o.on('--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true } + o.on('-D', '--define=NAME[=VALUE]', Array, "similar to '%define NAME VALUE'") {|v| @options.define = v } o.separator '' o.separator 'Output:' o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v } diff --git a/lib/lrama/options.rb b/lib/lrama/options.rb index ccd76803..08f75a77 100644 --- a/lib/lrama/options.rb +++ b/lib/lrama/options.rb @@ -7,10 +7,11 @@ class Options :report_file, :outfile, :error_recovery, :grammar_file, :trace_opts, :report_opts, - :diagnostic, :y, :debug + :diagnostic, :y, :debug, :define def initialize @skeleton = "bison/yacc.c" + @define = {} @header = false @header_file = nil @report_file = nil diff --git a/lib/lrama/parser.rb b/lib/lrama/parser.rb index 6a35dba2..589e3389 100644 --- a/lib/lrama/parser.rb +++ b/lib/lrama/parser.rb @@ -658,17 +658,18 @@ class Parser < Racc::Parser include Lrama::Report::Duration -def initialize(text, path, debug = false) +def initialize(text, path, debug = false, define = {}) @grammar_file = Lrama::Lexer::GrammarFile.new(path, text) @yydebug = debug @rule_counter = Lrama::Grammar::Counter.new(0) @midrule_action_counter = Lrama::Grammar::Counter.new(1) + @define = define end def parse report_duration(:parse) do @lexer = Lrama::Lexer.new(@grammar_file) - @grammar = Lrama::Grammar.new(@rule_counter) + @grammar = Lrama::Grammar.new(@rule_counter, @define) @precedence_number = 0 reset_precs do_parse @@ -906,7 +907,7 @@ def raise_parse_error(error_message, location) 2, 71, :_reduce_15, 1, 58, :_reduce_none, 2, 58, :_reduce_17, - 3, 58, :_reduce_none, + 3, 58, :_reduce_18, 2, 58, :_reduce_none, 2, 58, :_reduce_20, 2, 58, :_reduce_21, @@ -1313,7 +1314,12 @@ def _reduce_17(val, _values, result) end .,., -# reduce 18 omitted +module_eval(<<'.,.,', 'parser.y', 27) + def _reduce_18(val, _values, result) + @grammar.define[val[1].s_value] = val[2]&.s_value + result + end +.,., # reduce 19 omitted diff --git a/lib/lrama/state.rb b/lib/lrama/state.rb index c2623746..5123d136 100644 --- a/lib/lrama/state.rb +++ b/lib/lrama/state.rb @@ -5,12 +5,13 @@ require_relative "state/resolved_conflict" require_relative "state/shift" require_relative "state/shift_reduce_conflict" +require_relative "state/inadequacy_annotation" module Lrama class State attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts, - :default_reduction_rule, :closure, :items - attr_accessor :shifts, :reduces + :default_reduction_rule, :closure, :items, :predecessors + attr_accessor :shifts, :reduces, :lalr_isocore def initialize(id, accessing_symbol, kernels) @id = id @@ -23,6 +24,8 @@ def initialize(id, accessing_symbol, kernels) @conflicts = [] @resolved_conflicts = [] @default_reduction_rule = nil + @predecessors = [] + @lalr_isocore = self end def closure=(closure) @@ -84,6 +87,11 @@ def transitions @transitions ||= shifts.map {|shift| [shift, @items_to_state[shift.next_items]] } end + def update_transition(shift, next_state) + set_items_to_state(shift.next_items, next_state) + @transitions = shifts.map {|sh| [sh, @items_to_state[sh.next_items]] } + end + def selected_term_transitions term_transitions.reject do |shift, next_state| shift.not_selected @@ -142,5 +150,175 @@ def rr_conflicts conflict.type == :reduce_reduce end end + + def always_follows(shift, next_state) + internal_dependencies(shift, next_state).union(successor_dependencies(shift, next_state)).reduce([]) {|result, transition| result += transition[1].term_transitions.map {|shift, _| shift.next_sym } } + end + + def internal_dependencies(shift, next_state) + nterm_transitions.select {|other_shift, _| + @items.find {|item| item.next_sym == shift.next_sym && item.lhs == other_shift.next_sym && item.symbols_after_dot.all?(&:nullable) } + }.reduce([[shift, next_state]]) {|result, transition| + result += internal_dependencies(*transition) + } + end + + def successor_dependencies(shift, next_state) + next_state.nterm_transitions.select {|other_shift, _| + other_shift.next_sym.nullable + }.reduce([[shift, next_state]]) {|result, transition| + result += successor_dependencies(*transition) + } + end + + def inspect + "#{id} -> #{@kernels.map(&:to_s).join(', ')}" + end + + def inadequacy_list + return @inadequacy_list if @inadequacy_list + + shift_contributions = shifts.to_h {|shift| + [shift.next_sym, [shift]] + } + reduce_contributions = reduces.map {|reduce| + (reduce.look_ahead || []).to_h {|sym| + [sym, [reduce]] + } + }.reduce(Hash.new([])) {|hash, cont| + hash.merge(cont) {|_, a, b| a.union(b) } + } + + list = shift_contributions.merge(reduce_contributions) {|_, a, b| a.union(b) } + @inadequacy_list = list.select {|token, actions| token.term? && actions.size > 1 } + end + + def annotate_manifestation + inadequacy_list.map {|token, actions| + actions.map {|action| + if action.is_a?(Shift) + [InadequacyAnnotation.new(token: token, action: action, item: nil, contributed: false)] + elsif action.is_a?(Reduce) + if action.rule.empty_rule? + lhs_contributions(action.rule.lhs, token).map {|kernel, contributed| + InadequacyAnnotation.new(token: token, action: action, item: kernel, contributed: contributed) + } + else + kernels.map {|kernel| + contributed = kernel.rule == action.rule && kernel.end_of_rule? + InadequacyAnnotation.new(token: token, action: action, item: kernel, contributed: contributed) + } + end + end + } + } + end + + def annotate_predecessor(annotation_list) + annotation_list.reduce([]) {|annotation| + next [token, {}] if annotation.no_contributions? || actions.any? {|action, hash| + p action, hash + hash.keys.any? {|item| hash[item] && item.position == 1 && compute_lhs_contributions(state, item.lhs, token).empty? } + } + [ + token, actions.to_h {|action, hash| + [ + action, hash.to_h {|item, _| + kernel = state.kernels.find {|k| k.rule == item.rule && k.position == item.position - 1 } + [kernel, + hash[item] && + ( + !kernel.nil? && (state.item_lookahead_set[kernel].include?(token)) || + (item.position == 1 && compute_lhs_contributions(state, item.lhs, token)[item]) + ) + ] + } + ] + } + ] + } + end + + def item_lookahead_set + @item_lookahead_set ||= + kernels.to_h {|item| + value = + if item.position > 1 + prev_state, prev_item = predecessor_with_item(item) + prev_state.item_lookahead_set[prev_item] + elsif item.position == 1 + prev_state = predecessors.find {|p| p.shifts.any? {|shift| shift.next_sym == item.lhs } } + shift, next_state = prev_state.nterm_transitions.find {|shift, _| shift.next_sym == item.lhs } + prev_state.goto_follows(shift, next_state) + else + [] + end + [item, value] + } + end + + def item_lookahead_set=(k) + @item_lookahead_set = k + end + + def predecessor_with_item(item) + predecessors.each do |state| + state.kernels.each do |kernel| + return [state, kernel] if kernel.rule == item.rule && kernel.position == item.position - 1 + end + end + end + + def lhs_contributions(sym, token) + shift, next_state = nterm_transitions.find {|sh, _| sh.next_sym == sym } + if always_follows(shift, next_state).include?(token) + [] + else + kernels.map {|kernel| [kernel, follow_kernel?(kernel) && item_lookahead_set[kernel].include?(token)] } + end + end + + def follow_kernel?(item) + item.symbols_after_dot.all?(&:nullable) + end + + def follow_kernel_items(shift, next_state, item) + internal_dependencies(shift, next_state).any? {|shift, _| shift.next_sym == item.next_sym } && item.symbols_after_dot.all?(&:nullable) + end + + def next_terms + shifts.filter_map {|shift| shift.next_sym.term? && shift.next_sym } + end + + def append_predecessor(prev_state) + @predecessors << prev_state + @predecessors.uniq! + end + + def goto_follows(shift, next_state) + include_dependencies(shift, next_state).reduce([]) {|result, goto| + st, sh, next_st = goto + result.union(st.always_follows(sh, next_st)) + } + end + + def include_dependencies(shift, next_state) + internal = internal_dependencies(shift, next_state).map {|sh, next_st| [self, sh, next_st] } + pred = predecessor_dependencies(shift, next_state) + + return internal if pred.empty? + dependency = internal.union(pred) + + dependency.reduce(dependency) {|result, goto| result.union(compute_include_dependencies(*goto)) } + end + + def predecessor_dependencies(shift, next_state) + item = kernels.find {|kernel| kernel.next_sym == shift.next_sym } + return [] unless item.symbols_after_transition.all?(&:nullable) + + st = @predecessors.find {|p| p.items.find {|i| i.rule == item.rule && i.position == item.position - 1 } } + sh, next_st = s.nterm_transitions.find {|shift, _| shift.next_token == item.lhs } + [[s, sh, next_st]] + end end end diff --git a/lib/lrama/state/inadequacy_annotation.rb b/lib/lrama/state/inadequacy_annotation.rb new file mode 100644 index 00000000..7a1f518d --- /dev/null +++ b/lib/lrama/state/inadequacy_annotation.rb @@ -0,0 +1,9 @@ +module Lrama + class State + class InadequacyAnnotation < Struct.new(:token, :action, :item, :contributed, keyword_init: true) + def no_contributions? + item.nil? && !contributed + end + end + end +end diff --git a/lib/lrama/states.rb b/lib/lrama/states.rb index 0ed4bff9..ecfd5a86 100644 --- a/lib/lrama/states.rb +++ b/lib/lrama/states.rb @@ -92,6 +92,19 @@ def compute report_duration(:compute_default_reduction) { compute_default_reduction } end + def compute_ielr + report_duration(:compute_predecessors) { compute_predecessors } + report_duration(:split_states) { split_states } + @states.each {|state| p state, state.transitions, state.item_lookahead_set } + report_duration(:compute_direct_read_sets) { compute_direct_read_sets } + report_duration(:compute_reads_relation) { compute_reads_relation } + report_duration(:compute_read_sets) { compute_read_sets } + report_duration(:compute_includes_relation) { compute_includes_relation } + report_duration(:compute_lookback_relation) { compute_lookback_relation } + report_duration(:compute_follow_sets) { compute_follow_sets } + report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets } + end + def reporter StatesReporter.new(self) end @@ -524,5 +537,136 @@ def compute_default_reduction end.first end end + + def compute_predecessors + queue = [@states.first] + until queue.empty? + state = queue.shift + state.transitions.each do |_, next_state| + next_state.append_predecessor(state) + queue << next_state + end + end + end + + def split_states + @ielr_isocores = Hash.new {|hash, key| hash[key] = [key] } + @lookaheads_recomputed = Hash.new {|hash, key| hash[key] = false } + transition_queue = [] + @states.first.transitions.each do |shift, next_state| + transition_queue << [@states.first, shift, next_state] + end + until transition_queue.empty? + state, shift, next_state = transition_queue.shift + compute_state(state, shift, next_state) + next_state.transitions.each do |sh, next_st| + transition_queue << [next_state, sh, next_st] + end + end + end + + def merge_lookaheads(state, k) + return if state.kernels.all? {|item| (k[item] - state.item_lookahead_set[item]).empty? } + + state.transitions.each do |shift, next_state| + next if @lookaheads_recomputed[next_state] + compute_state(state, shift, next_state) + end + end + + def compute_state(state, shift, next_state) + k = propagate_lookaheads(state, next_state) + s = @ielr_isocores[next_state].find {|st| is_compatible(st, k) } + + if s.nil? + s = @ielr_isocores[next_state].last + new_state = State.new(@states.count, s.accessing_symbol, s.kernels) + new_state.closure = s.closure + new_state.compute_shifts_reduces + s.transitions.each do |sh, next_state| + new_state.set_items_to_state(sh.next_items, next_state) + end + @states << new_state + new_state.lalr_isocore = s + @ielr_isocores[s] << new_state + @ielr_isocores[s].each do |st| + @ielr_isocores[st] = @ielr_isocores[s] + end + @lookaheads_recomputed[new_state] = true + new_state.item_lookahead_set = k + state.update_transition(shift, new_state) + elsif(!@lookaheads_recomputed[s]) + s.item_lookahead_set = k + @lookaheads_recomputed[s] = true + else + state.update_transition(shift, s) + merge_lookaheads(s, k) + end + end + + def propagate_lookaheads(state, next_state) + next_state.kernels.to_h {|item| + lookahead_sets = + if item.position == 1 + compute_goto_follow_set(state.lalr_isocore, item.lhs) + else + kernel = state.kernels.find {|k| k.rule == item.rule && k.position == item.position - 1 } + state.item_lookahead_set[kernel] + end + + # p [state, lookahead_sets, lookahead_set_filters(next_state)[item]] + + [item, lookahead_sets & lookahead_set_filters(next_state)[item]] + } + end + + def lookahead_set_filters(state) + p state + state.kernels.to_h {|kernel| + # p [state, kernel, annotation_list(@lalr_isocores[state])] + [kernel, + annotation_list(state.lalr_isocore).filter_map {|token, actions| + token if token.term? && actions.any? {|item, _| item == kernel } + }] + } + end + + def is_compatible(state, k) + !@lookaheads_recomputed[state] || + annotation_list(state.lalr_isocores).all? {|token, actions| + a = dominant_contribution(state, token, actions, state.item_lookahead_set) + b = dominant_contribution(state, token, actions, k) + a.empty? || b.empty? || a == b + } + end + + def dominant_contribution(state, token, actions, lookaheads) + actions.filter_map {|action, items| + action if items.empty? || items.any? {|item, bool| bool && lookaheads[item].include?(token) } + }.reject {|action| + if action.is_a?(State::Shift) + action.not_selected + elsif action.is_a?(State::Reduce) + action.not_selected_symbols.include?(token) + end + } + end + + def compute_goto_follow_set(state, nterm_token) + shift, next_state = state.nterm_transitions.find {|shift, _| shift.next_sym == nterm_token } + return [] if shift.nil? && nterm_token.id.s_value == '$accept' + state.always_follows(shift, next_state).union(state.kernels.select {|item| + state.follow_kernel_items(shift, next_state, item) + }.reduce([]) {|result, item| + result.union(state.item_lookahead_set[item]) + }) + end + + def annotation_list(state) + manifestations = state.annotate_manifestation + predecessors = state.transitions.map {|_, next_state| state.annotate_predecessor(annotation_list(next_state)) } + p state, state.inadequacy_list, manifestations, predecessors + manifestations + predecessors + end end end diff --git a/lib/lrama/states/item.rb b/lib/lrama/states/item.rb index 5074e943..08a8df91 100644 --- a/lib/lrama/states/item.rb +++ b/lib/lrama/states/item.rb @@ -64,6 +64,10 @@ def symbols_after_dot # steep:ignore rhs[position..-1] end + def symbols_after_transition + rhs[position+1..-1] + end + def to_s "#{lhs.id.s_value}: #{display_name}" end diff --git a/parser.y b/parser.y index 52603a72..cadc4cb6 100644 --- a/parser.y +++ b/parser.y @@ -25,7 +25,7 @@ rule bison_declaration: grammar_declaration | "%expect" INTEGER { @grammar.expect = val[1] } - | "%define" variable value + | "%define" variable value { @grammar.define[val[1].s_value] = val[2]&.s_value } | "%param" param+ | "%lex-param" param+ { @@ -417,17 +417,18 @@ end include Lrama::Report::Duration -def initialize(text, path, debug = false) +def initialize(text, path, debug = false, define = {}) @grammar_file = Lrama::Lexer::GrammarFile.new(path, text) @yydebug = debug @rule_counter = Lrama::Grammar::Counter.new(0) @midrule_action_counter = Lrama::Grammar::Counter.new(1) + @define = define end def parse report_duration(:parse) do @lexer = Lrama::Lexer.new(@grammar_file) - @grammar = Lrama::Grammar.new(@rule_counter) + @grammar = Lrama::Grammar.new(@rule_counter, @define) @precedence_number = 0 reset_precs do_parse diff --git a/sample/calc.y b/sample/calc.y index b4c4ca30..f9915d87 100644 --- a/sample/calc.y +++ b/sample/calc.y @@ -25,6 +25,7 @@ static int yyerror(YYLTYPE *loc, const char *str); %union { int val; } +%define lr.type ielr %token LF %token NUM %type expr diff --git a/spec/fixtures/integration/calculator.y b/spec/fixtures/integration/calculator.y index 54b86b7a..d3ec5c06 100644 --- a/spec/fixtures/integration/calculator.y +++ b/spec/fixtures/integration/calculator.y @@ -16,6 +16,7 @@ static int yyerror(YYLTYPE *loc, const char *str); %type expr %left '+' '-' %left '*' '/' +%define lr.type ielr %locations diff --git a/spec/fixtures/integration/ielr.y b/spec/fixtures/integration/ielr.y new file mode 100644 index 00000000..d8680a30 --- /dev/null +++ b/spec/fixtures/integration/ielr.y @@ -0,0 +1,62 @@ +%{ +#include +#include +#include "y.tab.h" +#define YYDEBUG 1 +static int yylex(YYSTYPE *val, YYLTYPE *loc); +static int yyerror(YYLTYPE *loc, const char *str); +%} + +%union { + int val; +} + +%token a +%token b +%token c +%define lr.type ielr + +%% +S: a A B a + | b A B b +A: a C D E +B: c + | // empty +C: D +D: a +E: a + | // empty + +%% + +static int yylex(YYSTYPE *yylval, YYLTYPE *loc) { + int c = getchar(); + printf("%c\n", c); + int val; + + switch (c) { + case ' ': case '\t': + return yylex(yylval, loc); + + case 'a': case 'b': case 'c': + return c; + + case '\n': + exit(0); + + default: + fprintf(stderr, "unknown character: %c\n", c); + exit(1); + } +} + +static int yyerror(YYLTYPE *loc, const char *str) { + fprintf(stderr, "parse error: %s\n", str); + return 0; +} + +int main() { + printf("Enter the formula:\n"); + yyparse(); + return 0; +} diff --git a/spec/lrama/parser_spec.rb b/spec/lrama/parser_spec.rb index 8b14ee5f..db6c2205 100644 --- a/spec/lrama/parser_spec.rb +++ b/spec/lrama/parser_spec.rb @@ -59,6 +59,7 @@ CODE expect(grammar.expect).to eq(0) + expect(grammar.define).to eq({'api.pure' => nil, 'parse.error' => 'verbose'}) expect(grammar.printers).to eq([ Printer.new( ident_or_tags: [T::Tag.new(s_value: "")], diff --git a/spec/lrama/state_spec.rb b/spec/lrama/state_spec.rb new file mode 100644 index 00000000..df20b1df --- /dev/null +++ b/spec/lrama/state_spec.rb @@ -0,0 +1,29 @@ +RSpec.describe Lrama::State do + let(:grammar) { <<-FILE } + %union { + int val; + } + + %token a + %token b + %token c + %define lr.type ielr + + %% + S: a A B a + | b A B b + A: a C D E + B: c + | // empty + C: D + D: a + E: a + | // empty + %% + FILE + + + describe '#internal_dependencies' do + + end +end diff --git a/spec/lrama/states_spec.rb b/spec/lrama/states_spec.rb index 953ac54c..c4d4cf0f 100644 --- a/spec/lrama/states_spec.rb +++ b/spec/lrama/states_spec.rb @@ -1787,4 +1787,17 @@ class go to state 5 STR end end + + describe '#compute_ielr' do + it 'recompute states' do + path = "integration/ielr.y" + y = File.read(fixture_path(path)) + grammar = Lrama::Parser.new(y, path).parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, warning) + states.compute + states.compute_ielr + end + end end