lib/lrama/states.rb

require "forwardable"
require "lrama/report/duration"
require "lrama/states/item"

module Lrama
  # States is passed to a template file
  #
  # "Efficient Computation of LALR(1) Look-Ahead Sets"
  #   https://dl.acm.org/doi/pdf/10.1145/69622.357187
  class States
    extend Forwardable
    include Lrama::Report::Duration

    def_delegators "@grammar", :symbols, :terms, :nterms, :rules,
      :accept_symbol, :eof_symbol, :undef_symbol, :find_symbol_by_s_value!

    attr_reader :states, :reads_relation, :includes_relation, :lookback_relation

    def initialize(grammar, warning, trace_state: false)
      @grammar = grammar
      @warning = warning
      @trace_state = trace_state

      @states = []

      # `DR(p, A) = {t ∈ T | p -(A)-> r -(t)-> }`
      #   where p is state, A is nterm, t is term.
      #
      # `@direct_read_sets` is a hash whose
      # key is [state.id, nterm.token_id],
      # value is bitmap of term.
      @direct_read_sets = {}

      # Reads relation on nonterminal transitions (pair of state and nterm)
      # `(p, A) reads (r, C) iff p -(A)-> r -(C)-> and C =>* ε`
      #   where p, r are state, A, C are nterm.
      #
      # `@reads_relation` is a hash whose
      # key is [state.id, nterm.token_id],
      # value is array of [state.id, nterm.token_id].
      @reads_relation = {}

      # `@read_sets` is a hash whose
      # key is [state.id, nterm.token_id],
      # value is bitmap of term.
      @read_sets = {}

      # `(p, A) includes (p', B) iff B -> βAγ, γ =>* ε, p' -(β)-> p`
      #   where p, p' are state, A, B are nterm, β, γ is sequence of symbol.
      #
      # `@includes_relation` is a hash whose
      # key is [state.id, nterm.token_id],
      # value is array of [state.id, nterm.token_id].
      @includes_relation = {}

      # `(q, A -> ω) lookback (p, A) iff p -(ω)-> q`
      #   where p, q are state, A -> ω is rule, A is nterm, ω is sequence of symbol.
      #
      # `@lookback_relation` is a hash whose
      # key is [state.id, rule.id],
      # value is array of [state.id, nterm.token_id].
      @lookback_relation = {}

      # `@follow_sets` is a hash whose
      # key is [state.id, rule.id],
      # value is bitmap of term.
      @follow_sets = {}

      # `LA(q, A -> ω) = ∪{Follow(p, A) | (q, A -> ω) lookback (p, A)`
      #
      # `@la` is a hash whose
      # key is [state.id, rule.id],
      # value is bitmap of term.
      @la = {}
    end

    def compute
      # Look Ahead Sets
      report_duration(:compute_lr0_states) { compute_lr0_states }
      report_duration(:compute_direct_read_sets) { compute_direct_read_sets }
      report_duration(:compute_reads_relation) { compute_reads_relation }
      report_duration(:compute_read_sets) { compute_read_sets }
      report_duration(:compute_includes_relation) { compute_includes_relation }
      report_duration(:compute_lookback_relation) { compute_lookback_relation }
      report_duration(:compute_follow_sets) { compute_follow_sets }
      report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets }

      # Conflicts
      report_duration(:compute_conflicts) { compute_conflicts }

      report_duration(:compute_default_reduction) { compute_default_reduction }

      check_conflicts
    end

    def compute_ielr
      report_duration(:compute_predecessors) { compute_predecessors }
      report_duration(:split_states) { split_states }
      @states.each {|state| p state, state.transitions, state.item_lookahead_set }
      report_duration(:compute_direct_read_sets) { compute_direct_read_sets }
      report_duration(:compute_reads_relation) { compute_reads_relation }
      report_duration(:compute_read_sets) { compute_read_sets }
      report_duration(:compute_includes_relation) { compute_includes_relation }
      report_duration(:compute_lookback_relation) { compute_lookback_relation }
      report_duration(:compute_follow_sets) { compute_follow_sets }
      report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets }
    end

    def reporter
      StatesReporter.new(self)
    end

    def states_count
      @states.count
    end

    def direct_read_sets
      @direct_read_sets.transform_values do |v|
        bitmap_to_terms(v)
      end
    end

    def read_sets
      @read_sets.transform_values do |v|
        bitmap_to_terms(v)
      end
    end

    def follow_sets
      @follow_sets.transform_values do |v|
        bitmap_to_terms(v)
      end
    end

    def la
      @la.transform_values do |v|
        bitmap_to_terms(v)
      end
    end

    private

    def sr_conflicts
      @states.flat_map(&:sr_conflicts)
    end

    def rr_conflicts
      @states.flat_map(&:rr_conflicts)
    end

    def trace_state
      if @trace_state
        yield STDERR
      end
    end

    def create_state(accessing_symbol, kernels, states_created)
      # A item can appear in some states,
      # so need to use `kernels` (not `kernels.first`) as a key.
      #
      # For example...
      #
      # %%
      # program: '+' strings_1
      #        | '-' strings_2
      #        ;
      #
      # strings_1: string_1
      #          ;
      #
      # strings_2: string_1
      #          | string_2
      #          ;
      #
      # string_1: string
      #         ;
      #
      # string_2: string '+'
      #         ;
      #
      # string: tSTRING
      #       ;
      # %%
      #
      # For these grammar, there are 2 states
      #
      # State A
      #    string_1: string •
      #
      # State B
      #    string_1: string •
      #    string_2: string • '+'
      #
      return [states_created[kernels], false] if states_created[kernels]

      state = State.new(@states.count, accessing_symbol, kernels)
      @states << state
      states_created[kernels] = state

      return [state, true]
    end

    def setup_state(state)
      # closure
      closure = []
      visited = {}
      queued = {}
      items = state.kernels.dup

      items.each do |item|
        queued[item] = true
      end

      while (item = items.shift) do
        visited[item] = true

        if (sym = item.next_sym) && sym.nterm?
          @grammar.find_rules_by_symbol!(sym).each do |rule|
            i = Item.new(rule: rule, position: 0)
            next if queued[i]
            closure << i
            items << i
            queued[i] = true
          end
        end
      end

      state.closure = closure.sort_by {|i| i.rule.id }

      # Trace
      trace_state do |out|
        out << "Closure: input\n"
        state.kernels.each do |item|
          out << "  #{item.display_rest}\n"
        end
        out << "\n\n"
        out << "Closure: output\n"
        state.items.each do |item|
          out << "  #{item.display_rest}\n"
        end
        out << "\n\n"
      end

      # shift & reduce
      state.compute_shifts_reduces
    end

    def enqueue_state(states, state)
      # Trace
      previous = state.kernels.first.previous_sym
      trace_state do |out|
        out << sprintf("state_list_append (state = %d, symbol = %d (%s))",
          @states.count, previous.number, previous.display_name)
      end

      states << state
    end

    def compute_lr0_states
      # State queue
      states = []
      states_created = {}

      state, _ = create_state(symbols.first, [Item.new(rule: @grammar.rules.first, position: 0)], states_created)
      enqueue_state(states, state)

      while (state = states.shift) do
        # Trace
        #
        # Bison 3.8.2 renders "(reached by "end-of-input")" for State 0 but
        # I think it is not correct...
        previous = state.kernels.first.previous_sym
        trace_state do |out|
          out << "Processing state #{state.id} (reached by #{previous.display_name})\n"
        end

        setup_state(state)

        state.shifts.each do |shift|
          new_state, created = create_state(shift.next_sym, shift.next_items, states_created)
          state.set_items_to_state(shift.next_items, new_state)
          enqueue_state(states, new_state) if created
        end
      end
    end

    def nterm_transitions
      a = []

      @states.each do |state|
        state.nterm_transitions.each do |shift, next_state|
          nterm = shift.next_sym
          a << [state, nterm, next_state]
        end
      end

      a
    end

    def compute_direct_read_sets
      @states.each do |state|
        state.nterm_transitions.each do |shift, next_state|
          nterm = shift.next_sym

          ary = next_state.term_transitions.map do |shift, _|
            shift.next_sym.number
          end

          key = [state.id, nterm.token_id]
          @direct_read_sets[key] = Bitmap.from_array(ary)
        end
      end
    end

    def compute_reads_relation
      @states.each do |state|
        state.nterm_transitions.each do |shift, next_state|
          nterm = shift.next_sym
          next_state.nterm_transitions.each do |shift2, _next_state2|
            nterm2 = shift2.next_sym
            if nterm2.nullable
              key = [state.id, nterm.token_id]
              @reads_relation[key] ||= []
              @reads_relation[key] << [next_state.id, nterm2.token_id]
            end
          end
        end
      end
    end

    def compute_read_sets
      sets = nterm_transitions.map do |state, nterm, next_state|
        [state.id, nterm.token_id]
      end

      @read_sets = Digraph.new(sets, @reads_relation, @direct_read_sets).compute
    end

    # Execute transition of state by symbols
    # then return final state.
    def transition(state, symbols)
      symbols.each do |sym|
        state = state.transition(sym)
      end

      state
    end

    def compute_includes_relation
      @states.each do |state|
        state.nterm_transitions.each do |shift, next_state|
          nterm = shift.next_sym
          @grammar.find_rules_by_symbol!(nterm).each do |rule|
            i = rule.rhs.count - 1

            while (i > -1) do
              sym = rule.rhs[i]

              break if sym.term?
              state2 = transition(state, rule.rhs[0...i])
              # p' = state, B = nterm, p = state2, A = sym
              key = [state2.id, sym.token_id]
              # TODO: need to omit if state == state2 ?
              @includes_relation[key] ||= []
              @includes_relation[key] << [state.id, nterm.token_id]
              break if !sym.nullable
              i -= 1
            end
          end
        end
      end
    end

    def compute_lookback_relation
      @states.each do |state|
        state.nterm_transitions.each do |shift, next_state|
          nterm = shift.next_sym
          @grammar.find_rules_by_symbol!(nterm).each do |rule|
            state2 = transition(state, rule.rhs)
            # p = state, A = nterm, q = state2, A -> ω = rule
            key = [state2.id, rule.id]
            @lookback_relation[key] ||= []
            @lookback_relation[key] << [state.id, nterm.token_id]
          end
        end
      end
    end

    def compute_follow_sets
      sets = nterm_transitions.map do |state, nterm, next_state|
        [state.id, nterm.token_id]
      end

      @follow_sets = Digraph.new(sets, @includes_relation, @read_sets).compute
    end

    def compute_look_ahead_sets
      @states.each do |state|
        rules.each do |rule|
          ary = @lookback_relation[[state.id, rule.id]]
          next if !ary

          ary.each do |state2_id, nterm_token_id|
            # q = state, A -> ω = rule, p = state2, A = nterm
            follows = @follow_sets[[state2_id, nterm_token_id]]

            next if follows == 0

            key = [state.id, rule.id]
            @la[key] ||= 0
            look_ahead = @la[key] | follows
            @la[key] |= look_ahead

            # No risk of conflict when
            # * the state only has single reduce
            # * the state only has nterm_transitions (GOTO)
            next if state.reduces.count == 1 && state.term_transitions.count == 0

            state.set_look_ahead(rule, bitmap_to_terms(look_ahead))
          end
        end
      end
    end

    def bitmap_to_terms(bit)
      ary = Bitmap.to_array(bit)
      ary.map do |i|
        @grammar.find_symbol_by_number!(i)
      end
    end

    def compute_conflicts
      compute_shift_reduce_conflicts
      compute_reduce_reduce_conflicts
    end

    def compute_shift_reduce_conflicts
      states.each do |state|
        state.shifts.each do |shift|
          state.reduces.each do |reduce|
            sym = shift.next_sym

            next unless reduce.look_ahead
            next if !reduce.look_ahead.include?(sym)

            # Shift/Reduce conflict
            shift_prec = sym.precedence
            reduce_prec = reduce.item.rule.precedence

            # Can resolve only when both have prec
            unless shift_prec && reduce_prec
              state.conflicts << State::ShiftReduceConflict.new(symbols: [sym], shift: shift, reduce: reduce)
              next
            end

            case
            when shift_prec < reduce_prec
              # Reduce is selected
              state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :reduce)
              shift.not_selected = true
              next
            when shift_prec > reduce_prec
              # Shift is selected
              state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :shift)
              reduce.add_not_selected_symbol(sym)
              next
            end

            # shift_prec == reduce_prec, then check associativity
            case sym.precedence.type
            when :precedence
              # %precedence only specifies precedence and not specify associativity
              # then a conflict is unresolved if precedence is same.
              state.conflicts << State::ShiftReduceConflict.new(symbols: [sym], shift: shift, reduce: reduce)
              next
            when :right
              # Shift is selected
              state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :shift, same_prec: true)
              reduce.add_not_selected_symbol(sym)
              next
            when :left
              # Reduce is selected
              state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :reduce, same_prec: true)
              shift.not_selected = true
              next
            when :nonassoc
              # Can not resolve
              #
              # nonassoc creates "run-time" error, precedence creates "compile-time" error.
              # Then omit both the shift and reduce.
              #
              # https://www.gnu.org/software/bison/manual/html_node/Using-Precedence.html
              state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :error)
              shift.not_selected = true
              reduce.add_not_selected_symbol(sym)
            else
              raise "Unknown precedence type. #{sym}"
            end
          end
        end
      end
    end

    def compute_reduce_reduce_conflicts
      states.each do |state|
        count = state.reduces.count

        for i in 0...count do
          reduce1 = state.reduces[i]
          next if reduce1.look_ahead.nil?

          for j in (i+1)...count do
            reduce2 = state.reduces[j]
            next if reduce2.look_ahead.nil?

            intersection = reduce1.look_ahead & reduce2.look_ahead

            if !intersection.empty?
              state.conflicts << State::ReduceReduceConflict.new(symbols: intersection, reduce1: reduce1, reduce2: reduce2)
            end
          end
        end
      end
    end

    def compute_default_reduction
      states.each do |state|
        next if state.reduces.empty?
        # Do not set, if conflict exist
        next if !state.conflicts.empty?
        # Do not set, if shift with `error` exists.
        next if state.shifts.map(&:next_sym).include?(@grammar.error_symbol)

        state.default_reduction_rule = state.reduces.map do |r|
          [r.rule, r.rule.id, (r.look_ahead || []).count]
        end.min_by do |rule, rule_id, count|
          [-count, rule_id]
        end.first
      end
    end

    def check_conflicts
      sr_count = sr_conflicts.count
      rr_count = rr_conflicts.count

      if @grammar.expect

        expected_sr_conflicts = @grammar.expect
        expected_rr_conflicts = 0

        if expected_sr_conflicts != sr_count
          @warning.error("shift/reduce conflicts: #{sr_count} found, #{expected_sr_conflicts} expected")
        end

        if expected_rr_conflicts != rr_count
          @warning.error("reduce/reduce conflicts: #{rr_count} found, #{expected_rr_conflicts} expected")
        end
      else
        if sr_count != 0
          @warning.warn("shift/reduce conflicts: #{sr_count} found")
        end

        if rr_count != 0
          @warning.warn("reduce/reduce conflicts: #{rr_count} found")
        end
      end
    end

    def compute_predecessors
      queue = [@states.first]
      until queue.empty?
        state = queue.shift
        state.transitions.each do |_, next_state|
          next_state.append_predecessor(state)
          queue << next_state
        end
      end
    end

    def split_states
      @ielr_isocores = Hash.new {|hash, key| hash[key] = [key] }
      @lookaheads_recomputed = Hash.new {|hash, key| hash[key] = false }
      transition_queue = []
      @states.first.transitions.each do |shift, next_state|
        transition_queue << [@states.first, shift, next_state]
      end
      until transition_queue.empty?
        state, shift, next_state = transition_queue.shift
        compute_state(state, shift, next_state)
        next_state.transitions.each do |sh, next_st|
          transition_queue << [next_state, sh, next_st]
        end
      end
    end

    def merge_lookaheads(state, k)
      return if state.kernels.all? {|item| (k[item] - state.item_lookahead_set[item]).empty? }

      state.transitions.each do |shift, next_state|
        next if @lookaheads_recomputed[next_state]
        compute_state(state, shift, next_state)
      end
    end

    def compute_state(state, shift, next_state)
      k = propagate_lookaheads(state, next_state)
      s = @ielr_isocores[next_state].find {|st| is_compatible(st, k) }

      if s.nil?
        s = @ielr_isocores[next_state].last
        new_state = State.new(@states.count, s.accessing_symbol, s.kernels)
        new_state.closure = s.closure
        new_state.compute_shifts_reduces
        s.transitions.each do |sh, next_state|
          new_state.set_items_to_state(sh.next_items, next_state)
        end
        @states << new_state
        new_state.lalr_isocore = s
        @ielr_isocores[s] << new_state
        @ielr_isocores[s].each do |st|
          @ielr_isocores[st] = @ielr_isocores[s]
        end
        @lookaheads_recomputed[new_state] = true
        new_state.item_lookahead_set = k
        state.update_transition(shift, new_state)
      elsif(!@lookaheads_recomputed[s])
        s.item_lookahead_set = k
        @lookaheads_recomputed[s] = true
      else
        state.update_transition(shift, s)
        merge_lookaheads(s, k)
      end
    end

    def propagate_lookaheads(state, next_state)
      next_state.kernels.to_h {|item|
        lookahead_sets =
          if item.position == 1
            compute_goto_follow_set(state.lalr_isocore, item.lhs)
          else
            kernel = state.kernels.find {|k| k.rule == item.rule && k.position == item.position - 1 }
            state.item_lookahead_set[kernel]
          end

        # p [state, lookahead_sets, lookahead_set_filters(next_state)[item]]

        [item, lookahead_sets & lookahead_set_filters(next_state)[item]]
      }
    end

    def lookahead_set_filters(state)
      p state
      state.kernels.to_h {|kernel|
        # p [state, kernel, annotation_list(@lalr_isocores[state])]
        [kernel,
         annotation_list(state.lalr_isocore).filter_map {|token, actions|
          token if token.term? && actions.any? {|item, _| item == kernel }
        }]
      }
    end

    def is_compatible(state, k)
      !@lookaheads_recomputed[state] ||
        annotation_list(state.lalr_isocores).all? {|token, actions|
          a = dominant_contribution(state, token, actions, state.item_lookahead_set)
          b = dominant_contribution(state, token, actions, k)
          a.empty? || b.empty? || a == b
        }
    end

    def dominant_contribution(state, token, actions, lookaheads)
      actions.filter_map {|action, items|
        action if items.empty? || items.any? {|item, bool| bool && lookaheads[item].include?(token) }
      }.reject {|action|
        if action.is_a?(State::Shift)
          action.not_selected
        elsif action.is_a?(State::Reduce)
          action.not_selected_symbols.include?(token)
        end
      }
    end

    def compute_goto_follow_set(state, nterm_token)
      shift, next_state = state.nterm_transitions.find {|shift, _| shift.next_sym == nterm_token }
      return [] if shift.nil? && nterm_token.id.s_value == '$accept'
      state.always_follows(shift, next_state).union(state.kernels.select {|item|
        state.follow_kernel_items(shift, next_state, item)
      }.reduce([]) {|result, item|
        result.union(state.item_lookahead_set[item])
      })
    end

    def annotation_list(state)
      manifestations = state.annotate_manifestation
      predecessors = state.transitions.map {|_, next_state| state.annotate_predecessor(annotation_list(next_state)) }
      p state, state.inadequacy_list, manifestations, predecessors
      manifestations + predecessors
    end
  end
end