In [1]:
using DataStructures: PriorityQueue

In [2]:
using LogProbs

In [31]:
using StatsFuns.RFunctions: gammarand
using StatsFuns: lbeta

In [4]:
import Base: +, -, *, /, zero, one, <
import Base: length, insert!, isempty, convert, getindex, promote_rule, range
import DataStructures: enqueue!, dequeue!

# Grammar (eg. PCFG)

In [28]:
################################
### Conditional Distribution ###
################################

struct SimpleCond{C, D, S} # context, distribution, support
    dists   :: Dict{C, D}
    support :: S
    SimpleCond(dists::Dict{C, D}, support::S) where {C, D, S} =
        new{C, D, S}(dists, unique(support))
end

function SimpleCond(dists::AbstractDict)
    SimpleCond(
        dists,
        vcat([collect(support(dist)) for dist in values(dists)]...)
    )
end

sample(sc::SimpleCond, context, args...) = sample(sc.dists[context], args...)
logscore(sc::SimpleCond, obs, context) = logscore(sc.dists[context], obs)
rm_obs!(sc::SimpleCond, obs, context) = rm_obs!(sc.dists[context], obs)

score_type(::SimpleCond) = LogProb

function add_obs!(cond::SimpleCond{C,D,S}, obs, context) where {C,D,S}
    if !haskey(cond.dists, context)
        cond.dists[context] = D(cond.support)
    end
    add_obs!(cond.dists[context], obs)
end

#############################
### Dirichlet Multinomial ###
#############################

abstract type Distribution{T} end

mutable struct DirCat{T, C} <: Distribution{T}
    counts :: Dict{T, C}
end

DirCat(support, priors) = DirCat(Dict(x => p for (x,p) in zip(support, priors)))
support(dc::DirCat) = keys(dc.counts)

function sample(dc::DirCat)
    weights = [gammarand(c, 1) for c in values(dc.counts)]
    categorical_sample(keys(dc.counts), weights)
end

function logscore(dc::DirCat, obs)
    LogProb(lbeta(sum(values(dc.counts)), 1) - lbeta(dc.counts[obs], 1))
end

function add_obs!(dc::DirCat, obs)
    dc.counts[obs] += 1
end

function rm_obs!(dc::DirCat, obs)
    dc.counts[obs] -= 1
end

##############
### CFRule ###
##############

mutable struct RunningCounter
    n :: Int
end

RunningCounter() = RunningCounter(0)
count!(c::RunningCounter) = c.n += 1

rule_counter = RunningCounter()

struct CFRule{LHS, RHS} # left hand side and right hand side of the rule
    mappings ::Dict{LHS, Vector{RHS}}
    name :: Symbol
end

==(r1::CFRule, r2::CFRule) = r1.name == r2.name
hash(r::CFRule, h::UInt) = hash(hash(CFRule, hash(r.name)), h)

Base.show(io::IO, r::CFRule) = print(io, "CFRule($(r.name))")

CFRule(pairs::Pair...) =
    CFRule(Dict(pairs...), Symbol("rule", count!(rule_counter)))
CFRule(g::Base.Generator) =
    CFRule(Dict(g), Symbol("rule", count!(rule_counter)))
CFRule(f::Function, lhss, name) =
    CFRule(Dict(lhs => f(lhs) for lhs in lhss), name)
CFRule(f::Function, lhss) =
    CFRule(Dict(lhs => f(lhs) for lhs in lhss), Symbol("rule", count!(rule_counter)))

lhss(r::CFRule) = keys(r.mappings) # aka domain
isapplicable(r::CFRule, lhs) = haskey(r.mappings, lhs)
(r::CFRule)(lhs) = r.mappings[lhs]

###############
### CFState ###
###############

mutable struct CompletionAutomaton{Cat,Comp} # category, completion
    transitions :: Vector{Dict{Cat, Int}}
    completions :: Vector{Vector{Comp}}
end

CompletionAutomaton(Cat::Type, Comp::Type) =
    CompletionAutomaton([Dict{Cat, Int}()], [Vector{Comp}()])

number_of_states(ca::CompletionAutomaton) = length(ca.transitions)
isfinal(ca::CompletionAutomaton, s) = isempty(ca.transitions[s])
is_possible_transition(ca::CompletionAutomaton, s, c) = haskey(ca.transitions[s], c)
transition(ca::CompletionAutomaton, s, c) = ca.transitions[s][c]
completions(ca::CompletionAutomaton, s) = ca.completions[s]

function add_completion!(ca::CompletionAutomaton{Cat,Comp}, comp, categories) where {Cat,Comp}
    s = 1
    for c in categories
        if is_possible_transition(ca, s, c)
            s = transition(ca, s, c)
        else
            push!(ca.transitions, Dict{Cat,Int}())
            push!(ca.completions, Vector{Comp}())
            s = ca.transitions[s][c] = number_of_states(ca)
        end
    end
    push!(ca.completions[s], comp)
end

function add_rule!(ca::CompletionAutomaton, r::CFRule)
    for lhs in lhss(r)
        add_completion!(ca, (lhs, r), r(lhs))
    end
end


add_rule! (generic function with 1 method)

In [6]:
#################
### CFGrammar ###
#################

struct CFGrammar{C, T, Cond, F}
    comp_automtn  :: CompletionAutomaton{C, Tuple{C, CFRule{C, C}}}
    startsymbols  :: Vector{C}
    terminal_dict :: Dict{T, Vector{Tuple{C, CFRule{C, T}}}}
    cond          :: Cond # conditional scoring
    dependent_components::F
end

function CFGrammar(
        category_rules::Vector{CFRule{C, C}},
        terminal_rules::Vector{CFRule{C, T}},
        startsymbols  ::Vector{C},
        dependent_components=identity::Function
    ) where {C, T}
    comp_automtn = CompletionAutomaton(C, Tuple{C, CFRule{C, C}})
    for r in category_rules
        add_rule!(comp_automtn, r)
    end

    terminal_dict = Dict{T, Vector{Tuple{C, CFRule{C, T}}}}()
    for r in terminal_rules
        for lhs in lhss(r)
        t = r(lhs)[1]
            if haskey(terminal_dict, t)
                push!(terminal_dict[t], (lhs, r))
            else
                terminal_dict[t] = [(lhs, r)]
            end
        end
    end

    applicable_rules = Dict{C, Vector{CFRule}}()
    for r in CFRule[category_rules; terminal_rules]
        for c in lhss(r)
            if haskey(applicable_rules, c)
                push!(applicable_rules[c], r)
            else
                applicable_rules[c] = CFRule[r]
            end
        end
    end

    cond = SimpleCond(
        Dict(
            dependent_components(c) => let rules = applicable_rules[c]
                n = length(rules)
                k = count(isa.(rules, CFRule{C, T})) # number terminal rules
                DirCat(rules, [fill(1.0, n-k); fill(1/k, k)])
            end
            for c in keys(applicable_rules)
        )
    )

    CFGrammar(comp_automtn, startsymbols, terminal_dict, cond, dependent_components)
end

dependent_components(g::CFGrammar, c) = g.dependent_components(c)

startstate(g::CFGrammar) = 1
startsymbols(g::CFGrammar) = g.startsymbols

isfinal(g::CFGrammar, s) = isfinal(g.comp_automtn, s)
is_possible_transition(g::CFGrammar, s, c) = is_possible_transition(g.comp_automtn, s, c)
transition(g::CFGrammar, s, c) = transition(g.comp_automtn, s, c)

completions(g::CFGrammar, s::Int) =
    ((c, r, score(g, c, r)) for (c, r) in completions(g.comp_automtn, s))
completions(g::CFGrammar, t) =
    ((c, r, score(g, c, r)) for (c, r) in g.terminal_dict[t])

score(g::CFGrammar, c, r) = logscore(g.cond, r, dependent_components(g, c))

@inline function types(grammar::CFGrammar{C, T, Cond}) where {C, T, Cond}
    C, T, CFRule{C, C}, CFRule{C, T}, Int, LogProb
end

types (generic function with 1 method)

## Interface types

### Completion

In [7]:
###################
### Completions ###
###################

struct TerminalCompletion{T,TR,S}
    terminal :: T
    rule     :: TR
    score    :: S
end
terminal(comp::TerminalCompletion) = comp.terminal
rule(comp::TerminalCompletion) = comp.rule
score(comp::TerminalCompletion) = comp.score

struct EdgeCompletion{E,CR,S}
    edge   :: E
    rule   :: CR
    score  :: S
    inloop :: Bool
end
edge(comp::EdgeCompletion) = comp.edge
rule(comp::EdgeCompletion) = comp.rule
score(comp::EdgeCompletion) = comp.score
inloop(comp::EdgeCompletion) = comp.inloop
EdgeCompletion(edge, rule, score) = EdgeCompletion(edge, rule, score, false)
@inline function ==(c1::EdgeCompletion, c2::EdgeCompletion)
    c1.edge == c2.edge && c1.rule == c2.rule
end

== (generic function with 2 methods)

### Traversal

In [8]:
#################
### Traversal ###
#################

struct Traversal{E,CO,S}
    edge   :: Union{E,Nothing}
    cont   :: CO
    score  :: S
    inloop :: Bool
end
Traversal(edge, cont, score) = Traversal(edge, cont, score, false)
Traversal(edge, cont) = Traversal(edge, cont, score(edge) * score(cont), false)
hasedge(trav::Traversal) = !isnothing(trav.edge)
edge(trav::Traversal) = get(trav.edge)
cont(trav::Traversal) = trav.cont
score(trav::Traversal) = trav.score
inloop(trav::Traversal) = trav.inloop

@inline function ==(t1::Traversal, t2::Traversal)
    if hasedge(t1)
        if hasedge(t2)
            get(t1.edge) == get(t2.edge) && t1.cont == t2.cont
        else
            false
        end
    else
        if hasedge(t2)
            false
        else
            t1.cont == t2.cont
        end
    end
end

@inline function Traversal(cont)
    Traversal(nothing, cont, score(cont), false)
end

Traversal

# Parser types

### Range

In [9]:
##############
### ModInt ###
##############

struct ModInt{n} <: Number
  val::Int
  ModInt{n}(val) where {n} = new(mod(val,n))
end

show(io::IO, a::ModInt{n}) where n = print(io, "$(a.val) mod $n")

+(a::ModInt{n}, b::ModInt{n}) where n = ModInt{n}(a.val + b.val)
-(a::ModInt{n}) where n = - a.val
-(a::ModInt{n}, b::ModInt{n}) where n = ModInt{n}(a.val - b.val)
*(a::ModInt{n}, b::ModInt{n}) where n = ModInt{n}(a.val * b.val)
/(a::ModInt{n}, b::ModInt{n}) where n = a * invmod(b, n)

<(a::ModInt{n}, b::ModInt{n}) where n = a.val < b.val

one(a::ModInt{n}) where n = ModInt{n}(1)
zero(a::ModInt{n}) where n = ModInt{n}(0)

convert(::Type{ModInt{n}}, x::Int) where n = ModInt{n}(x)
convert(::Type{Int}, x::ModInt) = x.val

getindex(t::Union{Tuple, Array}, i::ModInt) = getindex(t, i.val + 1)

promote_rule(::Type{ModInt{n}}, ::Type{Int}) where n = ModInt{n}


#############
### Range ###
#############

abstract type ItemRange end

ItemRange(s::Int, e::Int, n::Int, cyclic::Bool) =
    cyclic ? CyclicRange(s, e, n) : IntervalRange(s, e)

start(r::ItemRange) = r.start
_end(r::ItemRange)  = r._end

struct IntervalRange <: ItemRange
    start :: Int
    _end  :: Int
end

length(r::IntervalRange) = _end(r) - start(r)
concatenable(r1::IntervalRange, r2::IntervalRange) = _end(r1) == start(r2)

function *(r1::IntervalRange, r2::IntervalRange)
    @assert concatenable(r1, r2)
    IntervalRange(start(r1), _end(r2))
end

struct CyclicRange{n} <: ItemRange
    start  :: ModInt{n}
    _end   :: ModInt{n}
    length :: Int
end
CyclicRange(s::ModInt, e::ModInt) = CyclicRange(s, e, Int(e-s))
CyclicRange(s::Int, e::Int, n::Int) = CyclicRange(ModInt{n}(s), ModInt{n}(e))

length(r::CyclicRange) = r.length

@inline function concatenable(r1::CyclicRange{n}, r2::CyclicRange{n}) where n
    _end(r1) == start(r2) && length(r1) + length(r2) <= n
end

function *(r1::CyclicRange, r2::CyclicRange)
    @assert concatenable(r1, r2)
    CyclicRange(start(r1), _end(r2), length(r1) + length(r2))
end

* (generic function with 358 methods)

## ItemKey

In [10]:
###############
### ItemKey ###
###############

abstract type ItemKey{R} end

range(k::ItemKey)  =          k.range
start(k::ItemKey)  =  start(range(k))
_end(k::ItemKey)   =   _end(range(k))
length(k::ItemKey) = length(range(k))

length (generic function with 126 methods)

### Partial Item Key (Edge Key)

In [11]:
struct EdgeKey{R,ST} <: ItemKey{R}
    range :: R
    state :: ST
end
state(k::EdgeKey) = k.state

state (generic function with 1 method)

### Complete Item Key (Constituent Key)

In [12]:
struct ConstituentKey{R,C} <: ItemKey{R}
    range    :: R
    category :: C
end
category(k::ConstituentKey) = k.category

category (generic function with 1 method)

## Item

In [13]:
############
### Item ###
############

abstract type Item end

Item(key, trav::Traversal) = Edge(key, trav)
Item(key, comp::EdgeCompletion) = Constituent(key, comp)

key(item::Item) = item.key
range(item::Item) = range(key(item))
start(item::Item) = start(range(item))
_end(item::Item) = _end(range(item))
length(item::Item) = length(range(item))
isfinished(item::Item) = !(isnothing(item.score))
lastpopscore(item::Item) = item.lastpopscore
insidepopnumber(item::Item) = item.insidepopnumber

insidepopnumber (generic function with 1 method)

### Partial Item (Edge)

In [14]:
############
### Edge ###
############

mutable struct Edge{R,ST,S,CO} <: Item
    key             :: EdgeKey{R,ST}
    score           :: Union{S,Nothing}
    lastpopscore    :: S
    insidepopnumber :: Int
    traversals      :: Vector{Traversal{Edge{R,ST,S,CO},CO,S}}
end

@inline function Edge(key, trav::Traversal{E,CO,S}) where {E,CO,S}
    Edge(key, nothing, zero(S), 0, [trav])
end

state(edge::Edge) = state(key(edge))
traversals(edge::Edge) = edge.traversals

@inline function score(edge::Edge)
    if isfinished(edge)
        get(edge.score)
    else
        sum(score(trav) for trav in edge.traversals)
    end
end

function add!(edge::Edge, trav)
    i = findfirst(t->t==trav, edge.traversals)
    if i != 0
        edge.traversals[i] = trav
    else
        push!(edge.traversals, trav)
    end
    nothing
end


add! (generic function with 1 method)

### Complete Item (Constituent)

In [15]:

###################
### Constituent ###
###################

mutable struct Constituent{R,C,T,CR,TR,ST,S} <: Item
    key                 :: ConstituentKey{R,C}
    score               :: Union{S,Nothing}
    lastpopscore        :: S
    insidepopnumber     :: Int
    terminal_completion :: Union{TerminalCompletion{T,TR,S}, Nothing}
    completions         :: Vector{EdgeCompletion{Edge{R,ST,S,Constituent{R,C,T,CR,TR,ST,S}},CR,S}}
end

@inline function Constituent(
        key::ConstituentKey{R,C}, comp::TerminalCompletion, grammar
    ) where {R,C}
    C_, T, CR, TR, ST, S = types(grammar)
    Constituent(
        key, nothing, zero(S), 0, comp,
        Vector{EdgeCompletion{Edge{R,ST,S,Constituent{R,C,T,CR,TR,ST,S}},CR,S}}()
    )
end

@inline function Constituent(
        key,
        comp :: EdgeCompletion{Edge{R,ST,S,Constituent{R,C,T,CR,TR,ST,S}},CR,S}
    ) where {R,C,T,CR,TR,ST,S}
    Constituent(
        key, nothing, zero(S), 0,
        nothing,
        [comp]
    )
end

category(cont::Constituent) = category(key(cont))
completions(cont::Constituent) = cont.completions

hasterminal(cont::Constituent) = !(isnothing(cont.terminal_completion))
terminal_completion(cont::Constituent) = get(cont.terminal_completion)
terminal(cont::Constituent) = get(cont.terminal_completion).terminal

@inline function score(cont::Constituent)
    if isfinished(cont)
        get(cont.score)
    else
        if hasterminal(cont)
            if isempty(completions(cont))
                score(terminal_completion(cont))
            else
                +(
                    score(terminal_completion(cont)),
                    sum(score(comp) for comp in completions(cont))
                )
            end
        else
            sum(score(comp) for comp in completions(cont))
        end
    end
end

function add!(cont::Constituent, comp)
    i = findfirst(c->c==comp, cont.completions)
    if i != 0
        cont.completions[i] = comp
    else
        push!(cont.completions, comp)
    end
    nothing
end

#@inline function Traversal(cont::Constituent)
#    Traversal(nothing, cont, score(cont), false)
#end

add! (generic function with 2 methods)

## Logbook

In [16]:
#####################
### ParserLogbook ###
#####################

struct ParserLogbook{R,C,T,CR,TR,ST,S}
    edges :: Dict{EdgeKey{R,ST}, Edge{R,ST,S,Constituent{R,C,T,CR,TR,ST,S}}}
    conts :: Dict{ConstituentKey{R,C}, Constituent{R,C,T,CR,TR,ST,S}}
end

@inline function ParserLogbook(grammar, n::Int, cyclic::Bool)
    R = cyclic ? CyclicRange{n} : IntervalRange
    C,T,CR,TR,ST,S = types(grammar)
    ParserLogbook(
        Dict{EdgeKey{R,ST}, Edge{R,ST,S,Constituent{R,C,T,CR,TR,ST,S}}}(),
        Dict{ConstituentKey{R,C}, Constituent{R,C,T,CR,TR,ST,S}}()
    )
end
discover!(logbook::ParserLogbook, edge::Edge) =
    logbook.edges[key(edge)] = edge
discover!(logbook::ParserLogbook, cont::Constituent) =
    logbook.conts[key(cont)] = cont
isdiscovered(logbook, key::EdgeKey) = haskey(logbook.edges, key)
isdiscovered(logbook, key::ConstituentKey) = haskey(logbook.conts, key)
getitem(logbook, key::EdgeKey) = logbook.edges[key]
getitem(logbook, key::ConstituentKey) = logbook.conts[key]



getitem (generic function with 2 methods)

## Chart

In [17]:
##################
### ParseChart ###
##################

struct ChartCell{R,C,T,CR,TR,ST,S}
    edges :: Dict{ST, Vector{Edge{R,ST,S,Constituent{R,C,T,CR,TR,ST,S}}}}
    conts :: Dict{C, Vector{Constituent{R,C,T,CR,TR,ST,S}}}
end
@inline function ChartCell(grammar, n::Int, cyclic::Bool)
    R = cyclic ? CyclicRange{n} : IntervalRange
    C,T,CR,TR,ST,S = types(grammar)
    ChartCell(
        Dict{ST, Vector{Edge{R,ST,S,Constituent{R,C,T,CR,TR,ST,S}}}}(),
        Dict{C, Vector{Constituent{R,C,T,CR,TR,ST,S}}}()
    )
end

struct ParseChart{R,C,T,CR,TR,ST,S}
    cells :: Vector{ChartCell{R,C,T,CR,TR,ST,S}}
end
# vector indices begin with 1
# item   indices begin with 0

edges(chart::ParseChart, edge::Edge)        = chart.cells[ _end(edge)+1].edges
edges(chart::ParseChart, cont::Constituent) = chart.cells[start(cont)+1].edges
conts(chart::ParseChart, edge::Edge)        = chart.cells[ _end(edge)+1].conts
conts(chart::ParseChart, cont::Constituent) = chart.cells[start(cont)+1].conts

@inline function push_or_init!(d::Dict, k, v)
    if haskey(d, k)
        push!(d[k], v)
    else
        d[k] = [v]
    end
end
insert!(chart::ParseChart, edge::Edge) =
    push_or_init!(edges(chart, edge), state(edge), edge)
insert!(chart::ParseChart, cont::Constituent) =
    push_or_init!(conts(chart, cont), category(cont), cont)


insert! (generic function with 9 methods)

## Agenda

In [18]:
####################
### InsideAgenda ###
####################

struct InsideAgenda{R,C,T,CR,TR,ST,S}
    edge_queue :: PriorityQueue{Edge{R,ST,S,Constituent{R,C,T,CR,TR,ST,S}}, Int, Base.Order.ForwardOrdering}
    cont_queue :: PriorityQueue{Constituent{R,C,T,CR,TR,ST,S}, Int, Base.Order.ForwardOrdering}
end
function InsideAgenda(grammar, n::Int, cyclic)
    R = cyclic ? CyclicRange{n} : IntervalRange
    C,T,CR,TR,ST,S = types(grammar)
    InsideAgenda(
        PriorityQueue{Edge{R,ST,S,Constituent{R,C,T,CR,TR,ST,S}}, Int}(),
        PriorityQueue{Constituent{R,C,T,CR,TR,ST,S}, Int}()
    )
end
@inline function enqueue!(agenda::InsideAgenda, edge::Edge, just_used)
    agenda.edge_queue[edge] = priority(edge, just_used)
end
@inline function enqueue!(agenda::InsideAgenda, cont::Constituent, just_used)
    agenda.cont_queue[cont] = priority(cont, just_used)
end
@inline function next_is_edge(agenda::InsideAgenda)
    isempty(agenda.cont_queue) || !isempty(agenda.edge_queue) && peek(agenda.edge_queue)[2] < peek(agenda.cont_queue)[2]
end
dequeue_edge!(agenda::InsideAgenda)    = dequeue!(agenda.edge_queue)
dequeue_cont!(agenda::InsideAgenda)    = dequeue!(agenda.cont_queue)
isempty(agenda::InsideAgenda)          = isempty(agenda.edge_queue) && isempty(agenda.cont_queue)
priority(edge::Edge, just_used)        = 4 * length(edge) - 2*!(just_used) - 1
priority(cont::Constituent, just_used) = 4 * length(cont) - 2*!(just_used)



priority (generic function with 2 methods)

# Chart Parser

In [19]:
######################
### Parser Methods ###
######################

function create_or_update!(key, trav_or_comp, agenda, logbook)
    if isdiscovered(logbook, key)
        item = getitem(logbook, key)
        add!(item, trav_or_comp)
    else
        item = Item(key, trav_or_comp)
        discover!(logbook, item)
    end
    enqueue!(agenda, item, false)
    nothing
end

@noinline function initialize(terminals, grammar, epsilon, cyclic)
    n       = length(terminals)
    chart   = ParseChart([ChartCell(grammar, n, cyclic) for i in 0:(cyclic ? n-1 : n)])
    agenda  = InsideAgenda(grammar, n, cyclic)
    logbook = ParserLogbook(grammar, n, cyclic)

    for (i, terminal) in enumerate(terminals)
        for (category, rule, score) in completions(grammar, terminal)
            cont = Constituent(
                ConstituentKey(ItemRange(i-1, i, n, cyclic), category),
                TerminalCompletion(terminal, rule, score),
                grammar
            )
            discover!(logbook, cont)
            enqueue!(agenda, cont, false)
        end
        if !ismissing(epsilon)
            for (category, rule, score) in completions(grammar, epsilon)
                cont = Constituent(
                    ConstituentKey(ItemRange(i-1, i-1, n, cyclic), category),
                    TerminalCompletion(epsilon, rule, score),
                    grammar
                )
                discover!(logbook, cont)
                enqueue!(agenda, cont, false)
            end
        end
    end
    if !ismissing(epsilon) && !cyclic
        for (category, rule, score) in completions(grammar, epsilon)
            cont = Constituent(
                ConstituentKey(ItemRange(n, n, n, cyclic), category),
                TerminalCompletion(epsilon, rule, score),
                grammar
            )
            discover!(logbook, cont)
            enqueue!(agenda, cont, false)
        end
    end
    chart, agenda, logbook
end


initialize (generic function with 1 method)

In [20]:
@inline function do_fundamental_rule!(
        edge::Edge, chart, agenda, logbook, grammar, cyclic
    )
    for category in keys(conts(chart, edge))
        if is_possible_transition(grammar, state(edge), category)
            for cont in conts(chart, edge)[category]
                if !cyclic || concatenable(range(edge), range(cont))
                    trav      = Traversal(edge, cont)
                    new_state = transition(grammar, state(edge), category)
                    key       = EdgeKey(range(edge) * range(cont), new_state)
                    create_or_update!(key, trav, agenda, logbook)
                end
            end
        end
    end
    nothing
end

@inline function do_fundamental_rule!(
        cont::Constituent, chart, agenda, logbook, grammar, cyclic
    )
    for state in keys(edges(chart, cont))
        if is_possible_transition(grammar, state, category(cont))
            for edge in edges(chart, cont)[state]
                if !cyclic || concatenable(range(edge), range(cont))
                    trav      = Traversal(edge, cont)
                    new_state = transition(grammar, state, category(cont))
                    key       = EdgeKey(range(edge) * range(cont), new_state)
                    create_or_update!(key, trav, agenda, logbook)
                end
            end
        end
    end
    nothing
end

do_fundamental_rule! (generic function with 2 methods)

In [21]:
@inline function introduce_edge!(cont, agenda, logbook, grammar)
    if is_possible_transition(grammar, startstate(grammar), category(cont))
        state = transition(grammar, startstate(grammar), category(cont))
        key   = EdgeKey(range(cont), state)
        create_or_update!(key, Traversal(cont), agenda, logbook)
    end
    nothing
end


introduce_edge! (generic function with 1 method)

In [22]:
@noinline function complete_edge!(edge, agenda, logbook::ParserLogbook, grammar)
    for (category::C, rule::CR, s::S) in completions(grammar, state(edge))
        key  = ConstituentKey(range(edge), category)
        comp = EdgeCompletion(edge, rule, score(edge) * s)
        create_or_update!(key, comp, agenda, logbook)
    end
    nothing
end


complete_edge! (generic function with 1 method)

In [23]:
@noinline function process_edge!(
        edge, chart, agenda, logbook, grammar, max_pop_num, cyclic
    )
    s = score(edge)
    edge.insidepopnumber += 1
    if s ≈ lastpopscore(edge) || insidepopnumber(edge) == max_pop_num
        if !isfinal(grammar, state(edge))
            insert!(chart, edge)
        end
        edge.score = Nullable(s) # finish the edge
        do_fundamental_rule!(edge, chart, agenda, logbook, grammar, cyclic)
    else
        complete_edge!(edge, agenda, logbook, grammar)
        edge.lastpopscore = s
        enqueue!(agenda, edge, true)
    end
    nothing
end

process_edge! (generic function with 1 method)

In [24]:
@noinline function process_cont!(
        cont, chart, agenda, logbook, grammar, max_pop_num, cyclic
    )
    s = score(cont)
    cont.insidepopnumber += 1
    if s ≈ lastpopscore(cont) || insidepopnumber(cont) == max_pop_num
        insert!(chart, cont)
        cont.score = Nullable(s) # finish the constituent
        do_fundamental_rule!(cont, chart, agenda, logbook, grammar, cyclic)
    else
        introduce_edge!(cont, agenda, logbook, grammar)
        cont.lastpopscore = s
        enqueue!(agenda, cont, true)
    end
    nothing
end

process_cont! (generic function with 1 method)

In [25]:
@noinline function loop!(chart, agenda, args...)
    while !isempty(agenda)
        if next_is_edge(agenda)
            process_edge!(dequeue_edge!(agenda), chart, agenda, args...)
        else
            process_cont!(dequeue_cont!(agenda), chart, agenda, args...)
        end
    end
end

loop! (generic function with 1 method)

# RUNNING Parser

In [26]:
@noinline function run_chartparser(terminals, grammar; epsilon=missing, max_pop_num=4, cyclic=false)
    C, T, CR, TR, ST, S = types(grammar)
    chart, agenda, logbook = initialize(
        T.(terminals), grammar, epsilon, cyclic)
    loop!(chart, agenda, logbook, grammar, max_pop_num, cyclic)
    #ParseForest(chart, T.(terminals), grammar, cyclic)
    chart
end

run_chartparser (generic function with 1 method)

In [29]:
############
### Test ###
############

ascend = CFRule(1:9) do i
    [i, i+1]
end
double = CFRule(1:10) do i
    [i, i]
end
terminate = CFRule(1:10) do i
    [string(i)]
end
grammar = CFGrammar([ascend, double], [terminate], [1])
# exp(score(grammar, 9, terminate))
#
# using BenchmarkTools
# @btime score(run_chartparser(["1" for i in 1:10], grammar))
# @btime score(run_chartparser(["1" for i in 1:60], grammar))
# @btime score(run_chartparser(["1" for i in 1:30], grammar))


CFGrammar{Int64,String,SimpleCond{Int64,DirCat{CFRule{Int64,RHS} where RHS,Float64},Array{CFRule{Int64,RHS} where RHS,1}},typeof(identity)}(CompletionAutomaton{Int64,Tuple{Int64,CFRule{Int64,Int64}}}(Dict{Int64,Int64}[Dict(7 => 2,4 => 4,9 => 6,10 => 23,2 => 8,3 => 10,5 => 12,8 => 14,6 => 16,1 => 18…), Dict(7 => 20,8 => 3), Dict(), Dict(4 => 21,5 => 5), Dict(), Dict(9 => 22,10 => 7), Dict(), Dict(2 => 25,3 => 9), Dict(), Dict(4 => 11,3 => 26)  …  Dict(), Dict(), Dict(10 => 24), Dict(), Dict(), Dict(), Dict(), Dict(), Dict(), Dict()], Array{Tuple{Int64,CFRule{Int64,Int64}},1}[[], [], [(7, CFRule(rule1))], [], [(4, CFRule(rule1))], [], [(9, CFRule(rule1))], [], [(2, CFRule(rule1))], []  …  [(4, CFRule(rule2))], [(9, CFRule(rule2))], [], [(10, CFRule(rule2))], [(2, CFRule(rule2))], [(3, CFRule(rule2))], [(5, CFRule(rule2))], [(8, CFRule(rule2))], [(6, CFRule(rule2))], [(1, CFRule(rule2))]]), [1], Dict("8" => [(8, CFRule(rule3))],"4" => [(4, CFRule(rule3))],"1" => [(1, CFRule(rule3))],"10" 

In [33]:
score(grammar, 9, terminate)

DomainError: DomainError with -1.0986122886681096:
log will only return a complex result if called with a complex argument. Try log(Complex(x)).

In [35]:
chart = run_chartparser(["1" for i in 1:10], grammar)

DomainError: DomainError with -1.0986122886681096:
log will only return a complex result if called with a complex argument. Try log(Complex(x)).