In [1]:
import treetk

In [2]:
def test_ctree(sexp, with_nonterminal_labels, with_terminal_labels):
    """
    Build a constituent tree from an S-expression and print a walkthrough of
    the constituent-tree API: the drawn tree, a pre-order traversal, production
    rules (only when non-terminals are labeled), spans, composed spans,
    constituents, and finally the tree converted back to an S-expression.

    :param sexp: S-expression handed to treetk.preprocess (the demo cells pass a str)
    :param with_nonterminal_labels: forwarded to treetk.sexp2tree; also decides
        whether node labels of non-terminals and production rules are printed
    :param with_terminal_labels: forwarded to treetk.sexp2tree; also decides
        whether terminal labels are printed and whether terminals are included
        in the production rules
    :type with_nonterminal_labels: bool
    :type with_terminal_labels: bool
    :rtype: None
    """
    def _print_listing(title, items):
        # Print a header line followed by one tab-indented line per item.
        # str() is applied explicitly so an item that happens to be a tuple
        # is formatted as a whole instead of being unpacked by "%".
        print(title)
        for item in items:
            print("\t%s" % str(item))

    # Creation
    ctree = treetk.sexp2tree(treetk.preprocess(sexp),
                             with_nonterminal_labels=with_nonterminal_labels,
                             with_terminal_labels=with_terminal_labels)
    print("")
    treetk.pretty_print(ctree)
    print("")

    # Aggregation
    nodes = treetk.traverse(ctree, order="pre-order", include_terminal=True, acc=None)
    print("Traversing (pre-order):")
    for node_i, node in enumerate(nodes):
        is_terminal = node.is_terminal()
        print("\t#%d" % (node_i+1))
        print("\tstr(node) = %s" % str(node))
        print("\tnode.is_terminal(): %s" % is_terminal)
        if is_terminal:
            # Terminals always expose token/index; a label only if the tree was built with them
            if with_terminal_labels:
                print("\tnode.label: %s" % node.label)
            print("\tnode.token: %s" % node.token)
            print("\tnode.index: %s" % node.index)
        elif with_nonterminal_labels:
            print("\tnode.label: %s" % node.label)
        # Further per-node inspection (node.tolist(), node.leaves(), node.children)
        # was left disabled in this demo.

    # Production rules
    if with_nonterminal_labels:
        rules = treetk.aggregate_production_rules(ctree, order="pre-order", include_terminal=with_terminal_labels)
        # The header must reflect whether terminals were actually included
        terminal_note = "w/" if with_terminal_labels else "w/o"
        _print_listing("Production rules (%s terminals, pre-order):" % terminal_note, rules)

    # Spans
    # NOTE: spans are computed explicitly before any span aggregation
    # (presumably required by the aggregate_* calls below -- confirm)
    ctree.calc_spans()

    _print_listing("Spans (w/ terminals, pre-order):",
                   treetk.aggregate_spans(ctree, include_terminal=True, order="pre-order"))
    _print_listing("Spans (w/o terminals, pre-order):",
                   treetk.aggregate_spans(ctree, include_terminal=False, order="pre-order"))

    # Composed spans
    _print_listing("Composed span pairs (pre-order):",
                   treetk.aggregate_composition_spans(ctree, order="pre-order", binary=False))

    # Constituents
    _print_listing("Constituents (pre-order):",
                   treetk.aggregate_constituents(ctree, order="pre-order"))

    # C-tree -> S-expression
    print("Reversed S-expression")
    print("\t%s" % treetk.tree2sexp(ctree))


In [3]:
"""
An example of a constituent tree with non-terminal and terminal labels
"""
# Every node, terminal or not, carries a label in this S-expression
test_ctree(
    sexp="(S (NP (DT a) (NN cat)) (VP (VBZ bites) (NP (DT a) (ADJ small) (NN mouse))))",
    with_nonterminal_labels=True,
    with_terminal_labels=True,
)


                     S
         ____________|_____________
         |                        |
         |                        VP
         |              __________|__________
         |              |                   |
         NP             |                   NP
    _____|_____         |         __________|__________
    |         |         |         |         |         |
    DT        NN       VBZ        DT       ADJ        NN
    |         |         |         |         |         |
[   a   ] [  cat  ] [ bites ] [   a   ] [ small ] [ mouse ]

Traversing (pre-order):
	#1
	str(node) = ( S ( NP ( DT a ) ( NN cat ) ) ( VP ( VBZ bites ) ( NP ( DT a ) ( ADJ small ) ( NN mouse ) ) ) )
	node.is_terminal(): False
	node.label: S
	#2
	str(node) = ( NP ( DT a ) ( NN cat ) )
	node.is_terminal(): False
	node.label: NP
	#3
	str(node) = ( DT a )
	node.is_terminal(): True
	node.label: DT
	node.token: a
	node.index: 0
	#4
	str(node) = ( NN cat )
	node.is_terminal(): True
	node.label: NN
	node.t

In [4]:
"""
An example of a constituent tree with non-terminal labels
"""
# Only the non-terminal nodes are labeled; the words appear bare
test_ctree(
    sexp="(S (NP a cat) (VP bites (NP a small mouse)))",
    with_nonterminal_labels=True,
    with_terminal_labels=False,
)


                     S
         ____________|_____________
         |                        |
         |                        VP
         |              __________|__________
         |              |                   |
         NP             |                   NP
    _____|_____         |         __________|__________
    |         |         |         |         |         |
    *         *         *         *         *         *
    |         |         |         |         |         |
[   a   ] [  cat  ] [ bites ] [   a   ] [ small ] [ mouse ]

Traversing (pre-order):
	#1
	str(node) = ( S ( NP a cat ) ( VP bites ( NP a small mouse ) ) )
	node.is_terminal(): False
	node.label: S
	#2
	str(node) = ( NP a cat )
	node.is_terminal(): False
	node.label: NP
	#3
	str(node) = a
	node.is_terminal(): True
	node.token: a
	node.index: 0
	#4
	str(node) = cat
	node.is_terminal(): True
	node.token: cat
	node.index: 1
	#5
	str(node) = ( VP bites ( NP a small mouse ) )
	node.is_terminal(): False
	n

In [5]:
"""
An example of a constituent tree without labels
"""
# Pure bracketing: neither non-terminals nor terminals are labeled
test_ctree(
    sexp="((a cat) (bites (a small mouse)))",
    with_nonterminal_labels=False,
    with_terminal_labels=False,
)


                     *
         ____________|_____________
         |                        |
         |                        *
         |              __________|__________
         |              |                   |
         *              |                   *
    _____|_____         |         __________|__________
    |         |         |         |         |         |
    *         *         *         *         *         *
    |         |         |         |         |         |
[   a   ] [  cat  ] [ bites ] [   a   ] [ small ] [ mouse ]

Traversing (pre-order):
	#1
	str(node) = ( ( a cat ) ( bites ( a small mouse ) ) )
	node.is_terminal(): False
	#2
	str(node) = ( a cat )
	node.is_terminal(): False
	#3
	str(node) = a
	node.is_terminal(): True
	node.token: a
	node.index: 0
	#4
	str(node) = cat
	node.is_terminal(): True
	node.token: cat
	node.index: 1
	#5
	str(node) = ( bites ( a small mouse ) )
	node.is_terminal(): False
	#6
	str(node) = bites
	node.is_terminal(): True
	node.

In [6]:
"""
An example of a dependency tree
"""

# Build a dependency tree from a token list and labeled arcs
# (judging by the (0, 3, "root") arc, each arc reads (head index, dependent index, label))
tokens = ["ROOT", "a", "boy", "saw", "a", "girl", "with", "a", "telescope"]
arcs = [
    (2, 1, "det"),
    (3, 2, "nsubj"),
    (3, 5, "dobj"),
    (5, 4, "det"),
    (3, 6, "prep"),
    (6, 8, "pobj"),
    (8, 7, "det"),
    (0, 3, "root"),
]
print("tokens = %s" % tokens)
print("arcs = %s" % arcs)
dtree = treetk.arcs2dtree(tokens=tokens, arcs=arcs)

# Draw the tree, framed by blank lines
print("")
treetk.pretty_print_dtree(dtree)
print("")

# String representation (print applies str() itself)
print(">>> str(dtree)")
print(dtree)

# List representation, with and then without arc labels
print(">>> dtree.tolist(labeled=True)")
labeled_arcs = dtree.tolist(labeled=True)
print(labeled_arcs)
print(">>> dtree.tolist(labeled=False)")
unlabeled_arcs = dtree.tolist(labeled=False)
print(unlabeled_arcs)

# dtree keeps a dictionary that maps each head to its dependents
# print(">>> dtree.head2dependents")
# print(dtree.head2dependents)

# dtree also keeps a dictionary that maps each dependent to its head
# print(">>> dtree.dependent2head")
# print(dtree.dependent2head)

# These dictionaries back the get_head/get_dependents lookups, queried here for every token
for index in range(len(tokens)):
    head = dtree.get_head(index)
    print("\tdtree.get_head(%d): %s" % (index, head))
    dependents = dtree.get_dependents(index)
    print("\tdtree.get_dependents(%d): %s" % (index, dependents))


tokens = ['ROOT', 'a', 'boy', 'saw', 'a', 'girl', 'with', 'a', 'telescope']
arcs = [(2, 1, 'det'), (3, 2, 'nsubj'), (3, 5, 'dobj'), (5, 4, 'det'), (3, 6, 'prep'), (6, 8, 'pobj'), (8, 7, 'det'), (0, 3, 'root')]

                                     _______________________________
                                     |                         prep|
       _____________________________ |___________________          | ____________________
       |                       root| |              dobj|          | |              pobj|
       |       _________ _________ | |       __________ |          | |       __________ |
       |       |det    | |nsubj  | | |       |det     | |          | |       |det     | |
       |       V       | V       | V |       V        | V          V |       V        | V
[  ROOT  ] [   a   ] [  boy  ] [  saw  ] [   a   ] [  girl  ] [  with  ] [   a   ] [telescope]

>>> str(dtree)
2-1-det 3-2-nsubj 0-3-root 5-4-det 3-5-dobj 3-6-prep 8-7-det 6-8-pobj
>>> dtree.tolist(lab

In [7]:
"""
An example of ctree-to-dtree conversion
"""

# Build the constituent tree that will be converted; every node carries a label
sexp = treetk.preprocess(
    "(S (NP (DT a) (NN boy)) (VP (VP (VBD saw) (NP (DT a) (NN girl))) (PP (IN with) (NP (DT a) (NN telescope)))))"
)
ctree = treetk.sexp2tree(
    sexp,
    with_terminal_labels=True,
    with_nonterminal_labels=True,
)
# Draw it, followed by a blank separator line
treetk.pretty_print(ctree)
print("")

# Please define your rules for specifying the head node among the children nodes
def func_head_child_rule(node):
    """
    Pick which child of a non-terminal node is its head.

    The (parent label, first child label, second child label) triple is looked
    up in a small rule table. Any configuration that is not listed there --
    including nodes with fewer than two children, for which the first child is
    the only possible choice -- falls back to the first child.

    :type node: NonTerminal
    :rtype: int
    :return: index into node.children of the head child
    """
    head_rules = {
        ("S", "NP", "VP"): 1,    # the second child
        ("NP", "DT", "NN"): 1,
        ("VP", "VP", "PP"): 0,   # the first child
        ("VP", "VBD", "NP"): 0,
        ("PP", "IN", "NP"): 0,
    }
    # Guard against unary (or child-less) nodes instead of indexing children[1] blindly
    children = getattr(node, "children", ())
    if len(children) < 2:
        return 0
    # Children without a label simply never match a rule
    key = (node.label,
           getattr(children[0], "label", None),
           getattr(children[1], "label", None))
    return head_rules.get(key, 0)
    
# Please define your labeling function for the head node (i.e., node.children[i]) and the dependent (i.e., node.children[j]).
def func_label_rule(node, i, j):
    """
    Label the dependency arc between the head child (node.children[i]) and a
    dependent child (node.children[j]) of a non-terminal node.

    :type node: NonTerminal
    :type i: int
    :type j: int
    :rtype: str
    """
    # The arc inherits the constituent label of the parent non-terminal node;
    # i and j are part of the callback signature but are not consulted here.
    # A richer alternative would combine all three labels, e.g.
    #   "%s,%s,%s" % (node.label, node.children[i].label, node.children[j].label)
    arc_label = node.label
    return arc_label

# Assign heads
# The head-child selection callback defined earlier in this cell is passed to calc_heads.
ctree.calc_heads(func_head_child_rule=func_head_child_rule)

# Show
# (Disabled) dump of the head assignment of every node, visited in post-order:
# print("Heads (post-order):")
# nodes = treetk.traverse(ctree, order="post-order", include_terminal=True, acc=None)
# for node_i, node in enumerate(nodes):
#     print("\t#%d" % (node_i+1))
#     print("\tstr(node): %s" % str(node))
#     print("\tnode.head_child_index: %d" % node.head_child_index)
#     print("\tnode.head_token_index: %d" % node.head_token_index)

# Convert
# The arc-labeling callback and the symbol used for the root are passed explicitly.
# NOTE(review): presumably relies on the heads assigned by calc_heads above -- confirm.
dtree = treetk.ctree2dtree(ctree, func_label_rule=func_label_rule, root_symbol="Root")
# Show
treetk.pretty_print_dtree(dtree)


                            S
         ___________________|___________________
         |                                     |
         |                                     VP
         |                     ________________|________________
         |                     |                               |
         |                     VP                              PP
         |              _______|________                _______|________
         |              |              |                |              |
         NP             |              NP               |              NP
    _____|_____         |         _____|______          |         _____|______
    |         |         |         |          |          |         |          |
    DT        NN       VBD        DT         NN         IN        DT         NN
    |         |         |         |          |          |         |          |
[   a   ] [  boy  ] [  saw  ] [   a   ] [  girl  ] [  with  ] [   a   ] [telescope]

    

In [8]:
"""
An example of dtree-to-ctree conversion
"""

# Build the dependency tree that will be converted
tokens = ["<root>", "a", "boy", "saw", "a", "girl", "with", "a", "telescope"]
arcs = [
    (2, 1, "det"),
    (3, 2, "nsubj"),
    (3, 5, "dobj"),
    (5, 4, "det"),
    (3, 6, "prep"),
    (6, 8, "pobj"),
    (8, 7, "det"),
    (0, 3, "<root>"),
]
dtree = treetk.arcs2dtree(tokens=tokens, arcs=arcs)
# Draw the dependency tree, followed by a blank separator line
treetk.pretty_print_dtree(dtree)
print("")

# Turn the dependency tree into a constituent tree and draw the result
ctree = treetk.dtree2ctree(dtree)
treetk.pretty_print(ctree)


                                     _______________________________
                                     |                         prep|
       _____________________________ |___________________          | ____________________
       |                     <root>| |              dobj|          | |              pobj|
       |       _________ _________ | |       __________ |          | |       __________ |
       |       |det    | |nsubj  | | |       |det     | |          | |       |det     | |
       |       V       | V       | V |       V        | V          V |       V        | V
[ <root> ] [   a   ] [  boy  ] [  saw  ] [   a   ] [  girl  ] [  with  ] [   a   ] [telescope]

                             nsubj/dobj/prep
         ___________________________|___________________________
         |              |              |                       |
         |              |              |                      pobj
         |              |              |                _______|________
 

In [9]:
"""
An example of an RST-DT style constituent tree
"""

with open("./treetk/rstdt_example.labeled.nary.ctree") as f:
    # Split every line on whitespace; only the first line is used as the S-expression
    lines = [line.strip().split() for line in f.readlines()]
    sexp = lines[0]
print(" ".join(sexp))

# An RST-DT style constituent tree is read with the standard sexp2tree function ...
ctree = treetk.sexp2tree(
    sexp,
    with_nonterminal_labels=True,
    with_terminal_labels=False,
)
# ... and then requires the RST-DT specific postprocessing step
ctree = treetk.rstdt.postprocess(ctree)

# "f2c" maps fine-grained relations to coarse-grained relations;
# "c2a" then maps the coarse-grained relation labels to abbreviations.
# After each mapping, draw the tree and list the labels of its non-terminal nodes.
for mode in ("f2c", "c2a"):
    ctree = treetk.rstdt.map_relations(ctree, mode=mode)
    treetk.pretty_print(ctree)
    nodes = treetk.traverse(ctree, order="pre-order", include_terminal=False, acc=None)
    for node in nodes:
        print(node.relation_label, node.nuclearity_label)



( <TextualOrganization,N/N> ( <elaboration-additional,N/S> ( <elaboration-additional,N/S> ( <elaboration-additional,N/S> ( <Same-Unit,N/N> ( <elaboration-object-attribute-e,N/S> 0 1 ) 2 ) ( <List,N/N> 3 4 ) ) ( <Same-Unit,N/N> ( <elaboration-object-attribute-e,N/S> 5 6 ) 7 ) ) ( <Otherwise,N/N> ( <elaboration-object-attribute-e,N/S> 8 9 ) 10 ) ) ( <elaboration-general-specific,N/S> 11 12 ) )
                                                                                    <TEXTUAL-ORGANIZATION,N/N>
                                                                       _________________________|_________________________
                                                                       |                                                 |
                                                               <ELABORATION,N/S>                                         |
                                              _________________________|__________________________                       |
  