In [1]:
import json
tree_data = {
    "node1": {
        #"parent": None,
        "data": "Some data for node1"
    },
    "node2": {
        #"parent": "node1",
        "data": "Some data for node2"
    },
    "node3": {
        "parent": "node1",
        "data": "Some data for node3"
    },
    "node4": {
        "parent": "node3",
        "data": "Some data for node4"
    },
    "node5": {
        "parent": "node3",
        "data": "Some data for node5"
    }
}
print(json.dumps(tree_data, indent=2))

{
  "node1": {
    "data": "Some data for node1"
  },
  "node2": {
    "data": "Some data for node2"
  },
  "node3": {
    "parent": "node1",
    "data": "Some data for node3"
  },
  "node4": {
    "parent": "node3",
    "data": "Some data for node4"
  },
  "node5": {
    "parent": "node3",
    "data": "Some data for node5"
  }
}


In [2]:
import treekit as tk

# load a tree from tree_data
tree = tk.FlatTree(tree_data)
print(json.dumps(tree, indent=2))


{
  "node1": {
    "data": "Some data for node1"
  },
  "node2": {
    "data": "Some data for node2"
  },
  "node3": {
    "parent": "node1",
    "data": "Some data for node3"
  },
  "node4": {
    "parent": "node3",
    "data": "Some data for node4"
  },
  "node5": {
    "parent": "node3",
    "data": "Some data for node5"
  }
}


In [3]:
print(tree["node2"])
node3 = tree.get_node("node3")
print(node3)
print(node3['parent'])
print(node3.children())

{'data': 'Some data for node2'}
ProxyNode(node3: {'parent': 'node1', 'data': 'Some data for node3'})
node1
[ProxyNode(node4: {'parent': 'node3', 'data': 'Some data for node4'}), ProxyNode(node5: {'parent': 'node3', 'data': 'Some data for node5'})]


In [4]:
print(tree.get_root())
for child in tree.get_root().children():
    print(child)

ProxyNode(__logical_root__)
ProxyNode(node1: {'data': 'Some data for node1'})
ProxyNode(node2: {'data': 'Some data for node2'})


We show that it's easy to regenerate any JSON files that may have been used
to generate the FlatTree 'tree'. So, JSON is a good format for storing and
transmitting trees. And, of course, `FlatTree` *is* a dictionary. Of course,
if we store an object that has no serializable representation, it cannot be
stored in JSON.

In [5]:
print(json.dumps(tree,  indent=2) == json.dumps(tree_data, indent=2))

True


In [6]:

# let's create a tree from a dict that cannot be serialized to json
non_serializable_tree_data = {
    "node1": {
        #"parent": None,
        # data is a function that cannot be serialized to json
        "data": lambda x: 2*x**3 + 3*x**2 + 4*x + 5
    }
}

non_serializable_tree = tk.FlatTree(non_serializable_tree_data)
print(non_serializable_tree)
print(non_serializable_tree.get_root())

try:
    json.dumps(non_serializable_tree, indent=2)
except TypeError as e:
    print(e)

{'node1': {'data': <function <lambda> at 0x716c7e2bb9c0>}}
ProxyNode(__logical_root__)
Object of type function is not JSON serializable


In [7]:
from treekit.tree_viz import TreeViz as tv
print(tv.text(tree.get_node("node1")))
print(tv.text(tree.get_root()))
print(tv.text(tree.get_node('node3')))

node1
└── node3
    ├── node4
    └── node5

__logical_root__
├── node1
│   └── node3
│       ├── node4
│       └── node5
└── node2

node3
├── node4
└── node5



In [8]:
child = tree.get_node("node1").add_child(key="node36", data="Some data for node6")
print(json.dumps(tree, indent=2))
print(child)
#print(tree.get_root())

{
  "node1": {
    "data": "Some data for node1"
  },
  "node2": {
    "data": "Some data for node2"
  },
  "node3": {
    "parent": "node1",
    "data": "Some data for node3"
  },
  "node4": {
    "parent": "node3",
    "data": "Some data for node4"
  },
  "node5": {
    "parent": "node3",
    "data": "Some data for node5"
  },
  "node36": {
    "data": "Some data for node6",
    "parent": "node1"
  }
}
ProxyNode(node36: {'data': 'Some data for node6', 'parent': 'node1'})


If we try too add a non-unique node key to the tree, we will get a `KeyError`.

In [9]:
try:
    child2 = tree.get_node("node1").add_child(key="node2", data="Some data for node6!!!")
except KeyError as e:
    print(e)


'Node key already exists'


Let's add some more nodes.

In [10]:

child7 = child.add_child(key="node7", data="Some data for node7")
print(child7)

ProxyNode(node7: {'data': 'Some data for node7', 'parent': 'node36'})


We can see that the underlying tree that the child is a member of has been
appropriately modified:

In [11]:
print(json.dumps(tree, indent=2))

{
  "node1": {
    "data": "Some data for node1"
  },
  "node2": {
    "data": "Some data for node2"
  },
  "node3": {
    "parent": "node1",
    "data": "Some data for node3"
  },
  "node4": {
    "parent": "node3",
    "data": "Some data for node4"
  },
  "node5": {
    "parent": "node3",
    "data": "Some data for node5"
  },
  "node36": {
    "data": "Some data for node6",
    "parent": "node1"
  },
  "node7": {
    "data": "Some data for node7",
    "parent": "node36"
  }
}


We can iterate over the items of the child and we can modify/delete its data.

In [12]:
for k, v in child7.items():
    print(k)

child7["new_data"] = "Some new data for node7"    
print(child7)

del child7["new_data"]
print(child7)

child7["other_new_data"] = "Some other data for node7"


data
parent
ProxyNode(node7: {'data': 'Some data for node7', 'parent': 'node36', 'new_data': 'Some new data for node7'})
ProxyNode(node7: {'data': 'Some data for node7', 'parent': 'node36'})


In [13]:
print(tv.text(tree.get_node("node1"), node_name=lambda n: n))

Node('/node1', data='Some data for node1')
├── Node('/node1/node3', data='Some data for node3')
│   ├── Node('/node1/node3/node4', data='Some data for node4')
│   └── Node('/node1/node3/node5', data='Some data for node5')
└── Node('/node1/node36', data='Some data for node6')
    └── Node('/node1/node36/node7', data='Some data for node7', other_new_data='Some other data for node7')



In [14]:
try:
    non_existent_parent_tree = tk.FlatTree({
        "node1": {
            "parent": "non_existent_parent",
            "data": "Some data for node1"
        }})
except KeyError as e:
    print(e)


"Parent node non-existent: 'non_existent_parent'"


In [15]:

try:
    cycle_tree = tk.FlatTree({
        "node0": {
            "parent": None,
            "data": "Some data for node0"
        },
        "node1": {
            "parent": "node2",
            "data": "Some data for node1"
        },
        "node2": {
            "parent": "node3",
            "data": "Some data for node2"
        },
        "node3": {
            "parent": "node1",
            "data": "Some data for node3"
        },
        "node4": {
            "parent": "node0",
            "data": "Some data for node4"
        }
    })
except ValueError as e:
    print(e)

Cycle detected: {'node3', 'node2', 'node1'}


The following shows how when we set a parent node, it fails if the parent
is non-existent. If you require intermediate state to be an invalid tree,
you can disable constraint checking by invoking `check_constraints(false)`.

In [16]:
try:
    tree.get_node("node2")['parent'] = 'node14994'
except KeyError as e:
    print(e)

print(tree.get_node("node2"))
tree.get_node("node2")['parent'] = 'node1'
print(tree)
tree.check_valid()

"Parent node non-existent: 'node14994'"
ProxyNode(node2: {'data': 'Some data for node2', 'parent': 'node14994'})
{'node1': {'data': 'Some data for node1'}, 'node2': {'data': 'Some data for node2', 'parent': 'node1'}, 'node3': {'parent': 'node1', 'data': 'Some data for node3'}, 'node4': {'parent': 'node3', 'data': 'Some data for node4'}, 'node5': {'parent': 'node3', 'data': 'Some data for node5'}, 'node36': {'data': 'Some data for node6', 'parent': 'node1'}, 'node7': {'data': 'Some data for node7', 'parent': 'node36', 'other_new_data': 'Some other data for node7'}}


In [17]:
"node1" in tree

True

Each node is a key-value pair in the `FlatTree`. We have the  `FlatTree.ProxyNode`
so that we can have an API focused on the nodes and not the underlying dictionary.
However, we stiill permit access to the underlying dictionary. When you modify
the tree in this way, we still maintain the integrity of the tree.

Since the `FlatTree` represents nodes as key-value pairs, and the value must
have a parent key, along with any other arbitrary data, each value for a node
must be a dictionary.

Below, we see that trying to add a `test` node with a non-dictionary value
generates an error.

In [18]:
try:
    import copy
    error_tree = copy.deepcopy(tree)
    error_tree['test'] = "Some test data"
except ValueError as e:
    print(e)


Node test's value must be a dictionary: value='Some test data'


Note that if we didn't make a deep copy of `tree` and just worked directly
with it, the above operation would have put `tree` in an invalid state.
If you want to work with the tree directly, you can disable constraint checking
by invoking `check_constraints(false)`, in which case it won't even throw an error
but will technically no longer be a proper tree.

In [19]:

tree['new_key'] = {'parent': 'node1', 'data': 'Some new data'}
print('-'*10)
print(json.dumps(tree, indent=2))

----------
{
  "node1": {
    "data": "Some data for node1"
  },
  "node2": {
    "data": "Some data for node2",
    "parent": "node1"
  },
  "node3": {
    "parent": "node1",
    "data": "Some data for node3"
  },
  "node4": {
    "parent": "node3",
    "data": "Some data for node4"
  },
  "node5": {
    "parent": "node3",
    "data": "Some data for node5"
  },
  "node36": {
    "data": "Some data for node6",
    "parent": "node1"
  },
  "node7": {
    "data": "Some data for node7",
    "parent": "node36",
    "other_new_data": "Some other data for node7"
  },
  "new_key": {
    "parent": "node1",
    "data": "Some new data"
  }
}


In [20]:
print(tree.get_node('new_key'))
print(type(tree.get_node('new_key')))
print(tree['new_key'])
print(type(tree['new_key']))


ProxyNode(new_key: {'parent': 'node1', 'data': 'Some new data'})
<class 'treekit.flattree.FlatTree.ProxyNode'>
{'parent': 'node1', 'data': 'Some new data'}
<class 'dict'>


In [21]:
root_node = tree.get_root()
print(root_node)

try:
    root_node['data'] = "Some new data for root node"
except TypeError as e:
    print(e)

try:
    root_node['parent'] = None
except TypeError as e:
    print(e)

try:
    root_node.clear()
except TypeError as e:
    print(e)

print('-'*10)
print(child7)
child7.clear()

print(json.dumps(tree, indent=2))

ProxyNode(__logical_root__)
ProxyNode(__logical_root__) is immutable
ProxyNode(__logical_root__) is immutable
----------
ProxyNode(node7: {'data': 'Some data for node7', 'parent': 'node36', 'other_new_data': 'Some other data for node7'})
{
  "node1": {
    "data": "Some data for node1"
  },
  "node2": {
    "data": "Some data for node2",
    "parent": "node1"
  },
  "node3": {
    "parent": "node1",
    "data": "Some data for node3"
  },
  "node4": {
    "parent": "node3",
    "data": "Some data for node4"
  },
  "node5": {
    "parent": "node3",
    "data": "Some data for node5"
  },
  "node36": {
    "data": "Some data for node6",
    "parent": "node1"
  },
  "node7": {},
  "new_key": {
    "parent": "node1",
    "data": "Some new data"
  }
}


In [22]:
tree.get_root().add_child(whatever=3).add_child(whatever=4).add_child(whatever=5)
print(json.dumps(tree, indent=2))

{
  "node1": {
    "data": "Some data for node1"
  },
  "node2": {
    "data": "Some data for node2",
    "parent": "node1"
  },
  "node3": {
    "parent": "node1",
    "data": "Some data for node3"
  },
  "node4": {
    "parent": "node3",
    "data": "Some data for node4"
  },
  "node5": {
    "parent": "node3",
    "data": "Some data for node5"
  },
  "node36": {
    "data": "Some data for node6",
    "parent": "node1"
  },
  "node7": {},
  "new_key": {
    "parent": "node1",
    "data": "Some new data"
  },
  "5d52abe0-35b6-4f74-b996-24d664ccefa0": {
    "whatever": 3
  },
  "34dff6ae-6005-40ed-ac1c-4a0a3569fe1a": {
    "whatever": 4,
    "parent": "5d52abe0-35b6-4f74-b996-24d664ccefa0"
  },
  "8f1c44d0-be69-4aa5-90d8-74f5402ec42d": {
    "whatever": 5,
    "parent": "34dff6ae-6005-40ed-ac1c-4a0a3569fe1a"
  }
}


In [23]:
simple_tree = tk.FlatTree({
    "root": {
        "data": "Some data for root",
        #"parent": None
    },
    "child1": {
        "data": "Some data for child1",
        "parent": "root"
    },
    "child2": {
        "data": "Some data for child2",
        "parent": "root"
    },
    "child3": {
        "data": "Some data for child3",
        "parent": "child1"
    }
})
print(json.dumps(simple_tree, indent=2))  

{
  "root": {
    "data": "Some data for root"
  },
  "child1": {
    "data": "Some data for child1",
    "parent": "root"
  },
  "child2": {
    "data": "Some data for child2",
    "parent": "root"
  },
  "child3": {
    "data": "Some data for child3",
    "parent": "child1"
  }
}


In [24]:
print(simple_tree.get_root())
print(simple_tree.get_node("child3").get_parent().get_parent())
print(simple_tree.get_node("child3").get_parent().get_parent().get_parent())
print(simple_tree.get_node("child3").get_parent().get_parent().get_parent().get_parent())

ProxyNode(__logical_root__)
ProxyNode(root: {'data': 'Some data for root'})
ProxyNode(__logical_root__)
None


In [25]:
import treekit.tree_converter as tc
new_tree = tc.TreeConverter.to_treenode(simple_tree.get_root())

print(json.dumps(new_tree, indent=2))

{
  "__name__": "__logical_root__",
  "children": [
    {
      "__name__": "root",
      "data": "Some data for root",
      "children": [
        {
          "__name__": "child1",
          "data": "Some data for child1",
          "parent": "root",
          "children": [
            {
              "__name__": "child3",
              "data": "Some data for child3",
              "parent": "child1",
              "children": []
            }
          ]
        },
        {
          "__name__": "child2",
          "data": "Some data for child2",
          "parent": "root",
          "children": []
        }
      ]
    }
  ]
}


Let's get the meaningful actual root of the tree and map that to a `TreeNode`.
The actual root we labled as `root` is the meaningful root of the tree, the
logical root is just a device used by `FlatTree` to make a forest of trees
look like a single tree. It is the parent of all nodes that have had no parent
assigned to them. In this case, the logical root is the parent of the only
node that has no parent assigned to it -- the node labeled `root`.

In [26]:
print(json.dumps(tc.TreeConverter.to_treenode(simple_tree.get_node("root")), indent=2))

{
  "__name__": "root",
  "data": "Some data for root",
  "children": [
    {
      "__name__": "child1",
      "data": "Some data for child1",
      "parent": "root",
      "children": [
        {
          "__name__": "child3",
          "data": "Some data for child3",
          "parent": "child1",
          "children": []
        }
      ]
    },
    {
      "__name__": "child2",
      "data": "Some data for child2",
      "parent": "root",
      "children": []
    }
  ]
}


In [27]:
testtree = tk.FlatTree()
testtree.get_root()
testtree.get_root().add_child(key="child1", data="Some data for child1")
print(json.dumps(testtree, indent=2))
# when we add a child to the empty root node, the node is turned into the root.
# if we add another child
print(testtree.get_root())

{
  "child1": {
    "data": "Some data for child1"
  }
}
ProxyNode(__logical_root__)


In [28]:
tree_node = tk.TreeNode({
    "data": "Some data for root",
    "children": [
        {
            "data": "Some data for child1",
            "children": [
                {
                    "data": "Some data for child3",
                    "children": []
                }
            ]
        },
        {
            "data": "Some data for child2",
            "children": []
        }
    ]
    })
print(TreeViz.text(tree_node.get_root()))

NameError: name 'TreeViz' is not defined

We see that when we print the node of a `TreeNode`, we get the entire subtree.
If you just want the node:

In [None]:
print(tree_node.get_data())

{'__name__': {'data': 'Some data for root', 'children': [{'data': 'Some data for child1', 'children': [{'data': 'Some data for child3', 'children': []}]}, {'data': 'Some data for child2', 'children': []}]}}


In [None]:
root = tk.TreeNode(data={ "cargo":"root", "more": 4 })
node1 = root.add_child(data={"data": "node1 - child of root"})
node2 = root.add_child(data={"data": "node2 - child of root"})
node3 = node2.add_child(data={"data": "node3 - child of node2"})
#print(json.dumps(root, indent=2))
print(root.name)

{'cargo': 'root', 'more': 4}


In [None]:
flat = tc.TreeConverter.to_flattree
nest = tc.TreeConverter.to_treenode
print(json.dumps(flat(root), indent=2))

{
  "{'cargo': 'root', 'more': 4}": {
    "data": {
      "cargo": "root",
      "more": 4
    },
    "parent": null
  },
  "{'data': 'node1 - child of root'}": {
    "data": {
      "data": "node1 - child of root"
    },
    "parent": "{'cargo': 'root', 'more': 4}"
  },
  "{'data': 'node2 - child of root'}": {
    "data": {
      "data": "node2 - child of root"
    },
    "parent": "{'cargo': 'root', 'more': 4}"
  },
  "{'data': 'node3 - child of node2'}": {
    "data": {
      "data": "node3 - child of node2"
    },
    "parent": "{'data': 'node2 - child of root'}"
  }
}


We note that the `TreeNode` class does not need to store a unique key for each
node. It just represents a tree as a recursive data structure, with the
`children` attribute being a list of `TreeNode` objects.

When we convert a `TreeNode` object to a `FlatTree` object, we need to assign
a unique key to each node. By default, it just randomly assigns a key using
UUIDs. If you want a more meaningful key, you can use the `node_name` function
parameter that takes in a `TreeNode` object and returns a string that will be
used as the key.

We know that in the data of the `TreeNode` object, we have a `data` attribute
whose value is a string containing the name of the node concatenated with the
`child of <parent>` string. We can use this to generate a more meaningful key.

In [None]:
node_name=lambda node: node['data'].split()[0] if 'data' in node else None

In [None]:

flat_tree = flat(root, node_name)
print(json.dumps(flat_tree, indent=2))

{
  "root": {
    "data": {
      "data": "root"
    },
    "parent": null
  },
  "node1": {
    "data": {
      "data": "node1 - child of root"
    },
    "parent": "root"
  },
  "node2": {
    "data": {
      "data": "node2 - child of root"
    },
    "parent": "root"
  },
  "node3": {
    "data": {
      "data": "node3 - child of node2"
    },
    "parent": "node2"
  }
}


Now let's convert it back to a `TreeNode` object and print it.

In [None]:
root_2 = nest(flat_tree.get_node("root"))
print(json.dumps(root_2, indent=2))


{
  "__name__": "root",
  "data": {
    "data": "root"
  },
  "parent": null,
  "children": [
    {
      "__name__": "node1",
      "data": {
        "data": "node1 - child of root"
      },
      "parent": "root",
      "children": []
    },
    {
      "__name__": "node2",
      "data": {
        "data": "node2 - child of root"
      },
      "parent": "root",
      "children": [
        {
          "__name__": "node3",
          "data": {
            "data": "node3 - child of node2"
          },
          "parent": "node2",
          "children": []
        }
      ]
    }
  ]
}


We see that each node has an additional `parent` key that was not present in the
original tree. This is because the `FlatTree` object needs to store the parent
of each node so that it can regenerate the tree structure. This is not a problem
because the `TreeNode` object does not use this key to represent the tree structure.
It uses the recursive `children` list. They may be removed, if desired, but
otherwise do not cause any harm.

In [None]:
root_3 = flat(root_2)
print(json.dumps(root_3, indent=2))


{
  "root": {
    "data": {
      "__name__": "root",
      "data": {
        "data": "root"
      }
    },
    "parent": null
  },
  "node1": {
    "data": {
      "__name__": "node1",
      "data": {
        "data": "node1 - child of root"
      }
    },
    "parent": "root"
  },
  "node2": {
    "data": {
      "__name__": "node2",
      "data": {
        "data": "node2 - child of root"
      }
    },
    "parent": "root"
  },
  "node3": {
    "data": {
      "__name__": "node3",
      "data": {
        "data": "node3 - child of node2"
      }
    },
    "parent": "node2"
  }
}


In [None]:
print(node_name(flat_tree.get_node("root")))
print(node_name(flat_tree.get_node("node1")))

AttributeError: 'dict' object has no attribute 'split'

In [None]:
anytree = tc.TreeConverter.to_anytree(flat_tree.get_node("root"))


In [None]:

# let's pretty-print the anytree node
print(tv.text(anytree, node_name=lambda node: node))
print(tv.text(flat_tree.get_node("root"),
              node_name=lambda node: node))
print(tv.text(flat_tree.get_node("root")))
              

Node('/root', data='root')
├── Node('/root/node1', data='node1 - child of root')
└── Node('/root/node2', data='node2 - child of root')
    └── Node('/root/node2/node3', data='node3 - child of node2')

Node('/root', data='root')
├── Node('/root/node1', data='node1 - child of root')
└── Node('/root/node2', data='node2 - child of root')
    └── Node('/root/node2/node3', data='node3 - child of node2')

root
├── node1
└── node2
    └── node3



In [None]:
tv.image(anytree,
         filename="anytree.png")
tv.image(flat_tree.get_node("root"),
         filename="flattree.png", maxlevel=2)
tv.image(root,
         filename="treenode.png",
         node_name=lambda n: n.data)
tv.image(root,
         filename="treenode2.png")

tv.image(root, filename="treenode.dot", node_name=lambda n: n.data)