In [1]:
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import string

from toydown import GeoUnit, ToyDown

In [2]:
def create_demo_tree_homogenous_branching(h=3):
    """ Build a random demo tree of height h (intended for h <= 16) with h-way branching.

        Leaf names are "1" followed by h-1 characters, each taken from the h symbols
        of string.hexdigits that follow "0", so a name spells the path from the root "1".
        Every leaf carries three counts: two random integers in [0, 1000), preceded by
        their sum. The result is whatever create_tree_from_leaves builds from those leaves.
    """
    branch_labels = string.hexdigits[1:h+1]
    leaves = []
    for path in itertools.product(branch_labels, repeat=h-1):
        leaves.append("1" + "".join(path))

    # Two independent random counts per leaf; the total is inserted as column 0.
    leaves_counts = np.random.randint(1000, size=(len(leaves), 2))
    totals = leaves_counts.sum(axis=1)
    leaves_attrs = np.insert(leaves_counts, 0, totals, axis=1)

    return create_tree_from_leaves(dict(zip(leaves, leaves_attrs)))

In [3]:
def create_tree_from_leaves(leaf_dict):
    """ Given a dictionary, where the keys are the names of leaf nodes (labeled by their path)
        and the corresponding value is the associated attribute counts, this function returns
        the list of GeoUnits that defines the corresponding tree.

        Every ancestor is named by a prefix of its leaves' names and gets the element-wise
        sum of the attributes of all leaves below it. The tree height is taken from the
        length of the first leaf name. The node named "1" is treated as the root and is
        given parent None; every other node's parent is its name minus the last character.

        The returned list is ordered leaves first, then each higher level in turn, with the
        root last. Within a level, nodes appear in the order their first leaf appears in
        leaf_dict, so the result is the same on every run. An empty leaf_dict yields an
        empty list.
    """
    if not leaf_dict:
        return []

    nodes = leaf_dict.copy()
    h = len(next(iter(leaf_dict)))

    for i in range(2, h+1):
        # dict.fromkeys de-duplicates the prefixes while keeping first-seen order;
        # a set here would make sibling order depend on string hash randomization.
        level_names = dict.fromkeys(name[:-(i-1)] for name in leaf_dict)
        for node in level_names:
            nodes[node] = np.array([v for k, v in leaf_dict.items() if k.startswith(node)]).sum(axis=0)

    return [GeoUnit(k, k[:-1], v) if k != "1" else GeoUnit(k, None, v) for k, v in nodes.items()]

In [4]:
# Seed NumPy's global generator so the random demo counts (drawn with
# np.random.randint inside the tree builder) are identical on every run.
SEED = 0
np.random.seed(SEED)

geounits = create_demo_tree_homogenous_branching(3)
# The builder lists leaves first and the root last; flip the list in place so the
# root "1" comes first before it is handed to ToyDown.
geounits.reverse()

# Overall budget and its three shares (they sum to 1).
# NOTE(review): presumably one share per tree level, confirm against ToyDown.
eps = 1
eps_split = [0.25, 0.25, 0.5]

In [5]:
# Build the ToyDown model from the geounit list, eps and eps_split. The literal 3
# matches the height used to generate the demo tree (presumably ToyDown's height
# argument; confirm against its signature).
model = ToyDown(geounits, 3, eps, eps_split)
# Print the hierarchy of node names as a quick sanity check.
model.show()

1
├── 11
│   ├── 111
│   ├── 112
│   └── 113
├── 12
│   ├── 121
│   ├── 122
│   └── 123
└── 13
    ├── 131
    ├── 132
    └── 133



In [6]:
## Constraints: 0 difference among partition bins
## number of children n, returns list of 0 diff constraints.
def cons_0_diff(n, n_attrs=3):
    """Build one equality constraint per child: its first value equals the sum of the rest.

    Each constraint function takes a flat vector x made of n consecutive groups of
    n_attrs values. For the group starting at index i it returns
    x[i] - (x[i+1] + ... + x[i+n_attrs-1]), which is zero when the constraint holds.

    Args:
        n: number of children, i.e. groups in x.
        n_attrs: values per child, the first being the total of the others.
            Defaults to 3 (one total plus two parts).

    Returns:
        A list of n dicts with keys 'type' (always 'eq') and 'fun', the layout
        used for constraints by scipy.optimize.minimize.
    """
    def make_fun(start):
        # Bind `start` per constraint; closing over the loop variable directly
        # would leave every constraint pointing at the last group.
        return lambda x: x[start] - np.sum([x[j] for j in range(start + 1, start + n_attrs)])

    return [{'type': 'eq', 'fun': make_fun(start)}
            for start in range(0, n * n_attrs, n_attrs)]

In [7]:
# Run ToyDown's noise-and-adjust step. cons_0_diff is passed as the per-node
# constraint factory; opts asks for at most 500 iterations and verbose output
# (presumably forwarded to the underlying optimizer; confirm in ToyDown).
model.noise_and_adjust(node_cons=cons_0_diff, opts={"maxiter": 500, "disp": True})

Adjusting root node 1
Optimization terminated successfully.    (Exit mode 0)
            Current function value: 19.24788271629859
            Iterations: 4
            Function evaluations: 21
            Gradient evaluations: 4
Adjusting children of 1
Optimization terminated successfully.    (Exit mode 0)
            Current function value: 63.33520478308026
            Iterations: 17
            Function evaluations: 193
            Gradient evaluations: 17
Adjusting children of 11
Optimization terminated successfully.    (Exit mode 0)
            Current function value: 7.715322452306339
            Iterations: 41
            Function evaluations: 483
            Gradient evaluations: 41
Adjusting children of 12
Optimization terminated successfully.    (Exit mode 0)
            Current function value: 10.731696765408884
            Iterations: 116
            Function evaluations: 2129
            Gradient evaluations: 115
Adjusting children of 13
Optimization terminated successful

In [8]:
# Show every field stored on the root node "1".
vars(model.get_node("1").data)

{'name': '1',
 'parent': None,
 'attributes': array([9063, 4468, 4595]),
 'identifier': '1',
 'level': 0,
 'noised': array([9062.34547594, 4489.03435865, 4592.55900001]),
 'noise': array([-0.65452406, 21.03435865, -2.44099999]),
 'noise_type': 'laplacian',
 'adjusted': array([9062.44872768, 4471.35890622, 4591.08982147]),
 'error': array([ 0.55127232, -3.35890622,  3.91017853])}

In [9]:
# Show every field stored on the mid-level node "12".
vars(model.get_node("12").data)

{'name': '12',
 'parent': '1',
 'attributes': array([2247,  988, 1259]),
 'identifier': '12',
 'level': 1,
 'noised': array([2245.97968084,  985.05356938, 1249.52131387]),
 'noise': array([-1.02031916, -2.94643062, -9.47868613]),
 'noise_type': 'laplacian',
 'adjusted': array([2244.06151812,  986.24120367, 1257.82031446]),
 'error': array([2.93848188, 1.75879633, 1.17968554])}

In [10]:
# Show every field stored on the leaf node "123".
vars(model.get_node("123").data)

{'name': '123',
 'parent': '12',
 'attributes': array([549, 368, 181]),
 'identifier': '123',
 'level': 2,
 'noised': array([544.87555718, 370.80144817, 181.76967222]),
 'noise': array([-4.12444282,  2.80144817,  0.76967222]),
 'noise_type': 'laplacian',
 'adjusted': array([548.14163535, 367.53048889, 180.61114646]),
 'error': array([0.85836465, 0.46951111, 0.38885354])}