In [1]:
import sys
sys.path.append("..")

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

import dtree
import dforest
import query

In [2]:
# Value substituted for the '?' marker that flags a missing entry in Auto.data.
MISSING_VALUE_FILL = 100.


def missing_data(x):
    """Parse one text field of Auto.data as a float.

    np.loadtxt applies this converter to every selected column.  Depending on
    the NumPy version and its default ``encoding``, the field arrives either
    as ``str`` or as latin1-encoded ``bytes``; both are accepted so the '?'
    marker is recognised in either case.

    Returns MISSING_VALUE_FILL for '?', otherwise ``float(x)``.
    """
    if isinstance(x, bytes):
        x = x.decode("latin1")
    return MISSING_VALUE_FILL if x == '?' else float(x)


# Skip the header row and read the first eight columns.
data = np.loadtxt("../datasets/Auto.data", converters=missing_data,
                  skiprows=1, usecols=[0, 1, 2, 3, 4, 5, 6, 7])

# Fixed seed so the shuffle (and later np.random draws) are repeatable.
np.random.seed(12345)
np.random.shuffle(data)
np.set_printoptions(precision=1, suppress=True)

# Column 0 is the regression target; the remaining seven columns are features.
auto_X = data[:, 1:]
auto_y = data[:, 0]

In [3]:
# Fit a scikit-learn random forest with default hyperparameters (fit()
# returns the estimator itself), then convert it to the dforest
# representation used by the rest of the notebook.
model = RandomForestRegressor().fit(auto_X, auto_y)

forest = dforest.make_forest_sklearn(model)

In [4]:
# Perturbed copies of the samples: Gaussian noise with mean 0 and standard
# deviation 0.1 is added to every feature value.
noisy_samples = auto_X + np.random.normal(0, 0.1, auto_X.shape)
model_pred = model.predict(noisy_samples)
forest_pred = forest.eval(noisy_samples)

# The converted forest must reproduce the scikit-learn predictions.  An
# absolute tolerance is used instead of comparing values rounded to six
# decimals: two nearly identical floats that straddle a rounding boundary
# round differently and would fail a rounded-equality check spuriously.
np.testing.assert_allclose(forest_pred, model_pred, rtol=0, atol=1e-6)

In [5]:
# Print summary statistics of the converted forest (forest size, average tree
# size, average max depth and the minimum/maximum intervals shown below).
forest.print_summary()

Size of forest: 100
Average Tree Size: 208.36
Avg Max Depth: 14.97
Minimum: [9.38, 9.8]
Maximum: [43.662000000000056, 45.809]


In [6]:
# Merge on a copy of the forest (presumably so `forest` stays unmodified for
# the later cells -- confirm against dforest).  The recorded summary reports
# 50 trees after merge(50).
merged = forest.copy()
merged = merged.merge(50)
merged.print_summary()
# Release the merged copy once it has been summarized.
merged.free()

Size of forest: 50
Average Tree Size: 4003.48
Avg Max Depth: 22.54
Minimum: [9.41, 9.8]
Maximum: [43.662000000000056, 45.64600000000001]


In [7]:
# Reference input point (seven feature values) and the forest's value there.
x = np.array([6.0, 225.0, 100.0, 3233.0, 15.4, 76.0, 1.0])
y = forest.eval(x)

# Per-feature half-widths; later cells build boxes x +/- delta and
# x +/- 2 * delta around the reference point.
delta = np.array([2.0, 40.0, 18.0, 250.0, 1.8, 1.0, 2.0])

print(f"x: {x}, f(x): {y}")

x: [   6.   225.   100.  3233.    15.4   76.     1. ], f(x): 21.195999999999998


In [8]:
# Sample the forest inside the box x +/- delta using 200,000 points, then
# print the summary again.
sample_lo = x - delta
sample_hi = x + delta
forest.sample(sample_lo, sample_hi, 200_000)
forest.print_summary()

Size of forest: 100
Average Tree Size: 208.36
Avg Max Depth: 14.97
Minimum: [9.38, 9.8]
Maximum: [43.662000000000056, 45.809]


In [9]:
# Box of half-width 2 * delta around the reference point.
box_lo, box_hi = x - 2 * delta, x + 2 * delta

# Prune a copy of the forest to that box, sample it with 200,000 points,
# report the resulting summary and release the copy.
copy = forest.copy()
copy = copy.prune_box(box_lo, box_hi)
copy.sample(box_lo, box_hi, 200_000)
copy.print_summary()
copy.free()

Size of forest: 100
Average Tree Size: 55.89
Avg Max Depth: 11.04
Minimum: [13.030000000000001, 14.326999999999998]
Maximum: [28.826999999999998, 32.246]


In [10]:
# Search region: the box of half-width 2 * delta around x.
box_lo, box_hi = x - 2 * delta, x + 2 * delta

# Maximization run; its champion point and value are printed first.
max_x, max_y = query.pso_max(forest, box_lo, box_hi, N=20_000, max_iters=10)
print(f"champ x: {max_x}")
print(f"champ y: {max_y}")

# Minimization run; it prints under the same "champ" labels, so the second
# pair of output lines belongs to the minimum.
min_x, min_y = query.pso_min(forest, box_lo, box_hi, N=20_000, max_iters=10)
print(f"champ x: {min_x}")
print(f"champ y: {min_y}")

champ x: [   4.8  145.2   64.  2752.3   11.9   78.    -0.6]
champ y: 29.218999999999994
champ x: [  10.   292.4  133.  3730.2   19.    74.    -2.3]
champ y: 14.214


In [11]:
# Copy of the forest pruned to the box x +/- 2 * delta; the merge cells below
# start from this pruned copy.
box_lo, box_hi = x - 2 * delta, x + 2 * delta
pruned = forest.copy()
pruned = pruned.prune_box(box_lo, box_hi)

In [12]:
# Plain merge of a copy of the pruned forest; the recorded summary reports
# 25 trees after merge(25).
merge1 = pruned.copy()
merge1 = merge1.merge(25)
merge1.print_summary()
# Release the merged copy once it has been summarized.
merge1.free()

Size of forest: 25
Average Tree Size: 15416.04
Avg Max Depth: 24.96
Minimum: [13.337000000000002, None]
Maximum: [None, 30.613000000000003]


In [13]:
# Merge a copy of the pruned forest with merge_max, passing the PSO champion
# max_x and offset=-2 (semantics of `offset` are defined in dforest -- not
# visible here).  The recorded summary reports 5 trees.
merge3 = pruned.copy()
merge3 = merge3.merge_max(5, max_x, offset=-2)
merge3.print_summary()
# find_max() on the first merged tree returns a pair; NOTE(review): neither
# value is used by any later cell visible in this notebook.
min_opt_bound, max_opt_bound = merge3[0].find_max()
merge3.free()

Size of forest: 5
Average Tree Size: 42459.4
Avg Max Depth: 28.4
Minimum: [25.153999999999996, 29.218999999999994]
Maximum: [29.218999999999994, 29.659000000000002]


In [14]:
# Counterpart of the merge_max cell: merge a copy of the pruned forest with
# merge_min, passing the PSO champion min_x and offset=1.  The recorded
# summary reports 5 trees.
merge4 = pruned.copy()
merge4 = merge4.merge_min(5, min_x, offset=1)
merge4.print_summary()
merge4.free()

Size of forest: 5
Average Tree Size: 37934.8
Avg Max Depth: 29.8
Minimum: [13.911, 14.214]
Maximum: [14.214, 16.556]


In [15]:
# The merge experiments are done; release the pruned copy of the forest.
# `pruned` must not be used after this call.
pruned.free()

In [16]:
# Query parameters: per-feature perturbation `delta` (this rebinds the delta
# used by the earlier cells) and tolerance `eps`.
delta = np.array([2, 20, 10, 30, 1, 1, 1])
eps = 4

# Run the robustness query on the first 25 samples with branch and bound
# disabled.  The three results are printed by length; presumably they hold
# the points proven robust, proven non-robust and undecided -- confirm
# against query.robustness_query_many.
true, false, none = query.robustness_query_many(
    forest,
    auto_X[:25],
    delta,
    eps,
    branch_and_bound=False,
)
print(f"True: {len(true)}")
print(f"False: {len(false)}")
print(f"None: {len(none)}")

True: 10
False: 11
None: 4


In [17]:
# Same 25 samples as the previous query, now without branch_and_bound=False
# and with explicit PSO iteration and merge limits.
true, false, none = query.robustness_query_many(
    forest,
    auto_X[0:25],
    delta,
    eps,
    pso_max_iters=5,
    merge_limit=3,
)
print(f"True: {len(true)}")
print(f"False: {len(false)}")
print(f"None: {len(none)}")

True: 11
False: 12
None: 2


In [18]:
# Run the query with the same settings on every sample in auto_X.
true, false, none = query.robustness_query_many(
    forest,
    auto_X,
    delta,
    eps,
    pso_max_iters=5,
    merge_limit=3,
)
print(f"True: {len(true)}")
print(f"False: {len(false)}")
print(f"None: {len(none)}")

True: 202
False: 179
None: 16


In [None]:
# Increase hyperparameters for remaining unproven points.
# The results are bound to fresh *_retry names instead of true/false/none:
# `none` is this cell's input, and overwriting it would make the cell
# non-idempotent (a second run would query the output of the first run
# rather than the points left unproven by the full-dataset query).
true_retry, false_retry, none_retry = query.robustness_query_many(
    forest, none, delta, eps, pso_N=40_000, pso_max_iters=10, merge_limit=1)
print(f"True: {len(true_retry)}")
print(f"False: {len(false_retry)}")
print(f"None: {len(none_retry)}")

True: 13
False: 3
None: 0
