In [2]:
import sys
sys.path.append("..")

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

import dtree
import dforest
import query

In [3]:
# Value substituted for fields the data file marks as missing with '?'.
MISSING_FILL = 100.


def missing_data(field):
    """Convert one text field to float, using MISSING_FILL for the '?' marker."""
    if field == '?':
        return MISSING_FILL
    return float(field)


# Read the first eight columns and skip the header row; every field goes
# through `missing_data`.
data = np.loadtxt(
    "../datasets/Auto.data",
    converters=missing_data,
    skiprows=1,
    usecols=[0, 1, 2, 3, 4, 5, 6, 7],
)

# Compact array printing for the rest of the notebook.
np.set_printoptions(precision=1, suppress=True)

# Seed NumPy's global RNG, then shuffle the rows in place.
np.random.seed(12345)
np.random.shuffle(data)

# Column 0 is the regression target; the remaining seven columns are features.
auto_y = data[:, 0]
auto_X = data[:, 1:]

In [4]:
# Train a scikit-learn random forest with default hyper-parameters.
# `fit` returns the estimator itself, so the call can be chained.
# No random_state is passed, so scikit-learn draws from NumPy's global RNG.
model = RandomForestRegressor().fit(auto_X, auto_y)

# Convert the fitted model into this project's forest representation.
forest = dforest.make_forest_sklearn(model)

In [5]:
# Sanity check: the converted forest must reproduce the scikit-learn model's
# predictions on perturbed copies of the training samples.
noisy_samples = auto_X + np.random.normal(0, 0.1, auto_X.shape)
model_pred = model.predict(noisy_samples)
forest_pred = forest.eval(noisy_samples)

# Compare with an absolute tolerance instead of rounding both sides: two values
# that differ by a tiny amount can still round to different 6-digit numbers.
# A single vectorised check also avoids leaking loop variables into the
# notebook namespace.
np.testing.assert_allclose(forest_pred, model_pred, rtol=0, atol=1e-6)

In [6]:
# Print summary statistics of the converted forest (size, average tree size,
# average max depth and the minimum/maximum lines shown in the cell output).
forest.print_summary()

Size of forest: 100
Average Tree Size: 208.36
Avg Max Depth: 14.97
Minimum: [9.38, 9.8]
Maximum: [43.662000000000056, 45.809]


In [7]:
# Merge a copy of the forest with limit 50 and summarise the result; `forest`
# itself is not the object being merged.
merged = forest.copy().merge(50)
try:
    merged.print_summary()
finally:
    # Release the merged copy even if printing the summary raises.
    merged.free()

Size of forest: 50
Average Tree Size: 4003.48
Avg Max Depth: 22.54
Minimum: [9.41, 9.8]
Maximum: [43.662000000000056, 45.64600000000001]


In [8]:
# Query point (one value per feature column of auto_X) and the per-feature
# half-widths of the box examined around it.
x = np.array([6, 225.0, 100.0, 3233, 15.4, 76, 1])
delta = np.array([2, 40, 18, 250, 1.8, 1, 2])

# Half-width of the output interval centred on the forest's value at x.
epsilon = 5
y = forest.eval(x)

box_lo, box_hi = x - delta, x + delta
y_lo, y_hi = y - epsilon, y + epsilon

# NOTE(review): presumably asks whether the forest output stays inside
# [y_lo, y_hi] for every input in the box -- confirm against query.query.
query.query(
    forest,
    box_lo,
    box_hi,
    y_lo,
    y_hi,
    merge_limit=25,
    verbose=True,
)

Can not (dis)prove query.

Query f(x) <= 26.195999999999998 holds
100 trees, max bound: 25.784000000000002


In [9]:
# Number of points handed to forest.sample for the box x +/- delta.
n_samples = 200_000

# NOTE(review): presumably evaluates random points inside the box to update
# the forest's bounds -- confirm against dforest. The summary is printed again
# afterwards so it can be compared with the earlier one.
forest.sample(x - delta, x + delta, n_samples)
forest.print_summary()

Size of forest: 100
Average Tree Size: 208.36
Avg Max Depth: 14.97
Minimum: [9.38, 9.8]
Maximum: [43.662000000000056, 45.809]


In [10]:
# Half-widths of the enlarged box: twice the per-feature delta.
box_radius = 2 * delta

# Prune a copy of the forest to the enlarged box, sample inside that box and
# summarise the result.
copy = forest.copy().prune_box(x - box_radius, x + box_radius)
try:
    copy.sample(x - box_radius, x + box_radius, 200_000)
    copy.print_summary()
finally:
    # Release the pruned copy even if sampling or printing raises.
    copy.free()

Size of forest: 100
Average Tree Size: 55.89
Avg Max Depth: 11.04
Minimum: [13.030000000000001, 14.324000000000002]
Maximum: [28.099000000000007, 32.246]


In [11]:
# Run the particle-swarm maximiser and then the minimiser over the box
# x +/- 2 * delta, printing the champion point and value found by each.
# After the loop, champ_x / champ_y hold the minimiser's result.
# NOTE(review): the printed maximiser champion has -3 in its last coordinate,
# the lower edge of this box for that feature -- confirm that range is intended.
for optimiser in (query.pso_max, query.pso_min):
    champ_x, champ_y = optimiser(
        forest,
        x - 2 * delta,
        x + 2 * delta,
        N=20_000,
        max_iters=10,
    )
    print(f"champ x: {champ_x}")
    print(f"champ y: {champ_y}")

champ x: [   4.4  145.    64.5 2759.1   11.8   78.    -3. ]
champ y: 29.218999999999994
champ x: [   7.9  294.7  133.1 3733.    19.    74.8    1.9]
champ y: 14.214


In [None]:
# Robustness query for the first 25 samples with per-feature perturbation
# `delta` and threshold 2.
# NOTE(review): presumably the last argument is the allowed output deviation
# -- confirm against query.robustness_query.
delta = np.array([2, 20, 10, 30, 1, 1, 1])

# The comprehension keeps its iteration variable local, so the notebook-level
# query point `x` defined in an earlier cell is not overwritten.
robust = [
    query.robustness_query(forest, sample, delta, 2)
    for sample in auto_X[0:25]
]

# list.count compares with ==, so NumPy booleans are counted as well;
# None equals neither True nor False and is tallied separately by identity.
print(f"True: {robust.count(True)}")
print(f"False: {robust.count(False)}")
print(f"None: {sum(result is None for result in robust)}")

[True, False, True, False, False, False, None, None, False, True, False, False, False, False, False, True, False, False, False, False, False, True, False, False, False]
True: 5
False: 18
None: 2


In [16]:
# Robustness query for the first 25 samples with per-feature perturbation
# `delta` and threshold 4.
# NOTE(review): presumably the last argument is the allowed output deviation
# -- confirm against query.robustness_query.
delta = np.array([2, 20, 10, 30, 1, 1, 1])

# The comprehension keeps its iteration variable local, so the notebook-level
# query point `x` defined in an earlier cell is not overwritten.
robust = [
    query.robustness_query(forest, sample, delta, 4)
    for sample in auto_X[0:25]
]

# list.count compares with ==, so NumPy booleans are counted as well;
# None equals neither True nor False and is tallied separately by identity.
print(f"True: {robust.count(True)}")
print(f"False: {robust.count(False)}")
print(f"None: {sum(result is None for result in robust)}")

True: 10
False: 11
None: 4


In [17]:
# Robustness query for every sample in auto_X with per-feature perturbation
# `delta` and threshold 4.
# NOTE(review): presumably the last argument is the allowed output deviation
# -- confirm against query.robustness_query.
delta = np.array([2, 20, 10, 30, 1, 1, 1])

# The comprehension keeps its iteration variable local, so the notebook-level
# query point `x` defined in an earlier cell is not overwritten.
robust = [
    query.robustness_query(forest, sample, delta, 4)
    for sample in auto_X
]

# list.count compares with ==, so NumPy booleans are counted as well;
# None equals neither True nor False and is tallied separately by identity.
print(f"True: {robust.count(True)}")
print(f"False: {robust.count(False)}")
print(f"None: {sum(result is None for result in robust)}")

True: 178
False: 171
None: 48
