In [1]:
import sys
sys.path.append("..")

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

import dtree
import dforest
import query

In [2]:
def missing_data(x):
    """Convert one raw text field of the data file into a float.

    ``np.loadtxt`` passes ``bytes`` to converters under its legacy
    ``encoding='bytes'`` default (NumPy < 2.0) and ``str`` otherwise, so
    both are accepted; a ``str``-only comparison would never recognise the
    missing-value marker when it arrives as ``b'?'``.

    Parameters
    ----------
    x : str or bytes
        Raw field as handed over by the loader.

    Returns
    -------
    float
        ``100.`` (the placeholder used for missing entries) when the field
        is the marker ``'?'``, otherwise ``float(x)``.

    Raises
    ------
    ValueError
        If the field is neither ``'?'`` nor parseable as a float.
    """
    if isinstance(x, bytes):
        # latin1 maps every byte to a code point, so decoding cannot fail.
        x = x.decode("latin1")
    if isinstance(x, str) and x.strip() == '?':
        return 100.
    return float(x)
# Read columns 0-7 of the data file, skipping its first line; every field is
# passed through `missing_data` before being stored.
data = np.loadtxt(
    "../datasets/Auto.data",
    skiprows=1,
    usecols=list(range(8)),
    converters=missing_data,
)

# Show floats with one decimal place and no scientific notation.
np.set_printoptions(suppress=True, precision=1)

# Seed the global NumPy RNG so the in-place row shuffle is repeatable.
np.random.seed(12345)
np.random.shuffle(data)

# Column 0 is the regression target; the remaining seven columns are features.
auto_y = data[:, 0]
auto_X = data[:, 1:]

In [3]:
# Train a default-configured random forest on the whole data set
# (`fit` returns the estimator itself, so the call can be chained).
model = RandomForestRegressor().fit(auto_X, auto_y)

# Build the project's forest representation from the fitted scikit-learn model.
forest = dforest.make_forest_sklearn(model)

In [4]:
# Sanity check: the converted forest must reproduce scikit-learn's predictions
# on slightly perturbed copies of the training samples.
noisy_samples = auto_X + np.random.normal(0, 0.1, auto_X.shape)
pred = model.predict(noisy_samples)
for i, (sample, expected) in enumerate(zip(noisy_samples, pred)):
    actual = forest.eval(sample)
    # Compare with an absolute tolerance rather than round(..., 6) equality:
    # two values that differ by ~1e-15 can still round to different sixth
    # decimals when they straddle a rounding boundary, failing spuriously.
    assert np.isclose(actual, expected, rtol=0, atol=1e-6), (
        f"sample {i}: forest.eval gave {actual}, model.predict gave {expected}"
    )

In [5]:
# Print the converted forest's summary; the recorded output lists the number
# of trees, average tree size, average max depth and minimum/maximum values.
forest.print_summary()

Size of forest: 100
Average Tree Size: 208.36
Avg Max Depth: 14.97
Minimum: [9.38, 9.8]
Maximum: [43.662000000000056, 45.809]


In [6]:
# Merge a copy of the forest with argument 25 and summarise the result. The
# recorded output shows the tree count dropping from 100 to 25 while the
# average tree size grows from about 208 to about 123,000.
# NOTE(review): the copy is presumably taken so `forest` itself is not modified — confirm.
merged = forest.copy().merge(25)
merged.print_summary()
# Presumably releases the resources held by the merged copy — TODO confirm.
merged.free()

Size of forest: 25
Average Tree Size: 123320.4
Avg Max Depth: 31.4
Minimum: [9.42, 9.8]
Maximum: [43.662000000000056, 45.238]


In [13]:
# Centre point of the query (seven values, one per column of auto_X) and the
# per-feature offsets that define the box [x - delta, x + delta].
x = np.array([6, 225.0, 100.0, 3233, 15.4, 76, 1])
delta = np.array([2, 40, 18, 250, 1.8, 1, 2])

# Reference value and slack; the bound handed to the query is y + epsilon.
y, epsilon = 22, 2

# NOTE(review): presumably checks that the forest output stays <= y + epsilon
# over the whole box; the recorded output reports "Query holds, f(x) <= 24".
query.max_query(
    forest,
    x - delta,
    x + delta,
    y + epsilon,
    merge_limit=4,
    n_samples=500_000,
    verbose=True,
)

Query holds
f(x) <= 24
6 trees, max bound: 23.961


True

In [8]:
# forest.free()