In [1]:
from creme import datasets
from creme import metrics
from creme import neighbors
from creme import preprocessing

import time

In [2]:
def run_friedman(model, dataset, metric, n_samples=5000, warm_up=5):
    """Evaluate ``model`` on a stream with a test-then-train loop.

    Each ``(x, y)`` pair drawn from ``dataset`` is first used to score the
    model (once the warm-up is over) and only then used to train it, so the
    model is always scored on samples it has not learned from yet.

    Parameters
    ----------
    model
        Object exposing ``predict_one(x)`` and ``learn_one(x, y)``.
    dataset
        Iterable yielding ``(x, y)`` pairs. It may be infinite as long as
        ``n_samples`` is not ``None``.
    metric
        Object exposing ``update(y_true, y_pred)``; it is updated in place.
    n_samples : int or None, default 5000
        Maximum number of samples to draw from ``dataset``. ``None`` means
        "consume the whole stream" (only sensible for finite streams). A value
        of zero or less processes nothing.
    warm_up : int, default 5
        Number of initial samples that are used for training only and are not
        scored.

    Returns
    -------
    The same ``metric`` object that was passed in.
    """
    # Guard up front so an empty budget does not even pull one sample
    # from the stream.
    if n_samples is not None and n_samples <= 0:
        return metric

    for seen, (x, y) in enumerate(dataset, start=1):
        # Score before learning; skip the first ``warm_up`` samples.
        if seen > warm_up:
            metric.update(y, model.predict_one(x))
        model.learn_one(x, y)

        # Check the budget at the end of the iteration so the stream is not
        # advanced past the last processed sample.
        if n_samples is not None and seen >= n_samples:
            break

    return metric

# Comparing k-NN Regressors

## Circular Buffer + cKDTree

### 1. Mean

In [3]:
%%timeit

dataset = datasets.synth.Friedman(seed=1)
metric = metrics.MAE()

model = (
    preprocessing.StandardScaler() | neighbors.KNNRegressor()
)

print(run_friedman(model, dataset, metric))

MAE: 2.074526
MAE: 2.074526
MAE: 2.074526
MAE: 2.074526
MAE: 2.074526
MAE: 2.074526
MAE: 2.074526
MAE: 2.074526
2.22 s ± 14.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### 2. Median

In [4]:
%%timeit

dataset = datasets.synth.Friedman(seed=1)
metric = metrics.MAE()

model = (
    preprocessing.StandardScaler() | neighbors.KNNRegressor(aggregation_method='median')
)

print(run_friedman(model, dataset, metric))

MAE: 2.257552
MAE: 2.257552
MAE: 2.257552
MAE: 2.257552
MAE: 2.257552
MAE: 2.257552
MAE: 2.257552
MAE: 2.257552
2.63 s ± 221 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### 3. Weighted mean

In [5]:
%%timeit

dataset = datasets.synth.Friedman(seed=1)
metric = metrics.MAE()
0.399144
model = (
    preprocessing.StandardScaler() | neighbors.KNNRegressor(aggregation_method='weighted_mean')
)

print(run_friedman(model, dataset, metric))

MAE: 2.050872
MAE: 2.050872
MAE: 2.050872
MAE: 2.050872
MAE: 2.050872
MAE: 2.050872
MAE: 2.050872
MAE: 2.050872
2.3 s ± 60.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Creme `KNeighborsRegressor`: unweighted mean

In [6]:
%%timeit

dataset = datasets.synth.Friedman(seed=1)
metric = metrics.MAE()

model = (
    preprocessing.StandardScaler() | neighbors.KNeighborsRegressor(window_size=1000, weighted=False)
)

print(run_friedman(model, dataset, metric))

MAE: 2.079942
MAE: 2.079942
MAE: 2.079942
MAE: 2.079942
MAE: 2.079942
MAE: 2.079942
MAE: 2.079942
MAE: 2.079942
19.9 s ± 596 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Creme `KNeighborsRegressor`: weighted mean

In [7]:
%%timeit

dataset = datasets.synth.Friedman(seed=1)
metric = metrics.MAE()

model = (
    preprocessing.StandardScaler() | neighbors.KNeighborsRegressor(window_size=1000)
)

print(run_friedman(model, dataset, metric))

MAE: 2.039271
MAE: 2.039271
MAE: 2.039271
MAE: 2.039271
MAE: 2.039271
MAE: 2.039271
MAE: 2.039271
MAE: 2.039271
18.7 s ± 147 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
