In [1]:
import logging
from skmultiflow.lazy import KNNADWINClassifier
from skmultiflow.data import FileStream
from skmultiflow.transform import OneHotToCategorical
from sklearn.neighbors.classification import KNeighborsClassifier
from timeit import default_timer as timer



    This demo evaluates the KNNADWINClassifier on a file stream whose
    instances were produced by a SEA generator.

    It reports the accuracy of the KNNADWINClassifier, alongside a
    scikit-learn KNeighborsClassifier fitted once on the first 200 samples,
    and the time taken to build the models and classify max_samples
    (10000 by default) instances.

In [3]:
# Test-then-train evaluation of KNNADWINClassifier on the SEA file stream,
# next to a scikit-learn KNeighborsClassifier that is fitted once on the
# warm-up batch and never updated afterwards.
# `start`/`end` bracket the whole cell, so the measured time also covers
# constructing the stream and the baseline's predictions.
start = timer()
logging.basicConfig(format='%(message)s', level=logging.INFO)
stream = FileStream("https://raw.githubusercontent.com/scikit-multiflow/streaming-datasets/"
                        "master/sea_big.csv", -1, 1)

knn = KNNADWINClassifier(n_neighbors=8, leaf_size=40, max_window_size=2000)

# Static baseline configured with the same neighbour count and leaf size.
compare = KNeighborsClassifier(n_neighbors=8, algorithm='kd_tree', leaf_size=40, metric='euclidean')

# Warm-up: both models see the first `train` samples before evaluation starts.
# `first` records whether the stream learner still has to be told the class
# labels on its first partial_fit call.
first = True
train = 200
if train > 0:
    X, y = stream.next_sample(train)
    knn.partial_fit(X, y, classes=stream.target_values)
    compare.fit(X, y)
    first = False
# NOTE(review): with train == 0 the baseline is never fitted, so
# compare.predict() in the loop below would fail; keep train > 0 while the
# baseline is part of the comparison.

n_samples = 0
max_samples = 10000
my_corrects = 0
compare_corrects = 0

# Report progress every 5% of the run. The interval is an integer (at least
# one sample) so the modulo test below never relies on float equality.
progress_step = max(1, max_samples // 20)

# Stop early if the stream runs out of data instead of failing inside
# next_sample(); n_samples then reflects the number actually evaluated.
while n_samples < max_samples and stream.has_more_samples():
    if n_samples % progress_step == 0:
        logging.info('%.1f%%', 100.0 * n_samples / max_samples)
    X, y = stream.next_sample()
    # Predict before learning from the sample, so each instance is scored
    # by a model that has not seen it yet.
    my_pred = knn.predict(X)
    if first:
        knn.partial_fit(X, y, classes=stream.target_values)
        first = False
    else:
        knn.partial_fit(X, y)
    compare_pred = compare.predict(X)
    if y[0] == my_pred[0]:
        my_corrects += 1
    if y[0] == compare_pred[0]:
        compare_corrects += 1
    n_samples += 1

end = timer()

0.0%
5.0%
10.0%
15.0%
20.0%
25.0%
30.0%
35.0%
40.0%
45.0%
50.0%
55.0%
60.0%
65.0%
70.0%
75.0%
80.0%
85.0%
90.0%
95.0%


In [4]:
# Summary of the run: elapsed wall-clock time, number of evaluated samples,
# and the fraction of correct predictions for each classifier.
print('Evaluation time: ', end - start, sep='')
print(n_samples, ' samples analyzed.', sep='')
print('My performance: ', my_corrects / n_samples, sep='')
print('Compare performance: ', compare_corrects / n_samples, sep='')

Evaluation time: 29.61288799000613
10000 samples analyzed.
My performance: 0.872
Compare performance: 0.8543
