In [24]:
import numpy as np
import sklearn as sk
import pandas as pd
import random
from skmultiflow.meta import LearnPPNSEClassifier

import matplotlib.pyplot as plt

In [33]:
def circles(theta, n_samples, noise=False):
    x1 = [random.random()*10-5 for i in range(n_samples)]
    x2 = [random.random()*10-5 for i in range(n_samples)]
    df = pd.DataFrame(x1, columns=['x1'])
    df['x2'] = x2
    y = (df['x1']**2+df['x2']**2 <= theta).astype(int)
    if noise:
        for i in y.sample(frac=0.1).index:
            y.loc[i] = random.randint(0, 1)
    y = y.to_numpy()
    df = df.to_numpy()
    return df, y

In [26]:
thetas_a = [3, 2, 1, 2, 3, 4, 5, 4]
thetas_g = [3, 2.5, 2, 2.5, 3, 3.5, 4, 3.5]

In [30]:
len(thetas_a)

8

In [27]:
learn_pp_nse = LearnPPNSEClassifier(pruning='error', n_estimators=25)

In [42]:
%%time
# Gradual 500, 10% random in training set.

r = []
correct_cnt = 0
size = 500
learn_pp_nse = LearnPPNSEClassifier(pruning='error', n_estimators=25, window_size=1000)
for i in range(120):
    

    X, y = circles(theta, size)
    y_pred = learn_pp_nse.predict(X)
    correct_cnt += sum(y == y_pred)
    r.append(sum(y == y_pred)/size)
    
    theta = thetas_g[i%7]
    X, y = circles(theta, size, noise=True)
    learn_pp_nse.partial_fit(X, y, classes=[0, 1])
print(correct_cnt/(120*size))

0.9291166666666667
CPU times: user 2min 24s, sys: 336 ms, total: 2min 24s
Wall time: 2min 24s


In [40]:
print(correct_cnt/(10*size))

0.9302


In [148]:
y_pred

array([1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1,
       0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,

In [147]:
print(correct_cnt/(12*size))

0.4681666666666667


In [94]:
%%time

# Gradual 200, 10% random in training set.
r = []
correct_cnt = 0
size = 200
learn_pp_nse = LearnPPNSEClassifier(pruning='error', n_estimators=25, window_size=100)
for i in range(120):
    theta = thetas_a[i%7]
    df = pd.DataFrame(SEAGenerator().next_sample(size)[0])
    y = class_function(df, theta, on=False).to_numpy()
    X = df.to_numpy()
    learn_pp_nse.partial_fit(X, y, classes=[0, 1])

    df = pd.DataFrame(SEAGenerator().next_sample(size)[0])
    y = class_function(df, theta).to_numpy()
    X = df.to_numpy()

    y_pred = learn_pp_nse.predict(X)
    correct_cnt += sum(y == y_pred)
    r.append(sum(y == y_pred)/size)
print(correct_cnt/(120*size))

0.8869166666666667
CPU times: user 1min 16s, sys: 52 ms, total: 1min 16s
Wall time: 1min 16s


In [95]:
%%time

# Abrupt 200, 10% random in training set.
r = []
correct_cnt = 0
size = 200
learn_pp_nse = LearnPPNSEClassifier(pruning='error', n_estimators=25, window_size=100)
for i in range(120):
    theta = thetas_g[i%7]
    df = pd.DataFrame(SEAGenerator().next_sample(size)[0])
    y = class_function(df, theta, on=False).to_numpy()
    X = df.to_numpy()
    learn_pp_nse.partial_fit(X, y, classes=[0, 1])

    df = pd.DataFrame(SEAGenerator().next_sample(size)[0])
    y = class_function(df, theta).to_numpy()
    X = df.to_numpy()

    y_pred = learn_pp_nse.predict(X)
    correct_cnt += sum(y == y_pred)
    r.append(sum(y == y_pred)/size)
print(correct_cnt/(120*size))

0.8740416666666667
CPU times: user 1min 8s, sys: 104 ms, total: 1min 9s
Wall time: 1min 8s


In [75]:
%%time
r = []
correct_cnt = 0
size = 500
for i in range(120):
    theta = thetas[i%7]
    df = pd.DataFrame(SEAGenerator().next_sample(size)[0])
    y = class_function(df, theta).to_numpy()
    X = df.to_numpy()
    learn_pp_nse.partial_fit(X, y, classes=[0, 1])
    
    df = pd.DataFrame(SEAGenerator().next_sample(size)[0])
    y = class_function(df, theta).to_numpy()
    X = df.to_numpy()
    
    y_pred = learn_pp_nse.predict(X)
    correct_cnt += sum(y == y_pred)
    r.append(sum(y == y_pred)/size)
print(correct_cnt/(120*size))

0.8668833333333333
CPU times: user 2min 51s, sys: 693 ms, total: 2min 52s
Wall time: 2min 51s


In [76]:
%%time
r = []
correct_cnt = 0
size = 500
for i in range(120):
    theta = thetas[i//15]
    df = pd.DataFrame(SEAGenerator().next_sample(size)[0])
    y = class_function(df, theta).to_numpy()
    X = df.to_numpy()
    learn_pp_nse.partial_fit(X, y, classes=[0, 1])
    
    df = pd.DataFrame(SEAGenerator().next_sample(size)[0])
    y = class_function(df, theta, on=False).to_numpy()
    X = df.to_numpy()
    
    y_pred = learn_pp_nse.predict(X)
    correct_cnt += sum(y == y_pred)
    r.append(sum(y == y_pred)/size)
print(correct_cnt/(120*size))

0.9068166666666667
CPU times: user 2min 19s, sys: 116 ms, total: 2min 19s
Wall time: 2min 19s


In [None]:
%%time

r = []
correct_cnt = 0
size = 500

for i in range(120):
    theta = thetas_g[i%7]
    df = pd.DataFrame(SEAGenerator().next_sample(size)[0])
    y = class_function(df, theta, on=False).to_numpy()
    X = df.to_numpy()
    learn_pp_nse.partial_fit(X, y, classes=[0, 1])

    df = pd.DataFrame(SEAGenerator().next_sample(size)[0])
    y = class_function(df, theta).to_numpy()
    X = df.to_numpy()

    y_pred = learn_pp_nse.predict(X)
    correct_cnt += sum(y == y_pred)
    r.append(sum(y == y_pred)/size)
print(correct_cnt/(120*size))

In [79]:
%%time
for j in range(10):
    r = []
    correct_cnt = 0
    size = 500
    learn_pp_nse = LearnPPNSEClassifier(pruning='error', n_estimators=25)
    for i in range(120):
        theta = thetas[i%7]
        df = pd.DataFrame(SEAGenerator().next_sample(size)[0])
        y = class_function(df, theta, on=False).to_numpy()
        X = df.to_numpy()
        learn_pp_nse.partial_fit(X, y, classes=[0, 1])

        df = pd.DataFrame(SEAGenerator().next_sample(size)[0])
        y = class_function(df, theta).to_numpy()
        X = df.to_numpy()

        y_pred = learn_pp_nse.predict(X)
        correct_cnt += sum(y == y_pred)
        r.append(sum(y == y_pred)/size)
    print(correct_cnt/(120*size))

0.9096
0.9116833333333333
0.9099333333333334
0.9098666666666667
0.9083333333333333


KeyboardInterrupt: 

In [83]:
r = []
correct_cnt = 0
size = 200
learn_pp_nse = LearnPPNSEClassifier(pruning='error', n_estimators=25, window_size=100)

for i in range(120):
    theta = thetas[i%7]
    df = pd.DataFrame(SEAGenerator().next_sample(size)[0])
    y = class_function(df, theta, on=False).to_numpy()
    X = df.to_numpy()
    learn_pp_nse.partial_fit(X, y, classes=[0, 1])

    df = pd.DataFrame(SEAGenerator().next_sample(size)[0])
    y = class_function(df, theta).to_numpy()
    X = df.to_numpy()

    y_pred = learn_pp_nse.predict(X)
    correct_cnt += sum(y == y_pred)
    r.append(sum(y == y_pred)/size)
print(correct_cnt/(120*size))

0.8909583333333333


In [82]:
learn_pp_nse

LearnPPNSEClassifier(base_estimator=DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best'),
                     crossing_point=10, n_estimators=25, pruning='error',
                     slope=0.5, window_size=250)