# Trying out different concept drift detection techniques

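The data is generated artificially as three consecutive segments drawn from uniform distributions:
- `dist_a`: 500 samples in [0, 0.3) followed by 125 samples in [0, 0.4)
- `dist_b`: 700 samples in [0.25, 0.8)
- `dist_c`: 1000 samples in [0.7, 1.5)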

In [1]:
pip install river

Note: you may need to restart the kernel to use updated packages.


In [1]:
import numpy as np
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook
from bokeh.layouts import gridplot
from bokeh.palettes import Pastel1
from bokeh.models import Span

In [2]:
def plot_drift_stream(dist_a, dist_b, dist_c, drifts=None):
    """Scatter-plot three consecutive stream segments and mark drift locations.

    The segments are laid out back to back on the x axis: ``dist_a`` starts at
    sample 0, ``dist_b`` starts where ``dist_a`` ends, and ``dist_c`` starts
    where ``dist_b`` ends. The offsets are computed from the segment lengths,
    so the function works for segments of any size.

    Parameters
    ----------
    dist_a, dist_b, dist_c : sized sequences of numbers
        The values of each segment, in stream order.
    drifts : iterable of numbers, optional
        Sample indices at which a vertical red line is drawn. ``None``
        (the default) draws no lines.

    Returns
    -------
    None
        The figure is rendered in the notebook via ``show``.
    """
    output_notebook()
    color_0 = Pastel1[6][0]
    color_1 = Pastel1[7][1]
    color_2 = Pastel1[8][2]

    left = figure(plot_width=600, plot_height=400,
                  tools="pan,box_zoom,reset,save",
                  title="drift stream",
                  x_axis_label='samples', y_axis_label='value',
                  background_fill_color="#fafafa"
                  )

    # Place each segment right after the previous one instead of relying on
    # hard-coded sample ranges that only fit one particular data set.
    segments = (
        (dist_a, "dist_a", color_0),
        (dist_b, "dist_b", color_1),
        (dist_c, "dist_c", color_2),
    )
    start = 0
    for values, label, color in segments:
        stop = start + len(values)
        left.circle(range(start, stop), values, legend_label=label,
                    fill_color=color, line_color=color, size=4)
        start = stop

    if drifts is not None:
        for drift_loc in drifts:
            # A full-height vertical line at the reported drift index.
            drift_line = Span(location=drift_loc, dimension='height',
                              line_color='red', line_width=2)
            left.add_layout(drift_line)

    show(left)

In [3]:
def plot_distribution(dist_a, dist_b, dist_c, drifts=None, warnings=None):
    """Overlay density histograms (50 bins each) of the three segments.

    Parameters
    ----------
    dist_a, dist_b, dist_c : array-like of numbers
        Samples of each segment; each is binned independently.
    drifts, warnings : optional
        Accepted but not used by the function body.

    Returns
    -------
    None
        The figure is rendered in the notebook via ``show``.
    """
    output_notebook()
    palette = Pastel1[3]

    right = figure(plot_width=300, plot_height=400,
                   tools="pan,box_zoom,reset,save",
                   title="distributions",
                   background_fill_color="#fafafa"
                   )

    # One (label, samples, colour) triple per segment, drawn in a, b, c order.
    series = (
        ('dist_a', dist_a, palette[0]),
        ('dist_b', dist_b, palette[1]),
        ('dist_c', dist_c, palette[2]),
    )
    for label, samples, shade in series:
        density, bin_edges = np.histogram(samples, density=True, bins=50)
        right.quad(top=density, bottom=0,
                   left=bin_edges[:-1], right=bin_edges[1:],
                   fill_color=shade, line_color=shade, legend_label=label)

    show(right)

In [4]:
# Seed NumPy's global RNG so the synthetic stream is reproducible.
np.random.seed(42)

# Segment A: 500 draws from U[0, 0.3) followed by 125 draws from U[0, 0.4).
dist_a1 = np.random.uniform(low=0, high=0.3, size=500)
dist_a2 = np.random.uniform(low=0, high=0.4, size=125)
dist_a = np.concatenate([dist_a1, dist_a2])

# Segment B: 700 draws from U[0.25, 0.8); segment C: 1000 draws from U[0.7, 1.5).
dist_b = np.random.uniform(low=0.25, high=0.8, size=700)
dist_c = np.random.uniform(low=0.7, high=1.5, size=1000)

# The full stream is the three segments back to back.
data_stream = np.concatenate([dist_a, dist_b, dist_c])

In [5]:
# Show the raw stream, one colour per segment, without drift markers.
plot_drift_stream(dist_a=dist_a, dist_b=dist_b, dist_c=dist_c)

In [6]:
# Show the density histogram of each segment.
plot_distribution(dist_a=dist_a, dist_b=dist_b, dist_c=dist_c)

# ADWIN

In [7]:
from river.drift import ADWIN

# Feed the stream to ADWIN one value at a time and record every index at
# which the detector reports a drift.
drift_detector = ADWIN()
drifts = []

for idx, sample in enumerate(data_stream):
    drift_detector.update(sample)
    if not drift_detector.drift_detected:
        continue
    print(f'Change detected at index {idx}')
    drifts.append(idx)

Change detected at index 703
Change detected at index 1375


In [8]:
# Stream with the recorded drift indices marked, followed by the histograms.
plot_drift_stream(dist_a, dist_b, dist_c, drifts=drifts)
plot_distribution(dist_a=dist_a, dist_b=dist_b, dist_c=dist_c)

# Page Hinkley

In [9]:
from river.drift import PageHinkley

# Same procedure as for the other detectors: stream the values one by one
# and collect the indices where a drift is flagged.
drift_detector = PageHinkley()
drifts = []

for idx, sample in enumerate(data_stream):
    drift_detector.update(sample)
    if not drift_detector.drift_detected:
        continue
    print(f'Change detected at index {idx}')
    drifts.append(idx)
         

Change detected at index 767
Change detected at index 1414


In [10]:
# Stream with the recorded drift indices marked, followed by the histograms.
plot_drift_stream(dist_a, dist_b, dist_c, drifts=drifts)
plot_distribution(dist_a=dist_a, dist_b=dist_b, dist_c=dist_c)

# EDDM

In [11]:
from river.drift import EDDM

In [12]:
# NOTE(review): river documents EDDM as operating on a binary error stream
# (1 = error, 0 = correct); `data_stream` holds continuous values here --
# confirm that feeding it directly is intended.
drift_detector = EDDM()
drifts = []

for idx, sample in enumerate(data_stream):
    drift_detector.update(sample)
    if not drift_detector.drift_detected:
        continue
    print(f'Change detected at index {idx}')
    drifts.append(idx)
     

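No drift was detected. Note that EDDM is designed for a stream of binary error indicators (1 = error, 0 = correct), whereas `data_stream` contains continuous values, so this result is not directly comparable with the detectors above.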

In [13]:
# Stream with the recorded drift indices marked, followed by the histograms.
plot_drift_stream(dist_a, dist_b, dist_c, drifts=drifts)
plot_distribution(dist_a=dist_a, dist_b=dist_b, dist_c=dist_c)

# CUSUM

In [14]:
class Cusum():
    """One-sided cumulative-sum (CUSUM) detector for upward shifts in the mean.

    For every value passed to :meth:`update` the detector refreshes a running
    mean and accumulates ``value - mean - delta`` into a sum that is clamped
    at zero. Once at least ``min_obs`` values have been observed and the sum
    exceeds ``lamb``, the change flag is raised. The flag stays raised until
    :meth:`reset` is called.

    Parameters
    ----------
    delta : float
        Maximum tolerable change in the mean; subtracted from every deviation.
    lamb : float
        Threshold the cumulative sum must exceed to flag a change.
    min_obs : int
        Minimum number of observed samples before a change may be flagged.

    Notes
    -----
    All parameters have defaults so that the legacy two-step construction
    ``d = Cusum(); d._init_(delta, lamb, min_obs)`` keeps working.
    """

    def __init__(self, delta=0.005, lamb=50.0, min_obs=30):
        self._delta = delta  # max tolerable change in mean
        self._lambda = lamb  # detection threshold on the cumulative sum
        self._min_obs = min_obs  # minimum observed samples
        self.reset()

    def _init_(self, delta, lamb, min_obs):
        """Legacy initializer, kept for callers that invoke it explicitly."""
        self.__init__(delta, lamb, min_obs)

    @property
    def change_detected(self):
        """bool: True once a change has been flagged and not yet reset."""
        return self._change_detected

    def update(self, value):
        """Consume one value and raise the change flag if the test fires."""
        # Incremental mean; `_n` is the 1-based index of the current sample.
        self._x_mean += (value - self._x_mean) / self._n

        # Clamping at zero makes the statistic one-sided: only values above
        # the running mean (by more than `delta`) can accumulate.
        self._sum = max(0.0, self._sum + value - self._x_mean - self._delta)

        # Update number of samples.
        self._n += 1

        # `_n - 1` is the number of samples observed so far.
        if self._n - 1 >= self._min_obs and self._sum > self._lambda:
            self._change_detected = True

    def reset(self):
        """Forget all accumulated statistics and clear the change flag."""
        self._n = 1
        self._x_mean = 0.0
        self._sum = 0.0
        self._change_detected = False
        

In [21]:
# Instantiate the detector, then configure it through the explicit `_init_` call.
drift_detector = Cusum()
drift_detector._init_(delta=0.0001, lamb=0.1, min_obs=50)

drifts = []

for idx, sample in enumerate(data_stream):
    drift_detector.update(sample)
    if not drift_detector._change_detected:
        continue
    print('Change at index ' + str(idx))
    drifts.append(idx)
    # Start over from scratch after every reported change.
    drift_detector.reset()