# Baselines on the Synthetic and Real Datasets
### This Jupyter notebook evaluates nine baseline methods on the synthetic and real datasets.

## Import libraries

In [1]:
import math
import copy
import pickle
import time
import numpy as np
import EarlyStopping

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

from sklearn.base import clone
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score
from sklearn.neighbors import NearestNeighbors
from sklearn import linear_model, metrics, neighbors

In [3]:
from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.meta import LearnPPNSEClassifier
from sklearn.tree import DecisionTreeClassifier
from model import NormalNN, NNClassifier

from utils import prepare_data
from DSS.dataloader.glisterdataloader import GLISTERDataLoader
from DSS.dataloader.gradmatchdataloader import GradMatchDataLoader
from DSS.simpleNN_net import TwoLayerNet
from dotmap import DotMap

## Set CUDA

In [4]:
# Use a GPU when torch can see one; otherwise fall back to the CPU so that
# `device` is always defined for the cells below (previously it was only
# assigned on CUDA machines, which made every later `.to(device)` fail).
cuda = torch.cuda.is_available()

# NOTE: the GPU index is hard-coded; change 'cuda:6' to match the local machine.
device = 'cuda:6' if cuda else 'cpu'

## naïve: Full Data and Current Seg

In [5]:
# Naive baselines: train a fresh neural network for every data segment, either
# on all data seen so far ('Full data') or only on the labelled part of the
# current segment ('Current seg'), and report runtime / accuracy / F1.
dataset_li = ['SEA', 'Hyperplane', 'RandomRBF', 'Sine', 'Electricity', 'Weather', 'Spam', 'Usenet1', 'Usenet2']

for dataset in dataset_li:
    # load data, label, and concept drift points
    x_all = np.load(f'./dataset/{dataset}/data.npy')
    y_all = np.load(f'./dataset/{dataset}/label.npy')
    concept_drifts = np.load(f'./dataset/{dataset}/concept_drifts.npy')

    # number of classes in dataset
    n_class = 5 if dataset == 'RandomRBF' else 2

    # multi-class F1 is weighted; 'binary' is f1_score's default for two classes
    f1_average = 'weighted' if n_class > 2 else 'binary'

    # number of available data in current segment
    if dataset in ['Spam', 'Usenet1', 'Usenet2']:
        n_train = int((len(x_all)/len(concept_drifts))*0.2)
    else:
        n_train = int((len(x_all)/len(concept_drifts))*0.1)

    # the first half of the available data is used for training, the second
    # half for validation; everything after it in the segment is the test set
    split1 = int(n_train*0.5)
    split2 = n_train

    # data preprocessing (scaling)
    # The scaler only depends on the dataset, so it is fitted once here instead
    # of once per (method, segment) pair.
    # NOTE: it is fitted on the whole stream, test samples included.
    n_feature = x_all.shape[1]
    x_all_scale = StandardScaler().fit_transform(x_all)

    print('dataset: ', dataset)
    print('concept drifts: ', concept_drifts)

    method_li = ['Full data', 'Current seg']

    for method in method_li:
        print("----------------------------------------------------------------------------")
        print('method: ', method)

        all_time = []
        all_acc = []
        all_acc_std = []
        all_f1 = []
        all_f1_std = []

        # consecutive evaluation with data segments
        for n in range(len(concept_drifts)):
            train_time_li = []
            test_acc_li = []
            test_f1_li = []

            # split train, valid, and test set
            # segment n spans [seg_start, concept_drifts[n]) in the stream
            seg_start = 0 if n == 0 else concept_drifts[n-1]
            x_curr = x_all_scale[seg_start:concept_drifts[n]]
            y_curr = y_all[seg_start:concept_drifts[n]]

            if n == 0 or method == 'Full data':
                # everything up to (and including) the training part of this
                # segment; for n == 0 this is just the first split1 samples
                x_train = x_all_scale[:seg_start+split1]
                y_train = y_all[:seg_start+split1]
            elif method == 'Current seg':
                x_train = x_curr[:split1]
                y_train = y_curr[:split1]

            x_valid = x_curr[split1:split2]
            y_valid = y_curr[split1:split2]

            x_test = x_curr[split2:]
            y_test = y_curr[split2:]

            x_1 = torch.Tensor(x_train).to(device)
            y_1 = torch.Tensor(y_train).to(device, dtype=torch.int64)

            x_2 = torch.Tensor(x_valid).to(device)
            y_2 = torch.Tensor(y_valid).to(device, dtype=torch.int64)

            x_3 = torch.Tensor(x_test).to(device)
            y_3 = torch.Tensor(y_test).to(device, dtype=torch.int64)

            train_ds = TensorDataset(x_1, y_1)
            valid_ds = TensorDataset(x_2, y_2)
            test_ds = TensorDataset(x_3, y_3)

            train_loader = DataLoader(train_ds, batch_size=128, shuffle=True)
            valid_loader = DataLoader(valid_ds, batch_size=128, shuffle=True)
            test_loader = DataLoader(test_ds, batch_size=128, shuffle=True)

            # repeat experiments with 5 different seeds
            for s in range(5):
                # initialize model, optimizer, and criterion
                model = NormalNN(input_features=n_feature, n_class=n_class, seed=s)
                model = model.to(device)
                optimizer_config = {"lr": 0.001}
                clf = NNClassifier(model, nn.CrossEntropyLoss(reduction='mean'), optim.Adam, optimizer_config)

                # model training (only the fit call is timed)
                start = time.time()
                clf.fit({"train": train_loader, "val": valid_loader}, epochs=2000, 
                        earlystop_path=f'./ckpt/{dataset}_{method}.pt')
                train_time = time.time() - start
                train_time_li.append(train_time)

                # model evaluation
                test_output, test_loss = clf.evaluate(test_loader)
                test_acc = accuracy_score(test_output['true_y'], test_output['output'])
                test_f1 = f1_score(test_output['true_y'], test_output['output'], average=f1_average)
                test_acc_li.append(test_acc)
                test_f1_li.append(test_f1)

            # per-segment statistics over the 5 seeds
            all_time.append(np.mean(train_time_li))
            all_acc.append(np.mean(test_acc_li))
            all_acc_std.append(np.std(test_acc_li))
            all_f1.append(np.mean(test_f1_li))
            all_f1_std.append(np.std(test_f1_li))

        # print runtime, accuracy, and F1 score
        # (the reported std is the mean over segments of the per-segment seed std)
        print('overall train time: %.3f' %(np.mean(all_time)))
        print('overall test acc: avg %.3f, std %.3f' %(np.mean(all_acc), np.mean(all_acc_std)))
        print('overall test f1: avg %.3f, std %.3f' %(np.mean(all_f1), np.mean(all_f1_std)))
        
    print('\n')

dataset:  SEA
concept drifts:  [ 2000  4000  6000  8000 10000 12000 14000 16000]
----------------------------------------------------------------------------
method:  Full data
overall train time: 3.417
overall test acc: avg 0.849, std 0.005
overall test f1: avg 0.881, std 0.004
----------------------------------------------------------------------------
method:  Current seg
overall train time: 0.219
overall test acc: avg 0.864, std 0.004
overall test f1: avg 0.888, std 0.004


dataset:  Hyperplane
concept drifts:  [ 2000  4000  6000  8000 10000 12000 14000 16000]
----------------------------------------------------------------------------
method:  Full data
overall train time: 2.457
overall test acc: avg 0.843, std 0.004
overall test f1: avg 0.844, std 0.004
----------------------------------------------------------------------------
method:  Current seg
overall train time: 0.516
overall test acc: avg 0.893, std 0.004
overall test f1: avg 0.894, std 0.004


dataset:  RandomRBF
concept

## model-centric: HAT, ARF, and Learn++.NSE

In [6]:
# Model-centric baselines: feed each stream learner (HAT, ARF, Learn++.NSE)
# all past segments plus the labelled part of the current segment one sample
# at a time, then evaluate it on the rest of the current segment.
dataset_li = ['SEA', 'Hyperplane', 'RandomRBF', 'Sine', 'Electricity', 'Weather', 'Spam', 'Usenet1', 'Usenet2']

for dataset in dataset_li:
    # load data, label, and concept drift points
    x_all = np.load(f'./dataset/{dataset}/data.npy')
    y_all = np.load(f'./dataset/{dataset}/label.npy')
    concept_drifts = np.load(f'./dataset/{dataset}/concept_drifts.npy')
    
    # number of classes in dataset
    n_class = 5 if dataset == 'RandomRBF' else 2

    # label set handed to LearnPPNSE's partial_fit
    class_labels = np.array(range(n_class))

    # multi-class F1 is weighted; 'binary' is f1_score's default for two classes
    f1_average = 'weighted' if n_class > 2 else 'binary'
    
    # number of available data in current segment
    if dataset in ['Spam', 'Usenet1', 'Usenet2']:
        n_train = int((len(x_all)/len(concept_drifts))*0.2)
    else:
        n_train = int((len(x_all)/len(concept_drifts))*0.1)

    # data preprocessing (scaling)
    # The scaler only depends on the dataset, so it is fitted once here instead
    # of once per (method, segment) pair.
    # NOTE: it is fitted on the whole stream, test samples included.
    x_all_scale = StandardScaler().fit_transform(x_all)

    print('dataset: ', dataset)
    print('concept drifts: ', concept_drifts)
    
    method_li = ['HAT', 'ARF', 'LearnPPNSE']

    for method in method_li:
        print("----------------------------------------------------------------------------")
        print('method: ', method)

        all_time = []
        all_acc = []
        all_acc_std = []
        all_f1 = []
        all_f1_std = []

        # consecutive evaluation with data segments
        for n in range(len(concept_drifts)):
            # split train and test set
            # segment n spans [seg_start, concept_drifts[n]) in the stream
            seg_start = 0 if n == 0 else concept_drifts[n-1]
            x_curr = x_all_scale[seg_start:concept_drifts[n]]
            y_curr = y_all[seg_start:concept_drifts[n]]

            # These learners have no validation step, so the train and valid
            # parts of the current segment (its first n_train samples) are
            # both used for training, preceded by every earlier segment.
            x_train = np.concatenate((x_all_scale[:seg_start], x_curr[:n_train]))
            y_train = np.concatenate((y_all[:seg_start], y_curr[:n_train])).reshape(-1,1)

            x_test = x_curr[n_train:]
            y_test = y_curr[n_train:].reshape(-1,1)

            train_time_li = []
            test_acc_li = []
            test_f1_li = []

            # repeat experiments with 5 different seeds
            for s in range(5):
                # initialize model
                if method == 'HAT':
                    model = HoeffdingAdaptiveTreeClassifier(random_state=s, grace_period=n_train)
                elif method == 'ARF':
                    model = AdaptiveRandomForestClassifier(random_state=s, grace_period=n_train)
                elif method == 'LearnPPNSE':
                    # NOTE: the seed `s` is not passed to this learner
                    model = LearnPPNSEClassifier(window_size=n_train)

                # model training (one sample per partial_fit call)
                start = time.time()
                for x_row, y_row in zip(x_train, y_train):
                    X = np.array([x_row])
                    if method == 'HAT' or method == 'ARF':
                        model.partial_fit(X, y_row)
                    elif method == 'LearnPPNSE':
                        model.partial_fit(X, y_row, classes=class_labels)
                train_time = time.time() - start
                train_time_li.append(train_time)

                # model evaluation (one sample per predict call)
                y_pred = [model.predict(np.array([x_row]))[0] for x_row in x_test]
                y_truth = [y_row[0] for y_row in y_test]

                test_acc = accuracy_score(y_truth, y_pred)
                test_f1 = f1_score(y_truth, y_pred, average=f1_average)
                test_acc_li.append(test_acc)
                test_f1_li.append(test_f1)

            # per-segment statistics over the 5 seeds
            all_time.append(np.mean(train_time_li))
            all_acc.append(np.mean(test_acc_li))
            all_acc_std.append(np.std(test_acc_li))
            all_f1.append(np.mean(test_f1_li))
            all_f1_std.append(np.std(test_f1_li))

        # print runtime, accuracy, and F1 score
        # (the reported std is the mean over segments of the per-segment seed std)
        print('overall train time: %.3f' %(np.mean(all_time)))
        print('overall test acc: avg %.3f, std %.3f' %(np.mean(all_acc), np.mean(all_acc_std)))
        print('overall test f1: avg %.3f, std %.3f' %(np.mean(all_f1), np.mean(all_f1_std)))
        
    print('\n')

dataset:  SEA
concept drifts:  [ 2000  4000  6000  8000 10000 12000 14000 16000]
----------------------------------------------------------------------------
method:  HAT
overall train time: 1.376
overall test acc: avg 0.825, std 0.008
overall test f1: avg 0.862, std 0.006
----------------------------------------------------------------------------
method:  ARF
overall train time: 23.657
overall test acc: avg 0.825, std 0.008
overall test f1: avg 0.863, std 0.008
----------------------------------------------------------------------------
method:  LearnPPNSE
overall train time: 6.637
overall test acc: avg 0.803, std 0.006
overall test f1: avg 0.835, std 0.005


dataset:  Hyperplane
concept drifts:  [ 2000  4000  6000  8000 10000 12000 14000 16000]
----------------------------------------------------------------------------
method:  HAT
overall train time: 2.257
overall test acc: avg 0.860, std 0.008
overall test f1: avg 0.862, std 0.008
-------------------------------------------------

## model-centric: SEGA

In [7]:
"""
Implementation of the SEGA paper.
A Segment-Based Drift Adaptation Method for Data Streams.
citation:
@article{DBLP:journals/tnn/SongLLLZ22,
  author       = {Yiliao Song and
                  Jie Lu and
                  Anjin Liu and
                  Haiyan Lu and
                  Guangquan Zhang},
  title        = {A Segment-Based Drift Adaptation Method for Data Streams},
  journal      = {{IEEE} Trans. Neural Networks Learn. Syst.},
  volume       = {33},
  number       = {9},
  pages        = {4876--4889},
  year         = {2022},
  url          = {https://doi.org/10.1109/TNNLS.2021.3062062},
  doi          = {10.1109/TNNLS.2021.3062062},
  timestamp    = {Thu, 22 Sep 2022 19:58:23 +0200},
  biburl       = {https://dblp.org/rec/journals/tnn/SongLLLZ22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
"""
class IncLDD_C:
    """Segment-based online classifier (SEGA baseline).

    The input array holds one sample per row with the label in the last
    column. The first ``knowledge_base_size`` rows form the knowledge base,
    which is split into windows of ``win_size`` rows; one clone of
    ``base_learner`` is fitted per window. Every later row is classified by a
    majority vote of the five learners whose windows have the smallest SSD
    score returned by ``compSSD``.

    Args:
        dataset: 2-D array, features followed by the label column.
        n_train: number of rows right after the knowledge base whose labels
            may be used for adaptation; predictions for these rows are
            excluded from the returned results.
        base_learner: scikit-learn style estimator; it is cloned, never
            fitted directly.
        knowledge_base_size: number of leading rows used as knowledge base.
        win_size: number of rows per knowledge-base window.
        para_KNN: neighbourhood size forwarded to ``compSSD`` / ``incrOnS``.
    """

    def __str__(self):
        return "Class: Inc_LDD_stream_classification"
    
    def __init__(self, dataset, n_train, base_learner, knowledge_base_size=1000, win_size=100, para_KNN = 100):        
        self.data = dataset
        self.n_train = n_train
        self.learner = base_learner 
        self.win_size = win_size         
        self.kb_size = knowledge_base_size
        self.KNN_size = para_KNN
        self.predictions = []       
    
    def run_online(self):
        """Run the stream and evaluate on the rows after the adaptation phase.

        Returns:
            A tuple ``(acc, y_true, y_pred, votes)`` where ``acc`` is the
            accuracy on rows ``kb_size + n_train`` onwards, ``y_true`` is the
            matching label column (kept 2-D), ``y_pred`` the majority-vote
            predictions for those rows, and ``votes`` the individual learner
            predictions for every row after the knowledge base (adaptation
            rows included).
        """
        data = self.data
        n_size_data = self.data.shape[0]        
        # rows in [kb_size, adapt_end) may still use their labels for adaptation
        adapt_end = self.kb_size + self.n_train
        knowledgebase = self.data[:self.kb_size, :]
        num_kb_win = math.ceil(self.kb_size/self.win_size)
        error_debug = []
        
        predictions = []
        buffer = np.ndarray(shape=(0,data.shape[1]), dtype=float)                
           
        # np.split needs kb_size to be divisible by num_kb_win
        kb_data_list = np.split(knowledgebase,num_kb_win,axis=0)    
        knowledgebase_norm,min_max_data = scale_linear_bycolumn(knowledgebase)
        kb_ssd_array, dist_indx = compSSD(knowledgebase_norm,num_kb_win,self.KNN_size)
        kb_ssd_array_0 = kb_ssd_array
        
        # one independent learner per knowledge-base window
        learner_list = []
        for learn_data  in kb_data_list:
            _learner = self.learner
            learner_list.append(clone(_learner).fit(learn_data[:,:-1],learn_data[:,-1]))
                    
        K_uq = np.zeros(shape=(self.kb_size))
        K_vp = np.zeros(shape=(num_kb_win))
        Oq = np.ndarray(shape = 0)
        
        for i in range(self.kb_size, n_size_data):
            
            # the five windows with the lowest SSD score vote on this row
            min_ssd_idx = np.argsort(kb_ssd_array)[0:5]
   
            learner_predictions = []                
            for _learner_indx in min_ssd_idx:
                learner_predictions.append(learner_list[_learner_indx].predict([data[i,:-1]])[0])
                if i < adapt_end:
                    # refit on the learner's original window plus this single
                    # row (earlier adaptation rows are not accumulated)
                    learn_data = np.vstack([kb_data_list[_learner_indx], data[i,:]])
                    learner_list[_learner_indx] = learner_list[_learner_indx].fit(learn_data[:,:-1],learn_data[:,-1])
            error_debug.append(learner_predictions)
            # majority vote
            predictions.append(max(set(learner_predictions), key=learner_predictions.count))
    
            # NOTE(review): `buffer` starts empty and is only appended to
            # inside this branch, which itself requires the buffer to be full.
            # For win_size > 0 neither this branch nor the next one looks
            # reachable, so the knowledge base is never updated. Confirm
            # whether `buffer.shape[0] < self.win_size` was intended here.
            if i < adapt_end and buffer.shape[0] == self.win_size:
                K_vp, K_uq, Oq = incrOnS(knowledgebase_norm, data[i,:], min_max_data, dist_indx, num_kb_win, K_uq, K_vp, Oq, self.KNN_size)         
                deltaSSD = -K_vp - (1+1/num_kb_win)*np.sum(np.split(K_uq,num_kb_win),axis=1)
                kb_ssd_array = deltaSSD
                kb_ssd_array_0 = deltaSSD + kb_ssd_array_0
                buffer = np.vstack([buffer, data[i,:]])
            
            
            if i < adapt_end and buffer.shape[0] == self.win_size:
                # slide the knowledge base: append the buffer, keep the newest kb_size rows
                knowledgebase =  np.concatenate((knowledgebase,buffer),axis = 0)[-self.kb_size:,:]
               
                # replace the oldest learner with one trained on the buffer
                learner_buffer = clone(self.learner).fit(buffer[:,:-1],buffer[:,-1])
                learner_list.append(learner_buffer)
                del learner_list[0]
               
                buffer = np.ndarray(shape=(0,data.shape[1]), dtype=float)
               
                # recompute all knowledge-base statistics from scratch
                kb_data_list = np.split(knowledgebase,num_kb_win,axis=0)    
                knowledgebase_norm,min_max_data = scale_linear_bycolumn(knowledgebase)
                kb_ssd_array, dist_indx = compSSD(knowledgebase_norm,num_kb_win,self.KNN_size)
                kb_ssd_array_0 = kb_ssd_array

                K_uq = np.zeros(shape=(self.kb_size))
                K_vp = np.zeros(shape=(num_kb_win))
                Oq = np.ndarray(shape = 0)

        # Drop the predictions made on the adaptation rows. This uses
        # self.n_train (not a notebook-level global named n_train) so the
        # class works with whatever value it was constructed with.
        predictions = predictions[self.n_train:]

        acc = metrics.accuracy_score(self.data[adapt_end:n_size_data, -1], predictions)

        return acc,self.data[adapt_end:n_size_data, -1:],np.asarray(predictions),np.asarray(error_debug)
       
def incrOnS(knowledgebase_norm,newinstance,min_max_data,dist_indx,num_kb_win,K_uq,K_vp,Oq,para_KNN=100):
    """Score each knowledge-base window against one newly arrived instance.

    The instance is normalised with the knowledge base's stored statistics,
    its ``para_KNN`` nearest knowledge-base rows are looked up, and for every
    window the number of those neighbours falling inside the window is
    divided by the knowledge-base size and by the window size.

    Args:
        knowledgebase_norm: normalised knowledge base, one row per sample.
        newinstance: the raw (un-normalised) new row.
        min_max_data: statistics list returned by ``scale_linear_bycolumn``
            for the knowledge base.
        dist_indx: list returned by ``compSSD``; only element 2 (the row
            indices of each window) is used here.
        num_kb_win: number of windows in the knowledge base.
        K_uq: returned unchanged.
        K_vp: ignored on input; replaced by the freshly computed scores.
        Oq: returned unchanged.
        para_KNN: number of nearest neighbours to count.

    Returns:
        ``(K_vp, K_uq, Oq)`` with ``K_vp`` a float array holding one score
        per window.
    """
    # normalise the instance as a single-row float matrix
    _instance = np.array(newinstance, dtype=float).reshape(1, -1)
    _instance_norm, __ = scale_linear_bycolumn(_instance,min_max_data=min_max_data)
    
    Np = int(knowledgebase_norm.shape[0])
    Nq = int(knowledgebase_norm.shape[0]/num_kb_win)
    
    indx_seg = dist_indx[2]

    # rank every knowledge-base row by distance, then keep the closest para_KNN
    nbrs = NearestNeighbors(n_neighbors=knowledgebase_norm.shape[0],algorithm = 'ball_tree').fit(knowledgebase_norm)
    __, _indices_0 = nbrs.kneighbors(_instance_norm)
    _indices = _indices_0[0][:para_KNN]
    
    # per window: neighbours inside the window, scaled by both sizes
    K_vp = np.array(
        [len(np.intersect1d(_indices,_seg))/Np/Nq for _seg in indx_seg],
        dtype=float,
    )
               
    return K_vp, K_uq, Oq    
        
def compSSD(knowledgebase_norm, num_kb_win, para_KNN = 100):    
    """Compute a per-window SSD score over the normalised knowledge base.

    The knowledge base is viewed as ``P`` (all rows except the last window)
    followed by ``Q`` (the last window). For every row the ``para_KNN``
    nearest rows are found and counted by which side they fall on; the
    resulting local density differences are summed per window.

    Args:
        knowledgebase_norm: normalised knowledge base, one row per sample.
        num_kb_win: number of equally sized windows (must be at least 2 and
            divide the row counts for ``np.split`` to succeed).
        para_KNN: number of nearest neighbours per row.

    Returns:
        ``(ssd, dist_indx)`` where ``ssd`` has one entry per window (the last
        entry is left at 0) and ``dist_indx`` is
        ``[distances as nested list, neighbour index array, row indices per window]``.
    """
    n_rows = knowledgebase_norm.shape[0]
    Np = int(n_rows - n_rows/num_kb_win)
    Nq = int(n_rows/num_kb_win)
    
    knn_model = NearestNeighbors(n_neighbors=para_KNN,algorithm = 'ball_tree').fit(knowledgebase_norm)
    distances,indices = knn_model.kneighbors(knowledgebase_norm)

    neighbours_of_p = indices[:Np,:]
    neighbours_of_q = indices[Np:,:]

    # NOTE(review): the P-side counts treat index Np as belonging to P
    # (`<=` / `>`), while the Q-side counts treat it as belonging to Q
    # (`>=` / `<`). Kept as-is; confirm against the reference implementation.
    K_up = (neighbours_of_p<=Np).sum(1)  
    K_uq = (neighbours_of_p>Np).sum(1)   
    K_vq = (neighbours_of_q>=Np).sum(1)
    K_vp = (neighbours_of_q<Np).sum(1)
    
    sd_P = (K_up/Np-K_uq/Nq)/Np
    sd_Q = (K_vq/Nq-K_vp/Np)/Nq
    
    # every P window gets its own sum plus an equal share of Q's total
    p_window_sums = np.sum(np.split(sd_P,num_kb_win-1,axis=0),1)
    ssd = np.zeros(shape = num_kb_win)
    ssd[0:-1] = p_window_sums+sd_Q.sum()/(num_kb_win-1)
    
    # row indices belonging to each window
    indx_seg = np.split(np.arange(0,n_rows,1),num_kb_win,axis=0) 
    dist_indx = [np.ndarray.tolist(distances),indices,indx_seg]
    return ssd, dist_indx  
            
def scale_linear_bycolumn(data, high=1, low=0,  min_max_data=None):
    """Linearly rescale every column of ``data`` into ``[low, high]``.

    Column statistics are computed from ``data`` unless ``min_max_data`` (a
    sequence ``[mins, maxs, means, stds]`` as returned by an earlier call) is
    supplied, in which case those are reused. Columns with zero range are
    divided by 1 instead. After scaling, the last column is additionally
    multiplied by the number of remaining columns.

    Returns:
        ``(data_norm, [mins, maxs, means, stds])``; the input is not modified.
    """
    if min_max_data is not None:
        # reuse previously computed statistics
        mins_data, maxs_data, avg_data, std_data = (min_max_data[k] for k in range(4))
    else:
        mins_data = np.min(data, axis=0)
        maxs_data = np.max(data, axis=0)
        avg_data = np.mean(data, axis=0)
        std_data = np.std(data, axis = 0)

    # guard against division by zero for constant columns
    span = maxs_data - mins_data
    span[span == 0] = 1

    scaled_gap = ((high - low) * (maxs_data - data)) / span
    data_norm = high - scaled_gap

    # up-weight the last column by the number of other columns
    n_other_cols = data.shape[1] - 1
    data_norm[:, -1] = data_norm[:, -1] * n_other_cols

    stats = [mins_data, maxs_data, avg_data, std_data]
    return data_norm, stats

In [8]:
# SEGA baseline driver: the first segment is handled by a plain KNN classifier
# (there is no knowledge base yet); every later segment runs IncLDD_C on the
# previous and current segment stacked together.
dataset_li = ['SEA', 'Hyperplane', 'RandomRBF', 'Sine', 'Electricity', 'Weather', 'Spam', 'Usenet1', 'Usenet2']
    
for dataset in dataset_li:
    # load data, label, and concept drift points
    x_all = np.load(f'./dataset/{dataset}/data.npy')
    y_all = np.load(f'./dataset/{dataset}/label.npy')
    concept_drifts = np.load(f'./dataset/{dataset}/concept_drifts.npy')

    # number of classes in dataset
    n_class = 5 if dataset == 'RandomRBF' else 2

    # the three text datasets use a larger labelled share and a shorter history
    small_stream = dataset in ['Spam', 'Usenet1', 'Usenet2']

    # number of available data in current segment
    n_train = int((len(x_all)/len(concept_drifts))*(0.2 if small_stream else 0.1))

    # set window size
    win_size = n_train

    # set historical buffer size
    hist_size = 5 if small_stream else 10

    # set knowledge base size
    knowledge_base_size = hist_size*win_size

    # segments are taken as equal-length slices here, not from concept_drifts
    seg_len = int(len(x_all)/len(concept_drifts))

    # weighted F1 for multi-class data, f1_score's default otherwise
    f1_kwargs = {'average': 'weighted'} if n_class > 2 else {}

    print('dataset: ', dataset)
    print('concept drifts: ', concept_drifts)

    sega_time = []
    sega_acc = []
    sega_acc_std = []
    sega_f1 = []
    sega_f1_std = []
    
    print("----------------------------------------------------------------------------")
    print('method: SEGA')

    # consecutive evaluation with data segments
    for n in range(len(concept_drifts)):
        sega_time_li = []
        sega_acc_li = []
        sega_f1_li = []

        # repeat experiments with 5 different seeds
        # (the loop index is not passed to any estimator)
        for s in range(5):
            neigh = neighbors.KNeighborsClassifier(n_neighbors=5, weights='distance')

            if n == 0:
                # for the first segment, there is no knowledge base:
                # fit on the labelled rows, predict the rest of the segment
                start = time.time()
                neigh.fit(x_all[:n_train], y_all[:n_train])
                end = time.time()
                y_pred = neigh.predict(x_all[n_train:seg_len])
                y_truth = y_all[n_train:seg_len]
            else:
                # after the first segment, use previous knowledge base for prediction:
                # previous + current segment, labels appended as the last column
                lo, hi = (n-1)*seg_len, (n+1)*seg_len
                x_temp = x_all[lo:hi]
                y_temp = y_all[lo:hi]
                _data = np.hstack((x_temp,y_temp.reshape(len(y_temp),1)))

                _predictor = neigh 
                _incLDD_C = IncLDD_C(_data, n_train, _predictor, knowledge_base_size, win_size, para_KNN = int(n_train/2))   

                start = time.time()
                acc_result, realvalue, predvalue, error_debug = _incLDD_C.run_online()
                end = time.time()

                y_pred = predvalue
                y_truth = realvalue

            sega_time_li.append(end-start)
            sega_acc_li.append(accuracy_score(y_truth, y_pred))
            sega_f1_li.append(f1_score(y_truth, y_pred, **f1_kwargs))

        # per-segment statistics over the 5 repetitions
        sega_time.append(np.mean(sega_time_li))
        sega_acc.append(np.mean(sega_acc_li))
        sega_acc_std.append(np.std(sega_acc_li))
        sega_f1.append(np.mean(sega_f1_li))
        sega_f1_std.append(np.std(sega_f1_li))

    # print runtime, accuracy, and F1 score
    print('overall train time: %.3f' %(np.mean(sega_time)))
    print('overall test acc: avg %.3f, std %.3f' %(np.mean(sega_acc), np.mean(sega_acc_std)))
    print('overall test f1: avg %.3f, std %.3f' %(np.mean(sega_f1), np.mean(sega_f1_std)))

    print('\n')

dataset:  SEA
concept drifts:  [ 2000  4000  6000  8000 10000 12000 14000 16000]
----------------------------------------------------------------------------
method: SEGA
overall train time: 4.466
overall test acc: avg 0.797, std 0.000
overall test f1: avg 0.842, std 0.000


dataset:  Hyperplane
concept drifts:  [ 2000  4000  6000  8000 10000 12000 14000 16000]
----------------------------------------------------------------------------
method: SEGA
overall train time: 4.519
overall test acc: avg 0.853, std 0.000
overall test f1: avg 0.851, std 0.000


dataset:  RandomRBF
concept drifts:  [ 2000  4000  6000  8000 10000 12000 14000 16000]
----------------------------------------------------------------------------
method: SEGA
overall train time: 4.509
overall test acc: avg 0.825, std 0.000
overall test f1: avg 0.825, std 0.000


dataset:  Sine
concept drifts:  [ 2000  4000  6000  8000 10000 12000 14000 16000]
----------------------------------------------------------------------------


## data-centric: CVDTE

In [9]:
dataset_li = ['SEA', 'Hyperplane', 'RandomRBF', 'Sine', 'Electricity', 'Weather', 'Spam', 'Usenet1', 'Usenet2']

for dataset in dataset_li:
    # load data, label, and concept drift points
    x_all = np.load(f'./dataset/{dataset}/data.npy')
    y_all = np.load(f'./dataset/{dataset}/label.npy')
    concept_drifts = np.load(f'./dataset/{dataset}/concept_drifts.npy')
    
    # number of classes in dataset
    if dataset == 'RandomRBF':
        n_class = 5
    else:
        n_class = 2
    
    # number of available data in current segment
    if dataset in ['Spam', 'Usenet1', 'Usenet2']:
        n_train = int((len(x_all)/len(concept_drifts))*0.2)
    else:
        n_train = int((len(x_all)/len(concept_drifts))*0.1)

    print('dataset: ', dataset)
    print('concept drifts: ', concept_drifts)
    
    method_li = ['CVDTE']

    for method in method_li:
        print("----------------------------------------------------------------------------")
        print('method: ', method)

        all_time = []
        all_acc = []
        all_acc_std = []
        all_f1 = []
        all_f1_std = []

        # consecutive evaluation with data segments
        for n in range(len(concept_drifts)):
            n_dataset = n+1
            n_feature = x_all.shape[1]

            # data preprocessing (scaling)
            scaler = StandardScaler()
            x_all_scale = scaler.fit_transform(x_all)

            # first data segment case (only FN possible)
            if n == 0:
                train_time_li = []
                test_acc_li = []
                
                # split train, valid, and test set
                x_curr = x_all_scale[:concept_drifts[n]]
                y_curr = y_all[:concept_drifts[n]]

                indices = list(range(len(x_curr)))
                split1 = int(n_train*0.5)
                split2 = n_train
                train_indices = indices[:split1]
                valid_indices = indices[split1:split2]
                test_indices = indices[split2:]

                x_train = x_curr[train_indices]
                y_train = y_curr[train_indices]

                x_valid = x_curr[valid_indices]
                y_valid = y_curr[valid_indices]

                x_test = x_curr[test_indices]
                y_test = y_curr[test_indices]

                # repeat experiments with 5 different seeds
                for s in range(5):
                    start = time.time()

                    # train model with new data (FN)
                    model = DecisionTreeClassifier(random_state=s)
                    model.fit(x_train, y_train)

                    train_time = time.time() - start
                    train_time_li.append(train_time)

                    # model evaluation
                    y_pred = model.predict(x_test)
                    y_truth = y_test
                    
                    test_acc = accuracy_score(y_truth, y_pred)
                    if n_class > 2:
                        test_f1 = f1_score(y_truth, y_pred, average='weighted')
                    elif n_class == 2:
                        test_f1 = f1_score(y_truth, y_pred)
                    test_acc_li.append(test_acc)
                    test_f1_li.append(test_f1)

                    pickle.dump(model, open(f'./ckpt/CVDTE_{0}_{s}', 'wb'))

                all_time.append(np.mean(train_time_li))
                all_acc.append(np.mean(test_acc_li))
                all_acc_std.append(np.std(test_acc_li))
                all_f1.append(np.mean(test_f1_li))
                all_f1_std.append(np.std(test_f1_li))

            # next data segment case (FN, FN+, FO, FO+ possible)
            else:
                # split train, valid, and test set
                x_curr = x_all_scale[concept_drifts[n-1]:concept_drifts[n]]
                y_curr = y_all[concept_drifts[n-1]:concept_drifts[n]]

                indices = list(range(len(x_curr)))
                split1 = int(n_train*0.5)
                split2 = n_train
                train_indices = indices[:split1]
                valid_indices = indices[split1:split2]
                test_indices = indices[split2:]

                x_train = x_curr[train_indices]
                y_train = y_curr[train_indices]

                x_valid = x_curr[valid_indices]
                y_valid = y_curr[valid_indices]

                x_test = x_curr[test_indices]
                y_test = y_curr[test_indices]

                train_time_li = []
                test_acc_li = []
                test_f1_li = []

                # repeat experiments with 5 different seeds
                for s in range(5):
                    start = time.time()

                    # train model with new data (FN)
                    model_fn = DecisionTreeClassifier(random_state=s)
                    model_fn.fit(x_train, y_train)

                    # find previous useful data
                    useful_sample = []

                    for j in range(1, n+1):

                        if j == 1:
                            init = 0
                        else:
                            init = concept_drifts[j-2]

                        model_j = pickle.load(open(f'./ckpt/CVDTE_{j-1}_{s}', 'rb'))

                        if j == 1:
                            x_cand = x_all_scale[:concept_drifts[j-1]]
                            y_cand = y_all[:concept_drifts[j-1]]
                        else:
                            x_cand = x_all_scale[concept_drifts[j-2]:concept_drifts[j-1]]
                            y_cand = y_all[concept_drifts[j-2]:concept_drifts[j-1]]

                        y_pred = model.predict(x_cand)
                        prev_y_pred = model_j.predict(x_cand)

                        for ind in range(len(y_pred)):
                            # condition for determining useful data
                            if y_pred[ind] == prev_y_pred[ind] and y_pred[ind] == y_cand[ind]:
                                useful_sample.append(init+ind)

                    x_useful = x_all_scale[useful_sample]
                    y_useful = y_all[useful_sample]

                    x_train_final = np.concatenate((x_all_scale[useful_sample], x_train))
                    y_train_final = np.concatenate((y_all[useful_sample], y_train))

                    # train model with new data and previous useful data (FN+)
                    model_fnp = DecisionTreeClassifier(random_state=s)
                    model_fnp.fit(x_train_final, y_train_final)

                    # find previous best model (FO)
                    best_score = 0

                    for j in range(1, n+1):
                        model_j = pickle.load(open(f'./ckpt/CVDTE_{j-1}_{s}', 'rb'))
                        score = model_j.score(x_valid, y_valid)
                        if score > best_score:
                            model_fo = model_j 

                    # update previous best model with new data (FO+)
                    model_fop = copy.deepcopy(model_fo)
                    model_fop.fit(x_train, y_train) 

                    # compare four different models(FN, FN+, FO, FO+) and choose the best model
                    fn_score = model_fn.score(x_valid, y_valid)
                    fnp_score = model_fnp.score(x_valid, y_valid)
                    fo_score = model_fo.score(x_valid, y_valid)
                    fop_score = model_fop.score(x_valid, y_valid)

                    if max(fn_score, fnp_score, fo_score, fop_score) == fn_score:
                        model = model_fn

                    elif max(fn_score, fnp_score, fo_score, fop_score) == fnp_score:
                        model = model_fnp

                    elif max(fn_score, fnp_score, fo_score, fop_score) == fo_score:
                        model = model_fo

                    elif max(fn_score, fnp_score, fo_score, fop_score) == fop_score:
                        model = model_fop

                    train_time = time.time() - start
                    train_time_li.append(train_time)

                    # final model evaluation
                    y_pred = model.predict(x_test)
                    y_truth = y_test
                    
                    test_acc = accuracy_score(y_truth, y_pred)
                    if n_class > 2:
                        test_f1 = f1_score(y_truth, y_pred, average='weighted')
                    elif n_class == 2:
                        test_f1 = f1_score(y_truth, y_pred)
                    test_acc_li.append(test_acc)
                    test_f1_li.append(test_f1)

                    pickle.dump(model, open(f'./ckpt/CVDTE_{n}_{s}', 'wb'))
                    
                all_time.append(np.mean(train_time_li))
                all_acc.append(np.mean(test_acc_li))
                all_acc_std.append(np.std(test_acc_li))
                all_f1.append(np.mean(test_f1_li))
                all_f1_std.append(np.std(test_f1_li))

        # print runtime, accuracy, and F1 score
        print('overall train time: %.3f' %(np.mean(all_time)))
        print('overall test acc: avg %.3f, std %.3f' %(np.mean(all_acc), np.mean(all_acc_std)))
        print('overall test f1: avg %.3f, std %.3f' %(np.mean(all_f1), np.mean(all_f1_std)))
        
    print('\n')

dataset:  SEA
concept drifts:  [ 2000  4000  6000  8000 10000 12000 14000 16000]
----------------------------------------------------------------------------
method:  CVDTE
overall train time: 0.023
overall test acc: avg 0.806, std 0.018
overall test f1: avg 0.812, std 0.047


dataset:  Hyperplane
concept drifts:  [ 2000  4000  6000  8000 10000 12000 14000 16000]
----------------------------------------------------------------------------
method:  CVDTE
overall train time: 0.038
overall test acc: avg 0.744, std 0.007
overall test f1: avg 0.752, std 0.015


dataset:  RandomRBF
concept drifts:  [ 2000  4000  6000  8000 10000 12000 14000 16000]
----------------------------------------------------------------------------
method:  CVDTE
overall train time: 0.053
overall test acc: avg 0.614, std 0.015
overall test f1: avg 0.621, std 0.023


dataset:  Sine
concept drifts:  [ 2000  4000  6000  8000 10000 12000 14000 16000]
-----------------------------------------------------------------------

## data subset selection: GLISTER and GRAD-MATCH

In [10]:
# loss functions in model training
# criterion: cross entropy with the default reduction; used below to compute
# the validation loss of each batch.
criterion = nn.CrossEntropyLoss()
# criterion_nored: cross entropy with reduction='none' (one loss value per
# sample); used below for the training loss, where the per-sample losses are
# combined with the weights yielded by the subset-selection dataloader.
criterion_nored = nn.CrossEntropyLoss(reduction='none')

In [11]:
def Glister_cv(budget):
    """Glister cross validation.

    Trains a fresh TwoLayerNet with the GLISTER dataloader for one candidate
    budget and reports how well it does on the validation set.

    This function takes only ``budget`` as an argument; everything else is read
    from notebook-level globals that must be set before calling it:
    n_feature, n_class, n_hidden, s, device, dataset, split1, group,
    x_all_scale, y_all, trainloader, valloader, num_epochs, criterion and
    criterion_nored.

    Args:
        budget: fraction of subset size.
    Returns:
        Minus value of minimum validation loss (so a larger value is better).
    """
    # initialize model
    model = TwoLayerNet(n_feature, n_class, n_hidden, s)
    model = model.to(device)

    # set model optimizer, scheduler, and earlystop path
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10)
    earlystop_path = f'./ckpt/{dataset}_Glister.pt'

    # set dataloader parameters
    # NOTE(review): num_epochs is the literal 2000 here, while the training
    # loop below is bounded by the global num_epochs -- confirm they should agree.
    dss_args = dict(model=model,
                    loss=criterion_nored,
                    eta=0.001,
                    num_classes=n_class,
                    num_epochs=2000,
                    device=device,
                    fraction=budget,
                    init_budget=split1,
                    select_every=20,
                    kappa=0,
                    linear_layer=True,
                    selection_type='PerSample',
                    groups=group,
                    x_all=x_all_scale,
                    y_all=y_all,
                   )
    dss_args = DotMap(dss_args)

    # define GLISTER dataloader
    dataloader = GLISTERDataLoader(trainloader, valloader, dss_args, batch_size=128,
                                   shuffle=True, pin_memory=False)

    val_losses = []
    early_stopping = EarlyStopping.EarlyStopping(patience=10, delta=0.0001, path=earlystop_path)

    # model training with data segment selection
    for epoch in range(num_epochs):
        model.train()
        for inputs, targets, weights in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device, non_blocking=True)
            weights = weights.to(device)
            optimizer.zero_grad()
            outputs = model(inputs, last=False, freeze=False)
            # weighted average of the per-sample losses, weights normalized to sum to 1
            losses = criterion_nored(outputs, targets)
            loss = torch.dot(losses, weights/(weights.sum()))
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0
        val_total = 0

        # model evaluation using validation set
        with torch.no_grad():
            for inputs, targets in valloader:
                inputs, targets = inputs.to(device), targets.to(device, non_blocking=True, dtype=torch.int64)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                # re-weight the batch loss by batch size to get a per-sample average below
                val_loss += loss.item()*targets.size(0)
                val_total += targets.size(0)

        epoch_val_loss = val_loss / val_total
        val_losses.append(epoch_val_loss)

        scheduler.step(epoch_val_loss)
        early_stopping(epoch_val_loss, model)

        if early_stopping.early_stop:
            break

    return -np.min(val_losses)

In [12]:
def GradMatch_cv(budget):
    """GradMatch cross validation.

    Trains a fresh TwoLayerNet with the GradMatch dataloader for one candidate
    budget and reports how well it does on the validation set.

    This function takes only ``budget`` as an argument; everything else is read
    from notebook-level globals that must be set before calling it:
    n_feature, n_class, n_hidden, s, device, dataset, split1, group,
    x_all_scale, y_all, trainloader, valloader, num_epochs, criterion and
    criterion_nored.

    Args:
        budget: fraction of subset size.
    Returns:
        Minus value of minimum validation loss (so a larger value is better).
    """
    # initialize model
    model = TwoLayerNet(n_feature, n_class, n_hidden, s)
    model = model.to(device)

    # set model optimizer, scheduler, and earlystop path
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10)
    earlystop_path = f'./ckpt/{dataset}_GradMatch.pt'

    # the 'Sine' dataset uses a different eps value than all other datasets
    eps = 0.5 if dataset == 'Sine' else 0.01

    # set dataloader parameters
    # NOTE(review): num_epochs is the literal 2000 here, while the training
    # loop below is bounded by the global num_epochs -- confirm they should agree.
    dss_args = dict(model=model,
                    loss=criterion_nored,
                    eta=0.001,
                    num_classes=n_class,
                    num_epochs=2000,
                    device=device,
                    fraction=budget,
                    init_budget=split1,
                    select_every=20,
                    kappa=0,
                    linear_layer=True,
                    selection_type='PerBatch',
                    valid=True,
                    v1=True,
                    lam=0.5,
                    eps=eps,
                    groups=group,
                    x_all=x_all_scale,
                    y_all=y_all
                   )
    dss_args = DotMap(dss_args)

    # define GradMatch dataloader
    dataloader = GradMatchDataLoader(trainloader, valloader, dss_args,
                      batch_size=128,
                      shuffle=True,
                      pin_memory=False)

    val_losses = []
    early_stopping = EarlyStopping.EarlyStopping(patience=10, delta=0.0001, path=earlystop_path)

    # model training with data segment selection
    for epoch in range(num_epochs):
        model.train()
        for inputs, targets, weights in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device, non_blocking=True)
            weights = weights.to(device)
            optimizer.zero_grad()
            outputs = model(inputs, last=False, freeze=False)
            # weighted average of the per-sample losses, weights normalized to sum to 1
            losses = criterion_nored(outputs, targets)
            loss = torch.dot(losses, weights/(weights.sum()))
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0
        val_total = 0

        # model evaluation using validation set
        with torch.no_grad():
            for inputs, targets in valloader:
                inputs, targets = inputs.to(device), targets.to(device, non_blocking=True, dtype=torch.int64)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                # re-weight the batch loss by batch size to get a per-sample average below
                val_loss += loss.item()*targets.size(0)
                val_total += targets.size(0)

        epoch_val_loss = val_loss / val_total
        val_losses.append(epoch_val_loss)

        scheduler.step(epoch_val_loss)
        early_stopping(epoch_val_loss, model)

        if early_stopping.early_stop:
            break

    return -np.min(val_losses)

In [13]:
def run_dss(dataloader, trainloader, valloader, testloader, model, optimizer, scheduler, num_epochs, earlystop_path):
    """Run data segment selection algorithm.

    Trains ``model`` on the weighted batches yielded by ``dataloader``. After
    every epoch it evaluates the validation loss (used for the learning-rate
    scheduler and early stopping) and the test accuracy / F1 score. The
    reported test metrics are those of the epoch with the lowest validation
    loss.

    Besides its arguments, this function reads the notebook-level globals
    device, criterion, criterion_nored and n_class.

    Args:
        dataloader: Dataloader with data subset selection method; yields
            (inputs, targets, weights) batches.
        trainloader: Train dataloader (not used inside this function; kept so
            existing callers keep working).
        valloader: Validation dataloader.
        testloader: Test dataloader.
        model: Train model.
        optimizer: Model optimizer. If None, Adam with lr=1e-3 over
            ``model.parameters()`` is created.
        scheduler: Learning rate scheduler, stepped with the validation loss.
            If None, ReduceLROnPlateau('min', patience=10) is created.
        num_epochs: Number of maximum epochs.
        earlystop_path: Earlystop model checkpoint path.
    Returns:
        Index of the epoch with the lowest validation loss, test accuracy and
        test f1 score at that epoch, and the total time spent in the training
        loops of all executed epochs.
    """
    val_losses = []
    tst_acc = []
    tst_f1 = []
    timing = []

    early_stopping = EarlyStopping.EarlyStopping(patience=10, delta=0.0001, path=earlystop_path)

    # Honour the optimizer / scheduler supplied by the caller; previously both
    # arguments were silently replaced. Fall back to the former defaults only
    # when nothing was passed in.
    if optimizer is None:
        optimizer = optim.Adam(model.parameters(), lr=1e-3)
    if scheduler is None:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10)

    # model training with data segment selection
    for epoch in range(num_epochs):
        model.train()
        start_time = time.time()

        for inputs, targets, weights in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device, non_blocking=True)
            weights = weights.to(device)
            optimizer.zero_grad()
            outputs = model(inputs, last=False, freeze=False)
            # weighted average of the per-sample losses, weights normalized to sum to 1
            losses = criterion_nored(outputs, targets)
            loss = torch.dot(losses, weights/(weights.sum()))
            loss.backward()
            optimizer.step()

        # only the training loop is timed; evaluation below is excluded
        timing.append(time.time() - start_time)

        val_loss = 0
        val_total = 0

        model.eval()

        # model evaluation using validation set
        with torch.no_grad():
            for inputs, targets in valloader:
                inputs, targets = inputs.to(device), targets.to(device, non_blocking=True, dtype=torch.int64)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                # re-weight the batch loss by batch size to get a per-sample average below
                val_loss += loss.item()*targets.size(0)
                val_total += targets.size(0)

        epoch_val_loss = val_loss / val_total
        val_losses.append(epoch_val_loss)

        scheduler.step(epoch_val_loss)

        y_pred = []
        y_truth = []

        # model evaluation using test set
        with torch.no_grad():
            for inputs, targets in testloader:
                inputs, targets = inputs.to(device), targets.to(device, non_blocking=True, dtype=torch.int64)
                outputs = model(inputs)
                _, predicted = outputs.max(1)
                y_pred.extend(predicted.cpu().numpy())
                y_truth.extend(targets.cpu().numpy())

        tst_acc.append(accuracy_score(y_truth, y_pred))

        # weighted F1 for multi-class data, default (binary) F1 for two classes
        if n_class > 2:
            tst_f1.append(f1_score(y_truth, y_pred, average='weighted'))
        elif n_class == 2:
            tst_f1.append(f1_score(y_truth, y_pred))

        early_stopping(epoch_val_loss, model)

        if early_stopping.early_stop:
            break

    # report the test metrics of the epoch that was best on the validation set
    best_ind = np.argmin(val_losses)

    # total training time over all executed epochs (sequential sum, i.e. the
    # last element of the cumulative timing)
    total_time = sum(timing)

    return best_ind, tst_acc[best_ind], tst_f1[best_ind], total_time

In [14]:
# Experiment driver for the data subset selection baselines (GLISTER and
# GRAD-MATCH). For every dataset, method and data segment it repeats the run
# with 5 seeds, picks the best budget by cross validation, trains the final
# model and collects runtime, accuracy and F1 score.
#
# Glister_cv, GradMatch_cv and run_dss read several of the names assigned here
# as notebook-level globals (n_feature, n_class, n_hidden, s, dataset, split1,
# group, x_all_scale, y_all, trainloader, valloader, num_epochs), so these
# names must not be renamed.
dataset_li = ['SEA', 'Hyperplane', 'RandomRBF', 'Sine', 'Electricity', 'Weather', 'Spam', 'Usenet1', 'Usenet2']

for dataset in dataset_li:
    # load data, label, and concept drift points
    x_all = np.load(f'./dataset/{dataset}/data.npy')
    y_all = np.load(f'./dataset/{dataset}/label.npy')
    concept_drifts = np.load(f'./dataset/{dataset}/concept_drifts.npy')

    # number of classes in dataset
    n_class = 5 if dataset == 'RandomRBF' else 2

    # number of nodes in hidden layer
    n_hidden = 256

    # number of maximum epochs
    num_epochs = 2000

    # number of input features
    n_feature = x_all.shape[1]

    # nominal segment length: total number of samples / number of drift points
    segment_len = int((len(x_all)/len(concept_drifts)))

    # number of available data in current segment
    if dataset in ['Spam', 'Usenet1', 'Usenet2']:
        n_train = int((len(x_all)/len(concept_drifts))*0.2)
    else:
        n_train = int((len(x_all)/len(concept_drifts))*0.1)

    # split available data into train and valid
    split1 = int(n_train*0.5)

    # data preprocessing (scaling); depends only on x_all, so it is done once
    # per dataset instead of once per data segment
    scaler = StandardScaler()
    x_all_scale = scaler.fit_transform(x_all)

    print('dataset: ', dataset)
    print('concept drifts: ', concept_drifts)

    method_li = ['Glister', 'GradMatch']

    for method in method_li:
        print("----------------------------------------------------------------------------")
        print('method: ', method)

        all_time = []
        all_acc = []
        all_acc_std = []
        all_f1 = []
        all_f1_std = []

        # consecutive evaluation with data segments
        for n in range(len(concept_drifts)):
            train_time_li = []
            test_acc_li = []
            test_f1_li = []

            n_dataset = n+1

            # size of the index range handed to the selection dataloaders:
            # n nominal segments plus split1 samples
            n_candidates = n*segment_len+split1

            # repeat experiments with 5 different seeds
            for s in range(5):
                # initialize model
                # NOTE(review): this instance is replaced after cross validation
                # and never trained; it is kept because the constructor receives
                # the seed s and may affect RNG state -- confirm before removing.
                model = TwoLayerNet(n_feature, n_class, n_hidden, s)
                model = model.to(device)

                # prepare data for train, valid, and test
                dataset_all = range(n_dataset)
                train_ds, valid_ds, test_ds, train_index = prepare_data(n, n_train, x_all_scale, y_all,
                                                                        concept_drifts, dataset_all, n_feature,
                                                                        device)

                trainloader = DataLoader(train_ds, batch_size=128, shuffle=False)
                valloader = DataLoader(valid_ds, batch_size=128, shuffle=True)
                testloader = DataLoader(test_ds, batch_size=128, shuffle=True)

                if method == 'Glister':
                    # split data into sample unit (one index per group)
                    arr = np.arange(n_candidates)
                    group = np.split(arr, len(arr))
                    cv_fn = Glister_cv
                    loader_cls = GLISTERDataLoader

                elif method == 'GradMatch':
                    # split data into random batch unit
                    if n == 0:
                        # first segment: a single, unshuffled group
                        group = [np.arange(split1)]
                    else:
                        arr = np.arange(n_candidates)
                        np.random.seed(s)
                        np.random.shuffle(arr)
                        batch_size = 128
                        # consecutive chunks of batch_size; the last one may be shorter
                        group = [arr[i:i+batch_size] for i in range(0, len(arr), batch_size)]
                    cv_fn = GradMatch_cv
                    loader_cls = GradMatchDataLoader

                else:
                    raise ValueError(f'unknown method: {method}')

                # cross validation to find best budget value
                # (the cv functions return minus the validation loss, hence argmax)
                pbounds = [0.1*b for b in range(1,11)]
                cv_perf = [cv_fn(budget) for budget in pbounds]
                best_budget = pbounds[np.argmax(cv_perf)]

                # initialize model
                model = TwoLayerNet(n_feature, n_class, n_hidden, s)
                model = model.to(device)

                # set model optimizer, scheduler, and earlystop path
                optimizer = optim.Adam(model.parameters(), lr=1e-3)
                scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10)
                earlystop_path = f'./ckpt/{dataset}_{method}.pt'

                # set arguments for data subset selection
                # (keys are added in the same order as the per-method dicts used before)
                dss_args = dict(model=model,
                                loss=criterion_nored,
                                eta=0.001,
                                num_classes=n_class,
                                num_epochs=2000,
                                device=device,
                                fraction=best_budget,
                                init_budget=split1,
                                select_every=20,
                                kappa=0,
                                linear_layer=True,
                               )
                if method == 'Glister':
                    dss_args.update(selection_type='PerSample')
                else:
                    # the 'Sine' dataset uses a different eps value than all other datasets
                    eps = 0.5 if dataset == 'Sine' else 0.01
                    dss_args.update(selection_type='PerBatch',
                                    valid=True,
                                    v1=True,
                                    lam=0.5,
                                    eps=eps,
                                   )
                dss_args.update(groups=group,
                                x_all=x_all_scale,
                                y_all=y_all,
                               )
                dss_args = DotMap(dss_args)

                # define the subset-selection dataloader of the current method
                dataloader = loader_cls(trainloader, valloader, dss_args, batch_size=128,
                                        shuffle=True, pin_memory=False)

                # model training and evaluation
                best_ind, best_acc, best_f1, runtime = run_dss(dataloader, trainloader, valloader, testloader,
                                                               model, optimizer, scheduler, num_epochs,
                                                               earlystop_path=earlystop_path)

                train_time_li.append(runtime)
                test_acc_li.append(best_acc)
                test_f1_li.append(best_f1)

            # per-segment statistics over the 5 seeds
            all_time.append(np.mean(train_time_li))
            all_acc.append(np.mean(test_acc_li))
            all_acc_std.append(np.std(test_acc_li))
            all_f1.append(np.mean(test_f1_li))
            all_f1_std.append(np.std(test_f1_li))

        # print runtime, accuracy, and F1 score
        # (the printed "std" is the mean over segments of the per-segment std across seeds)
        print('overall train time: %.3f' %(np.mean(all_time)))
        print('overall test acc: avg %.3f, std %.3f' %(np.mean(all_acc), np.mean(all_acc_std)))
        print('overall test f1: avg %.3f, std %.3f' %(np.mean(all_f1), np.mean(all_f1_std)))

    print('\n')

dataset:  SEA
concept drifts:  [ 2000  4000  6000  8000 10000 12000 14000 16000]
----------------------------------------------------------------------------
method:  Glister
overall train time: 23.613
overall test acc: avg 0.856, std 0.009
overall test f1: avg 0.885, std 0.008
----------------------------------------------------------------------------
method:  GradMatch
overall train time: 2.847
overall test acc: avg 0.853, std 0.007
overall test f1: avg 0.884, std 0.005


dataset:  Hyperplane
concept drifts:  [ 2000  4000  6000  8000 10000 12000 14000 16000]
----------------------------------------------------------------------------
method:  Glister
overall train time: 22.704
overall test acc: avg 0.907, std 0.006
overall test f1: avg 0.907, std 0.006
----------------------------------------------------------------------------
method:  GradMatch
overall train time: 1.257
overall test acc: avg 0.841, std 0.007
overall test f1: avg 0.843, std 0.008


dataset:  RandomRBF
concept drift