In [None]:
!pip install -q scikit-multiflow

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from skmultiflow.drift_detection import ADWIN, DDM
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OneHotEncoder
from matplotlib.colors import ListedColormap
from multiprocessing.pool import ThreadPool
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import roc_auc_score
from imblearn.over_sampling import SMOTE
from sklearn.utils import shuffle
from contextlib import suppress
from collections import Counter
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from random import shuffle
import seaborn as sns
from time import time
import pandas as pd
import numpy as np
import warnings
import scipy.io
import pickle
import sys
import gc
import os
import re

In [None]:
# Mount Google Drive at /content/drive (Colab-only API); the data, code and results
# paths configured further down all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Project folders on the mounted Drive: datasets, shared helper modules, and experiment results.
_PROJECT_ROOT = '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS'
data_path = _PROJECT_ROOT + '/data'
code_path = _PROJECT_ROOT + '/Codes/Shared Codes'
results_path = _PROJECT_ROOT + '/results'

In [None]:
sys.path.insert(0,code_path)
from genetic_programming import SymbolicRegressor
from binirizer import CustomLabelBinirizer
from ensemble import Ensemble, Classifier
from oselm import OSELMClassifier,set_use_know
from DynamicFeatureSelection import dynamic_feature_selection
from SharedFunctions import prepare_data,train_and_test,feature_evolving,save_pickle,load_pickle,save_object,load_object,generate_new_samples

In [None]:
# Dataset CSV files, resolved to full paths under `data_path` (order matters: run cells index into this list).
filenames = [
    os.path.join(data_path, csv_name)
    for csv_name in (
        'kddcup99_csv.csv',
        'ISCX2012.csv',
        'CSE-CIC2018.csv',
        'CICIDS2017.csv',
        '7recurrentDrift.csv',
        'blip.csv',
        'incrementalDrift.csv',
        '7gradualDrift.csv',
        '7suddenDrift.csv',
    )
]
filenames

['/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/kddcup99_csv.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/ISCX2012.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/CSE-CIC2018.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/CICIDS2017.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/7recurrentDrift.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/blip.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/incrementalDrift.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/7gradualDrift.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/7suddenDrift.csv']

In [None]:
def genetic_programming():
    """
    Build the genetic-programming combiner passed to every ensemble as its `program`.

    return a new SymbolicRegressor with a fixed configuration (population of 10,
    5 generations, seeded with random_state=42, scored with the 'f1-score' metric).
    """
    # Operators the evolved programs may use to combine their inputs.
    combiner_functions = ['avg2', 'avg3', 'avg5',
                          'median3', 'median5',
                          'maximum2', 'maximum3', 'maximum5']
    settings = dict(
        population_size=10,
        generations=5,
        stopping_criteria=0.85,
        p_crossover=0.7,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        max_samples=0.7,
        verbose=1,
        parsimony_coefficient=1e-4,
        random_state=42,
        function_set=combiner_functions,
        metric='f1-score',
    )
    return SymbolicRegressor(**settings)

In [None]:
def generate_oselm_models(number_of_hidden_neurons, apply_model_replacement=False):
    """
    Build an Ensemble of four OSELM classifiers combined by the genetic-programming program.

    :param number_of_hidden_neurons: first positional argument given to every OSELMClassifier
    :param apply_model_replacement: forwarded unchanged to the Ensemble constructor
    return the new Ensemble
    """
    # Four identically configured members ('relu', random_state=42), each with its own binarizer.
    base_learners = [
        OSELMClassifier(number_of_hidden_neurons, 'relu',
                        binarizer=CustomLabelBinirizer(), random_state=42)
        for _ in range(4)
    ]
    return Ensemble(classifiers=base_learners,
                    program=genetic_programming(),
                    apply_model_replacement=apply_model_replacement)

def generate_ml_models(number_of_hidden_neurons, apply_model_replacement=False):
    """
    Build an Ensemble of classic scikit-learn classifiers combined by the genetic-programming program.

    :param number_of_hidden_neurons: accepted so the signature matches generate_oselm_models; not used here
    :param apply_model_replacement: forwarded unchanged to the Ensemble constructor
    return the new Ensemble (2 x KNN with k=5, 2 x logistic regression, 3 x Gaussian naive Bayes)
    """
    neighbours = [KNeighborsClassifier(5) for _ in range(2)]
    logistic = [LogisticRegression() for _ in range(2)]
    bayes = [GaussianNB() for _ in range(3)]
    # A DecisionTreeClassifier member was disabled in the original list and stays excluded.
    members = neighbours + logistic + bayes
    return Ensemble(classifiers=members,
                    program=genetic_programming(),
                    apply_model_replacement=apply_model_replacement)

In [None]:
def concept_drift_detection(drift_detection_obj, sample) -> bool:
    """
    Feed one observation to a drift detector and report whether it now signals a change.

    :param drift_detection_obj: detector exposing add_element() and detected_change()
                                (e.g. the ADWIN / DDM classes imported from skmultiflow)
    :param sample: new value from the data stream to add to the detector
    return whatever detected_change() reports after the sample was added
    """
    detector = drift_detection_obj
    detector.add_element(sample)
    change_signalled = detector.detected_change()
    return change_signalled

In [None]:
def feature_evolving(evolving_matrix):
    """
    Pick one entry of evolving_matrix uniformly at random (NumPy global random state).

    evolving_matrix : non-empty list of candidate lists
    return the chosen entry itself (not a copy)
    """
    draw = np.random.randint(low=0, high=len(evolving_matrix), size=1)
    return evolving_matrix[draw[0]]

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
def random_forest_feature_selection(X, y, n_estimators=20, random_state=None):
    """
    Select features of X with a random-forest-based SelectFromModel.

    :param X: feature matrix passed to SelectFromModel.fit
    :param y: target labels passed to SelectFromModel.fit
    :param n_estimators: number of trees in the forest (default 20, the previously hard-coded value)
    :param random_state: seed forwarded to RandomForestClassifier; the default None keeps the
                         previous unseeded behaviour, pass an int for a reproducible selection
    return the boolean support mask from get_support() (one entry per column of X),
           not the reduced feature matrix
    """
    forest = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
    selector = SelectFromModel(forest)
    selector.fit(X, y)
    return selector.get_support()

In [None]:
def E2SC4ID (X,
             y,
             sample_index:int,
             buffer:list,
             ensemble: "Ensemble",
             drift_detection_obj,
             maxC=8,
             n=2000,
             train_size=0.5,
             drift=False,
             unselected_features=None,
             drift_location=None):
    """
    Process one stream sample: predict, test for drift, and retrain once the buffer is full.

    :param X: feature vector of the sample
    :param y: label of the sample, or None when the sample is unlabeled
    :param sample_index: key under which a newly detected drift is recorded
    :param buffer: list of (X, y) tuples; mutated in place (appended to, or cleared)
    :param ensemble: the Ensemble used for prediction and updated on drift
    :param drift_detection_obj: detector with add_element / detected_change / reset
    :param maxC: if the ensemble holds more classifiers than this after induction,
                 model_replacement('time') is called
    :param n: buffer length at which the buffer is either used for retraining or cleared
    :param train_size: fraction of the buffer used for training; the rest validates the program
    :param drift: drift flag carried over from previous samples
    :param unselected_features: forwarded to ensemble.classifier_induction
    :param drift_location: dict mapping sample_index -> 'drift'; a new dict is created when omitted
    return the tuple (ensemble, buffer, drift, drift_location), also for unlabeled samples
    """
    # A `{}` default would be created once and shared by every call that omits the argument.
    if drift_location is None:
        drift_location = {}

    y_pred = ensemble.global_support_degree(X)

    if y is None:
        # Unlabeled sample: nothing to buffer or to test for drift. Hand the state back
        # unchanged so callers can always unpack four values.
        print("y is none ")
        return ensemble, buffer, drift, drift_location

    buffer.append((X, y))
    # The detector is fed the 0/1 error of the current prediction.
    actual_drift = concept_drift_detection(drift_detection_obj, int(y!=y_pred))
    if actual_drift and not drift:
        # Only the first detection since the last retraining is recorded.
        drift_location[sample_index] = 'drift'
    drift = drift or actual_drift

    if len(buffer) >= n:
        print("buffer size {} N {}".format(len(buffer),n))
        print("drift {}".format(drift))
        if drift:
            drift = False
            drift_detection_obj.reset()
            x_buffer = [features for features, _ in buffer]
            y_buffer = [label for _, label in buffer]
            # Leading part of the buffer trains new classifiers, the rest re-fits the program.
            split_at = int(len(x_buffer)*train_size)
            X_train, X_valid = x_buffer[:split_at], x_buffer[split_at:]
            y_train, y_valid = y_buffer[:split_at], y_buffer[split_at:]
            positives = np.array(y_train).sum()
            print("len of y_trian {} number of ones {}".format(len(y_train),positives))
            if 0 == positives or positives == len(y_train):
                # Only one class present: flip the first training label so both classes occur.
                # y_train is a slice copy, so the buffer itself is not altered.
                y_train[0] = 0 if y_train[0] == 1 else 1
            ensemble.classifier_induction(
                [model.clf for model in ensemble.classifiers],
                X_train,
                y_train,
                unselected_features)
            if len(ensemble.classifiers) > maxC:
                ensemble.model_replacement('time')
            ensemble.update_program(X_valid, y_valid)
            # NOTE(review): the buffer is not cleared after retraining; it is only cleared
            # by a later call that finds it full with no drift flagged — confirm this is intended.
        else:
            buffer.clear()
            print("Clear bufer : {}".format(len(buffer)))
    return ensemble, buffer, drift, drift_location

In [None]:
def E2SC4ID_STREAM(ensemble, stremdata, y, unselected_features, drift_location, chunk_number,result_save_path_data,key,sample_number=0):
    """
    Stream one chunk through E2SC4ID sample by sample, checkpointing after every sample.

    :param ensemble: ensemble to train; fitted on the first 200 samples if not yet fitted
    :param stremdata: samples of the chunk (indexable, supports slicing and len)
    :param y: labels aligned with stremdata; y[0] is flipped in place before the initial fit
    :param unselected_features: forwarded to E2SC4ID
    :param drift_location: dict of recorded drift positions, forwarded to E2SC4ID
    :param chunk_number: used to offset the recorded sample index by chunk_number * 10000
    :param result_save_path_data: directory holding the checkpoint pickles
    :param key: prefix of the checkpoint file names
    :param sample_number: when non-zero, state is restored from the checkpoints and the
                          loop resumes at sample 200 + sample_number
    return (ensemble, drift_location) after the last sample
    """
    def checkpoint(template):
        # Full path of one checkpoint pickle for this dataset key.
        return os.path.join(result_save_path_data, template.format(key))

    if not ensemble.fitted:
        y[0] = 1 if y[0] != 1 else 0
        ensemble.fit(stremdata[:200], y[:200])

    drift_detection_obj = DDM()
    drift = False
    buffer = []

    if sample_number != 0 :
        # Resume: restore the state written by the per-sample checkpoints below.
        # The drift detector itself is not checkpointed and starts fresh.
        drift_location = load_pickle(checkpoint("{}_drift_location.pkl"))
        drift = load_pickle(checkpoint("{}_drift.pkl"))
        buffer = load_pickle(checkpoint("{}_buffer.pkl"))
        ensemble = load_pickle(checkpoint("{}_ensemble.pkl"))

    stream_length = len(stremdata)
    index_offset = chunk_number * 10000
    for i in tqdm(range(200+sample_number, stream_length)):
        step_result = E2SC4ID (stremdata[i],
                               y[i],
                               sample_index=(i + index_offset),
                               buffer=buffer,
                               ensemble=ensemble,
                               drift_detection_obj=drift_detection_obj,
                               maxC=8,
                               n=stream_length-200,
                               train_size=0.7,
                               drift=drift,
                               unselected_features=unselected_features,
                               drift_location=drift_location)
        ensemble, buffer, drift, drift_location = step_result

        # Checkpoint the complete state after every single sample.
        save_pickle(drift, checkpoint("{}_drift.pkl"))
        save_pickle(buffer, checkpoint("{}_buffer.pkl"))
        save_pickle(drift_location, checkpoint("{}_drift_location.pkl"))
        save_pickle(ensemble, checkpoint("{}_ensemble.pkl"))
    return ensemble, drift_location

In [None]:
def main(f_name, generate_model, train_size=3000,apply_model_replacement=False, transfer_learning=False, feature_selection="random_forest", result_save_path="",ChunkNumber=0,sample_number=0,DFS_results_path=''):
  """
  Run the chunk-wise streaming experiment on one dataset and checkpoint everything to disk.

  The dataset is loaded with `prepare_data`, shuffled and split into 10 chunks. Each chunk is
  resampled with SMOTE (when that succeeds), reduced by the configured feature selection,
  streamed through `E2SC4ID_STREAM` on its first `train_size` samples and evaluated on the rest.

  f_name: path of the dataset file; its last '/'-separated component is used as the dataset key.
  generate_model: function building an ensemble; called with the keyword arguments
      `number_of_hidden_neurons` and `apply_model_replacement`.
  train_size: number of leading samples of each chunk used in the training phase.
  apply_model_replacement: forwarded to `generate_model`.
  transfer_learning: if True the same ensemble is carried from chunk to chunk and trained on the
      selected feature columns; if False a new ensemble is generated for every chunk.
  feature_selection: indexed as [0] and [1] below, e.g. ["feature_evolving", "random_forest"].
      NOTE(review): the string default "random_forest" does not fit that indexing - confirm.
  result_save_path: directory under which a per-dataset folder of pickles is written.
  ChunkNumber: chunks whose index is below this value are skipped and previously pickled
      state is loaded instead (resume support).
  sample_number: resume offset passed to `E2SC4ID_STREAM`; reset to 0 after the first processed chunk.
  DFS_results_path: location handed to `load_object` (and used to build the evolving-matrix
      pickle name) in the "DFS_feature_selection" branches.

  Nothing is returned; results, drift locations and the ensemble are written with `save_pickle`.
  """
  # load the dataset and then process it
  datasets = {}
  first_chunk=True
  d = prepare_data(f_name)
  # shuffle all rows reproducibly
  d = d.sample(frac=1, random_state=42)
  # 5000 randomly drawn rows (unseeded) kept as a pool for generate_new_samples below
  buffer = d.sample(n=5000)
  # NOTE(review): reset_index without drop=True turns the old (shuffled) index into a new
  # first column, which then becomes part of X below - confirm this is intended.
  d.reset_index(inplace=True)
  d.replace([np.inf], 0, inplace=True)
  datasets[f_name.split('/')[-1]] = d
  results = {}
  drift_locations_in_all_dataset = {}
  ensemble = None ####
  ########################################
  for key in tqdm(datasets.keys()):
      result_save_path_data = os.path.join(result_save_path, key)
      drift_location = {} if ChunkNumber==0 else load_pickle(os.path.join(result_save_path_data, "{}_drift_location.pkl".format(key)))
      results[key] = {'model_result': []}
      # convert dataset from dataframe to numpy array.
      data = datasets[key].values
      # split the data into features array and target array.
      X, Y = data[:, 0:-1], data[:, -1].astype('int')
      # The evolving matrix (three random lists of column indices) is cached in a pickle
      # in the current working directory so later runs reuse the same lists.
      if not os.path.exists("{}_evolving_matrix.pkl".format(key)):
        a2 = np.random.randint(low=0, high=X.shape[1], size = X.shape[1] // 6).tolist()
        a3 = np.random.randint(low=0, high=X.shape[1], size = X.shape[1] // 5).tolist()
        a4 = np.random.randint(low=0, high=X.shape[1], size = X.shape[1] // 4).tolist()
        evolving_matrix = [a2, a3, a4]
        save_pickle(evolving_matrix, "{}_evolving_matrix.pkl".format(key))
      else:
        evolving_matrix = load_pickle("{}_evolving_matrix.pkl".format(key))
      ensemble = generate_model(number_of_hidden_neurons=X.shape[1]*3 // 2, apply_model_replacement=apply_model_replacement)
      # split the data into chunks (10 chunks)
      chunks_features = np.array_split(X, 10)
      chunks_labels = np.array_split(Y, 10)

      ################# train on each chunk ####################
      print("===================== dataset : {} ======================".format(key))
      # NOTE(review): chunk_number always starts at 1, also when chunks are skipped on resume,
      # so it can differ from the chunk index CN - confirm.
      chunk_number = 1
      if ChunkNumber>0:
        # resume: restore the state saved at the end of the last completed chunk
        ensemble = load_pickle(os.path.join(result_save_path_data, "{}_ensemble.pkl".format(key)))
        results = load_pickle(os.path.join(result_save_path_data, "{}_results.pkl".format(key)))
        drift_locations_in_all_dataset = load_pickle(os.path.join(result_save_path_data, "{}_drift_locations_in_all_dataset.pkl".format(key)))
      for CN,chunk_X, chunk_Y in tqdm(zip([*range(len(chunks_labels))],chunks_features, chunks_labels)):
          if ChunkNumber > CN:
            print("Skip Chunk Number : {}".format(CN))
            continue
          # sample_number only applies to the first chunk that is actually processed;
          # every later chunk starts from sample 0.
          if not first_chunk:
            sample_number=0
            first_chunk = False
          else:first_chunk = False
          # Balance the chunk with SMOTE; if that raises, fall back to generate_new_samples
          # when the chunk holds at most one positive label. Any other failure is silently ignored.
          try:
            chunk_X, chunk_Y = SMOTE().fit_resample(chunk_X, chunk_Y)
          except:
            if chunk_Y.sum() in [0, 1]:
              new_samples, new_labels = generate_new_samples(buffer, chunk_Y)
              chunk_X = np.concatenate((chunk_X, new_samples))
              chunk_Y = np.concatenate((chunk_Y, new_labels))
          gc.collect()
          unselected_feautres = None
          selected = None
          X_train, X_test, y_train, y_test = chunk_X[:train_size], chunk_X[train_size:], chunk_Y[:train_size], chunk_Y[train_size:]
          print("X_train shape {} , X_test shape {} ".format(X_train.shape,X_test.shape))
          if feature_selection[0] == "feature_evolving":
            # simulate an evolving feature space: drop one randomly chosen list of columns
            unselected_feautres = feature_evolving(evolving_matrix=evolving_matrix)
            if feature_selection[1] == "random_forest":
              print('random_forest')
              print("1- X_trian[{}] {} shape {} ".format(0,X_train[0],X_train[0].shape))
              X_train = np.delete(X_train, unselected_feautres, 1)
              print("2- X_trian[{}] {} shape {} ".format(0,X_train[0],X_train[0].shape))
              X_test = np.delete(X_test, unselected_feautres, 1)
              # the mask is computed on (and refers to) the already reduced columns
              selected = random_forest_feature_selection(X_train, y_train)
              unselected_feautres = np.where(selected != 1)[0]
            elif feature_selection[1] == "DFS_feature_selection":
              # selected = dynamic_feature_selection(chunk_X, chunk_Y)
              unselected_feautres = feature_evolving(evolving_matrix=load_pickle("{}_evolving_matrix.pkl".format(DFS_results_path)))
              X_train = np.delete(X_train, unselected_feautres, 1)
              X_test = np.delete(X_test, unselected_feautres, 1)
              softmax_results = load_object('softmax_results',DFS_results_path)
              average_results = load_object('average_results',DFS_results_path)
              single_agent_results = load_object('single_agent_results',DFS_results_path)
              random_forest_results = load_object('random_forest_results',DFS_results_path)
              voting_results = load_object('voting_results',DFS_results_path)
              # NOTE(review): unlike the branch without feature evolving, the 'f1' entries are
              # not indexed with [CN] here - confirm which form is intended.
              f1_score_for_all_algorithm = [softmax_results['f1'],average_results['f1'],single_agent_results['f1'],random_forest_results['f1'],voting_results['f1']]
              masks = [load_object("softmax_mask_"+str(CN),DFS_results_path),
                        load_object("average_mask_"+str(CN),DFS_results_path),
                        load_object("single_agent_mask_"+str(CN),DFS_results_path),
                        load_object("random_forest_mask_"+str(CN),DFS_results_path),
                        load_object("voting_mask_"+str(CN),DFS_results_path)]
              # use the mask of the algorithm with the highest stored f1 value
              selected = masks[f1_score_for_all_algorithm.index(max(f1_score_for_all_algorithm))]
              unselected_feautres = np.where(selected != 1)[0]
          else:
            if feature_selection[1] == "random_forest":
              selected = random_forest_feature_selection(X_train, y_train)
              unselected_feautres = np.where(selected != 1)[0]
            elif feature_selection[1] == "DFS_feature_selection":
              # selected = dynamic_feature_selection(X_train, y_train)
              softmax_results = load_object('softmax_results',DFS_results_path)
              average_results = load_object('average_results',DFS_results_path)
              single_agent_results = load_object('single_agent_results',DFS_results_path)
              random_forest_results = load_object('random_forest_results',DFS_results_path)
              voting_results = load_object('voting_results',DFS_results_path)
              f1_score_for_all_algorithm = [softmax_results['f1'][CN],
                                            average_results['f1'][CN],
                                            single_agent_results['f1'][CN],
                                            random_forest_results['f1'][CN],
                                            voting_results['f1'][CN]]
              masks = [load_object("softmax_mask_"+str(CN),DFS_results_path),
                        load_object("average_mask_"+str(CN),DFS_results_path),
                        load_object("single_agent_mask_"+str(CN),DFS_results_path),
                        load_object("random_forest_mask_"+str(CN),DFS_results_path),
                        load_object("voting_mask_"+str(CN),DFS_results_path)]
              # use the mask of the algorithm with the highest stored f1 value for this chunk
              selected = masks[f1_score_for_all_algorithm.index(max(f1_score_for_all_algorithm))]
              unselected_feautres = np.where(selected != 1)[0]
          # NOTE(review): `selected` is still None here if no branch above matched.
          print("selected : ",sum(selected))
          if not os.path.exists(result_save_path_data):
            os.mkdir(result_save_path_data)
          if transfer_learning:
            # keep only the selected columns; squeeze in case the indexing produced an extra axis
            temp = np.squeeze(X_train[:, selected]) if len(list(X_train[:, selected].shape))>2 else X_train[:, selected]
            print(selected)
            print("3- X_trian[{}] {} shape {}".format(0,temp[0],temp[0].shape))
            # temp = np.squeeze(X_train) if len(list(X_train.shape))>2 else X_train
            ensemble, drift_location = E2SC4ID_STREAM(ensemble=ensemble, stremdata=temp, y=y_train, unselected_features=None, drift_location=drift_location,
                                                      chunk_number=chunk_number, result_save_path_data=result_save_path_data,key=key,sample_number=sample_number)
            temp = np.squeeze(X_test[:, selected]) if len(list(X_test[:, selected].shape))>2 else X_test[:, selected]
            # temp = np.squeeze(X_test) if len(list(X_test.shape))>2 else X_test

            ensemble.evaluate(temp, y_test, chunk_number)
          else:
            # no transfer learning: a brand-new ensemble is streamed on the unreduced X_train
            ensemble, drift_location = E2SC4ID_STREAM(ensemble=generate_model(number_of_hidden_neurons=X.shape[1]*3 // 2,apply_model_replacement=apply_model_replacement),
                                                      stremdata=X_train, y=y_train, unselected_features=unselected_feautres, drift_location=drift_location,
                                                      chunk_number=chunk_number,result_save_path_data=result_save_path_data,key=key,sample_number=sample_number)
            ensemble.evaluate(X_test, y_test, chunk_number)
          save_pickle(drift_location, os.path.join(result_save_path_data, "{}_drift_location.pkl".format(key)))

          # store predictions on the selected test columns together with the ensemble's scores
          temp = np.squeeze(X_test[:, selected]) if len(list(X_test[:, selected].shape))>2 else X_test[:, selected]
          y_pre = ensemble.predict(temp)
          results[key][chunk_number] = {"y_true" : y_test, "y_pred": y_pre}
          results[key]['model_result'].append(ensemble.scores)
          if transfer_learning:
             # after evaluation the ensemble is also fitted on this chunk's test split
             ensemble.fit(temp, y_test, None)
          chunk_number += 1
          drift_locations_in_all_dataset[key] = drift_location

          # end-of-chunk checkpoint, read back when resuming with ChunkNumber > 0
          save_pickle(ensemble, os.path.join(result_save_path_data, "{}_ensemble.pkl".format(key)))
          save_pickle(results, os.path.join(result_save_path_data, "{}_results.pkl".format(key)))
          save_pickle(drift_locations_in_all_dataset, os.path.join(result_save_path_data, "{}_drift_locations_in_all_dataset.pkl".format(key)))

In [None]:
# Short dataset names, index-aligned with `filenames` (the run cells read both with the same DATA_NUMBER).
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']

In [None]:
# # use_know = False
# # set_use_know(use_know)
# # path = os.path.join(results_path,'faoselm_gp_combiner_with_model_replacement_evolving_random_forest')
# # os.makedirs(path, exist_ok=True)
# # for f_name,d_name in zip(filenames,data_name):
# #   main(f_name, generate_oselm_models, apply_model_replacement=True,transfer_learning=True,
# #        feature_selection=["feature_evolving",'random_forest'], result_save_path=path)

In [None]:
# Run the experiment on dataset index 0 (fresh run: no chunks or samples skipped).
DATA_NUMBER = 0
ChunkNumber = 0
sample_number = sum([0])

use_know = False
set_use_know(use_know)

path = os.path.join(results_path, 'faoselm_gp_combiner_with_model_replacement_evolving_random_forest')
os.makedirs(path, exist_ok=True)

f_name = filenames[DATA_NUMBER]
d_name = data_name[DATA_NUMBER]
main(
    f_name,
    generate_oselm_models,
    apply_model_replacement=True,
    transfer_learning=True,
    feature_selection=["feature_evolving", 'random_forest'],
    result_save_path=path,
    ChunkNumber=ChunkNumber,
    sample_number=sample_number,
)

  0%|          | 0/1 [00:00<?, ?it/s]



0it [00:00, ?it/s]

X_train shape (3000, 40) , X_test shape (4792, 40) 
random_forest
1- X_trian[0] [5.12100000e+03 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 7.28571429e-01 0.00000000e+00 3.33333333e-01 0.00000000e+00
 0.00000000e+00 1.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 2.17221135e-01 0.00000000e+00
 0.00000000e+00 1.56555773e-02 0.00000000e+00 7.00000000e-02
 0.00000000e+00 0.00000000e+00 0.00000000e+00 1.00000000e+00
 1.00000000e+00 7.00000000e-02 3.00000000e-02 6.36363636e-01
 3.13725490e-02 1.00000000e+00 7.00000000e-02 1.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00] shape (40,) 
2- X_trian[0] [5.12100000e+03 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 7.28571429e-01
 0.00000000e+00 3.33333333e-01 0.00000000e+00 1.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 1.56555

  0%|          | 0/2800 [00:00<?, ?it/s]

buffer size 2800 N 2800
drift True
len of y_trian 1959 number of ones 1556
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    66.10         0.994768        9          0.99848          0.99848      0.54s
{1: {'accuracy': 0.9935308848080133, 'precision': 0.9824789097988319, 'recall': 0.997364953886693, 'f1-score': 0.9898659692710036, 'auc': 0.994559080486413}}
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    39.40         0.996941       85         0.997697         0.997697      1.07s
X_train shape (3000, 40) , X_test shape (4832, 40) 
random_forest
1- X_trian[0] [4.67800000e+03 0.00000000e

  0%|          | 0/2800 [00:00<?, ?it/s]

buffer size 2800 N 2800
drift False
Clear bufer : 0
{1: {'accuracy': 0.9935308848080133, 'precision': 0.9824789097988319, 'recall': 0.997364953886693, 'f1-score': 0.9898659692710036, 'auc': 0.994559080486413}, 2: {'accuracy': 0.9449503311258278, 'precision': 0.8601841196777905, 'recall': 0.9848484848484849, 'f1-score': 0.9183046683046683, 'auc': 0.9557615990929207}, 3: {'accuracy': 0.2667493796526055, 'precision': 0.26983769161406673, 'recall': 0.7958776595744681, 'f1-score': 0.403030303030303, 'auc': 0.4118944120201272}}
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    35.90         0.985974        3         0.988024         0.988024      0.65s
X_train shape (3000, 40) , X_test shape (4876, 40) 
random_forest
1- X_trian[0] [3.49010000e+04 0.00000000e+00 0.00000000e+00 1.18119033e-06
 

  0%|          | 0/2800 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
# Original label: "without feature selection".
# NOTE(review): the call below is identical to the previous run cell and still passes
# feature_selection=["feature_evolving", 'random_forest'] - confirm whether `main` was
# edited between runs to produce the different recorded output.
DATA_NUMBER = 0
ChunkNumber = 0
sample_number = sum([0])

use_know = False
set_use_know(use_know)

path = os.path.join(results_path, 'faoselm_gp_combiner_with_model_replacement_evolving_random_forest')
os.makedirs(path, exist_ok=True)

f_name = filenames[DATA_NUMBER]
d_name = data_name[DATA_NUMBER]
main(
    f_name,
    generate_oselm_models,
    apply_model_replacement=True,
    transfer_learning=True,
    feature_selection=["feature_evolving", 'random_forest'],
    result_save_path=path,
    ChunkNumber=ChunkNumber,
    sample_number=sample_number,
)

  0%|          | 0/1 [00:00<?, ?it/s]



0it [00:00, ?it/s]

X_train shape (3000, 40) , X_test shape (4792, 40) 
random_forest
selected :  10
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70                1       81                1                1      0.58s


  0%|          | 0/2800 [00:00<?, ?it/s]

buffer size 2800 N 2800
drift True
len of y_trian 1959 number of ones 1556
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    66.10          0.99499        9         0.996965         0.996965      0.73s
{1: {'accuracy': 0.994991652754591, 'precision': 0.987597911227154, 'recall': 0.9967061923583662, 'f1-score': 0.9921311475409835, 'auc': 0.9954514468206004}}
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    39.40         0.994549        8         0.997368         0.997368      1.00s
X_train shape (3000, 40) , X_test shape (4832, 40) 
random_forest
selected :  8


  0%|          | 0/2800 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
# Original label: "all features".
# NOTE(review): the call below is identical to the two previous run cells and still passes
# feature_selection=["feature_evolving", 'random_forest'] - confirm whether `main` was
# edited between runs to produce the different recorded output.
DATA_NUMBER = 0
ChunkNumber = 0
sample_number = sum([0])

use_know = False
set_use_know(use_know)

path = os.path.join(results_path, 'faoselm_gp_combiner_with_model_replacement_evolving_random_forest')
os.makedirs(path, exist_ok=True)

f_name = filenames[DATA_NUMBER]
d_name = data_name[DATA_NUMBER]
main(
    f_name,
    generate_oselm_models,
    apply_model_replacement=True,
    transfer_learning=True,
    feature_selection=["feature_evolving", 'random_forest'],
    result_save_path=path,
    ChunkNumber=ChunkNumber,
    sample_number=sample_number,
)

  0%|          | 0/1 [00:00<?, ?it/s]



0it [00:00, ?it/s]

X_train shape (3000, 40) , X_test shape (4792, 40) 
8
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70                1       81                1                1      0.56s


  0%|          | 0/2800 [00:00<?, ?it/s]

buffer size 2800 N 2800
drift True
len of y_trian 1959 number of ones 1556
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    66.10         0.995833       73         0.996209         0.996209      0.51s
{1: {'accuracy': 0.991652754590985, 'precision': 0.9780077619663649, 'recall': 0.9960474308300395, 'f1-score': 0.9869451697127937, 'auc': 0.9928312902470295}}
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    39.40         0.998318       25         0.998682         0.998682      0.69s
X_train shape (3000, 40) , X_test shape (4832, 40) 
7


  0%|          | 0/2800 [00:00<?, ?it/s]

buffer size 2800 N 2800
drift True
len of y_trian 1959 number of ones 1579
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    43.60         0.996751       12         0.996979         0.996979      0.49s
{1: {'accuracy': 0.991652754590985, 'precision': 0.9780077619663649, 'recall': 0.9960474308300395, 'f1-score': 0.9869451697127937, 'auc': 0.9928312902470295}, 2: {'accuracy': 0.9981374172185431, 'precision': 1.0, 'recall': 0.9940711462450593, 'f1-score': 0.9970267591674926, 'auc': 0.9970355731225297}}
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    37.50         0.994044       83        

  0%|          | 0/2800 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
# Run the experiment on dataset index 1 (fresh run: no chunks or samples skipped).
DATA_NUMBER = 1
ChunkNumber = 0
sample_number = sum([0])

use_know = False
set_use_know(use_know)

path = os.path.join(results_path, 'faoselm_gp_combiner_with_model_replacement_evolving_random_forest')
os.makedirs(path, exist_ok=True)

f_name = filenames[DATA_NUMBER]
d_name = data_name[DATA_NUMBER]
main(
    f_name,
    generate_oselm_models,
    apply_model_replacement=True,
    transfer_learning=True,
    feature_selection=["feature_evolving", 'random_forest'],
    result_save_path=path,
    ChunkNumber=ChunkNumber,
    sample_number=sample_number,
)

In [None]:
# Run the experiment on dataset index 2 (fresh run: no chunks or samples skipped).
DATA_NUMBER = 2
ChunkNumber = 0
sample_number = sum([0])

use_know = False
set_use_know(use_know)

path = os.path.join(results_path, 'faoselm_gp_combiner_with_model_replacement_evolving_random_forest')
os.makedirs(path, exist_ok=True)

f_name = filenames[DATA_NUMBER]
d_name = data_name[DATA_NUMBER]
main(
    f_name,
    generate_oselm_models,
    apply_model_replacement=True,
    transfer_learning=True,
    feature_selection=["feature_evolving", 'random_forest'],
    result_save_path=path,
    ChunkNumber=ChunkNumber,
    sample_number=sample_number,
)

In [None]:
# Run the experiment on dataset index 3 (fresh run: no chunks or samples skipped).
DATA_NUMBER = 3
ChunkNumber = 0
sample_number = sum([0])

use_know = False
set_use_know(use_know)

path = os.path.join(results_path, 'faoselm_gp_combiner_with_model_replacement_evolving_random_forest')
os.makedirs(path, exist_ok=True)

f_name = filenames[DATA_NUMBER]
d_name = data_name[DATA_NUMBER]
main(
    f_name,
    generate_oselm_models,
    apply_model_replacement=True,
    transfer_learning=True,
    feature_selection=["feature_evolving", 'random_forest'],
    result_save_path=path,
    ChunkNumber=ChunkNumber,
    sample_number=sample_number,
)

In [None]:
# Run the experiment on dataset index 4 (fresh run: no chunks or samples skipped).
DATA_NUMBER = 4
ChunkNumber = 0
sample_number = sum([0])

use_know = False
set_use_know(use_know)

path = os.path.join(results_path, 'faoselm_gp_combiner_with_model_replacement_evolving_random_forest')
os.makedirs(path, exist_ok=True)

f_name = filenames[DATA_NUMBER]
d_name = data_name[DATA_NUMBER]
main(
    f_name,
    generate_oselm_models,
    apply_model_replacement=True,
    transfer_learning=True,
    feature_selection=["feature_evolving", 'random_forest'],
    result_save_path=path,
    ChunkNumber=ChunkNumber,
    sample_number=sample_number,
)

In [None]:
# Run the experiment on dataset index 5 (fresh run: no chunks or samples skipped).
DATA_NUMBER = 5
ChunkNumber = 0
sample_number = sum([0])

use_know = False
set_use_know(use_know)

path = os.path.join(results_path, 'faoselm_gp_combiner_with_model_replacement_evolving_random_forest')
os.makedirs(path, exist_ok=True)

f_name = filenames[DATA_NUMBER]
d_name = data_name[DATA_NUMBER]
main(
    f_name,
    generate_oselm_models,
    apply_model_replacement=True,
    transfer_learning=True,
    feature_selection=["feature_evolving", 'random_forest'],
    result_save_path=path,
    ChunkNumber=ChunkNumber,
    sample_number=sample_number,
)

In [None]:
# Run the experiment on dataset index 6 (fresh run: no chunks or samples skipped).
DATA_NUMBER = 6
ChunkNumber = 0
sample_number = sum([0])

use_know = False
set_use_know(use_know)

path = os.path.join(results_path, 'faoselm_gp_combiner_with_model_replacement_evolving_random_forest')
os.makedirs(path, exist_ok=True)

f_name = filenames[DATA_NUMBER]
d_name = data_name[DATA_NUMBER]
main(
    f_name,
    generate_oselm_models,
    apply_model_replacement=True,
    transfer_learning=True,
    feature_selection=["feature_evolving", 'random_forest'],
    result_save_path=path,
    ChunkNumber=ChunkNumber,
    sample_number=sample_number,
)

In [None]:
# Run the experiment on dataset index 7 (fresh run: no chunks or samples skipped).
DATA_NUMBER = 7
ChunkNumber = 0
sample_number = sum([0])

use_know = False
set_use_know(use_know)

path = os.path.join(results_path, 'faoselm_gp_combiner_with_model_replacement_evolving_random_forest')
os.makedirs(path, exist_ok=True)

f_name = filenames[DATA_NUMBER]
d_name = data_name[DATA_NUMBER]
main(
    f_name,
    generate_oselm_models,
    apply_model_replacement=True,
    transfer_learning=True,
    feature_selection=["feature_evolving", 'random_forest'],
    result_save_path=path,
    ChunkNumber=ChunkNumber,
    sample_number=sample_number,
)

In [None]:
# Run the experiment on dataset index 8 (fresh run: no chunks or samples skipped).
DATA_NUMBER = 8
ChunkNumber = 0
sample_number = sum([0])

use_know = False
set_use_know(use_know)

path = os.path.join(results_path, 'faoselm_gp_combiner_with_model_replacement_evolving_random_forest')
os.makedirs(path, exist_ok=True)

f_name = filenames[DATA_NUMBER]
d_name = data_name[DATA_NUMBER]
main(
    f_name,
    generate_oselm_models,
    apply_model_replacement=True,
    transfer_learning=True,
    feature_selection=["feature_evolving", 'random_forest'],
    result_save_path=path,
    ChunkNumber=ChunkNumber,
    sample_number=sample_number,
)