In [16]:
!pip install -q scikit-multiflow

In [17]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from skmultiflow.drift_detection import ADWIN, DDM
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OneHotEncoder
from matplotlib.colors import ListedColormap
from multiprocessing.pool import ThreadPool
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import roc_auc_score
from imblearn.over_sampling import SMOTE
from sklearn.utils import shuffle
from contextlib import suppress
from collections import Counter
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from random import shuffle
import seaborn as sns
from time import time
import pandas as pd
import numpy as np
import warnings
import scipy.io
import pickle
import sys
import gc
import os
import re

In [18]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
data_path = '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data'
code_path = '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/Codes/Shared Codes'
results_path = '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/results'
feature_selection_results = '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/feature_selection_results'
feature_selection_results_evolving = '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/feature_selection_results_evolving'

In [20]:
sys.path.insert(0,code_path)
from genetic_programming import SymbolicRegressor
from binirizer import CustomLabelBinirizer
from ensemble import Ensemble, Classifier
from oselm import OSELMClassifier,set_use_know
from DynamicFeatureSelection import dynamic_feature_selection
from SharedFunctions import prepare_data,train_and_test,feature_evolving,save_pickle,load_pickle,save_object,load_object,generate_new_samples

In [21]:
filenames = ['kddcup99_csv.csv','ISCX2012.csv','CSE-CIC2018.csv','CICIDS2017.csv','7recurrentDrift.csv', 'blip.csv', 'incrementalDrift.csv',
             '7gradualDrift.csv', '7suddenDrift.csv']
filenames = list(map(lambda x: os.path.join(data_path, x), filenames))
filenames

['/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/kddcup99_csv.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/ISCX2012.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/CSE-CIC2018.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/CICIDS2017.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/7recurrentDrift.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/blip.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/incrementalDrift.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/7gradualDrift.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/7suddenDrift.csv']

In [22]:
def genetic_programming():
    return SymbolicRegressor(population_size=10,
            generations=5, stopping_criteria=0.85,
            p_crossover=0.7, p_subtree_mutation=0.1,
            p_hoist_mutation=0.05, p_point_mutation=0.1,
            max_samples=0.7, verbose=1,
            parsimony_coefficient=1e-4, random_state=42,
            function_set=['avg2', 'avg3', 'avg5',
                          'median3', 'median5', 'maximum2', 'maximum3', 'maximum5'],
            metric='f1-score')

In [23]:
def generate_oselm_models(number_of_hidden_neurons, apply_model_replacement=False):
    models= [OSELMClassifier(number_of_hidden_neurons, 'relu', binarizer=CustomLabelBinirizer(), random_state=42),
             OSELMClassifier(number_of_hidden_neurons, 'relu', binarizer=CustomLabelBinirizer(), random_state=42),
             OSELMClassifier(number_of_hidden_neurons, 'relu', binarizer=CustomLabelBinirizer(), random_state=42),
             OSELMClassifier(number_of_hidden_neurons, 'relu', binarizer=CustomLabelBinirizer(), random_state=42),
             ]

    ensemble = Ensemble(classifiers=models, program=genetic_programming(), apply_model_replacement=apply_model_replacement)
    return ensemble

def generate_ml_models(number_of_hidden_neurons, apply_model_replacement=False):
    models = [
              KNeighborsClassifier(5),
              KNeighborsClassifier(5),
              # DecisionTreeClassifier(),
              LogisticRegression(),
              LogisticRegression(),
              GaussianNB(),
              GaussianNB(),
              GaussianNB(),
              ]
    ensemble = Ensemble(classifiers=models, program=genetic_programming(), apply_model_replacement=apply_model_replacement)
    return ensemble

In [24]:
def concept_drift_detection(drift_detection_obj, sample) -> bool:
    """
    Detect concept drift
    :param drift_detection_obj: sklearn drift detection object (ADWIN, DDM, )
    :param smaple : new instanece of data stream
    return True if concept drift was detected otherwise false
    """
    drift_detection_obj.add_element(sample)
    return drift_detection_obj.detected_change()

In [25]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
def random_forest_feature_selection(X, y):
    """
    return best feature from X using random forest
    """
    sel = SelectFromModel(RandomForestClassifier(n_estimators = 20))
    sel.fit(X, y)
    return sel.get_support()

In [26]:
def E2SC4ID (X,
             y,
             sample_index:int,
             buffer:list,
             ensemble: Ensemble,
             drift_detection_obj,
             maxC=8,
             n=2000,
             train_size=0.5,
             drift=False,
             unselected_features=None,
             drift_location={}):
    y_pred = ensemble.global_support_degree(X)
    # if the sample is labeled then insert it into buffer
    if y is not None:
        buffer.append((X, y))
        actual_drift = concept_drift_detection(drift_detection_obj, int(y!=y_pred))
        if actual_drift and not drift:
          drift_location[sample_index] = 'drift'
        drift = drift or actual_drift
        if len(buffer) >= n:
            if drift:
                drift = False
                drift_detection_obj.reset()
                x_buffer, y_buffer = [], []
                for tup in buffer:
                    x_buffer.append(tup[0])
                    y_buffer.append(tup[1])
                ######################################################
                train_size = int(len(x_buffer)*train_size)
                X_train = x_buffer[:train_size]
                y_train = y_buffer[:train_size]
                X_valid = x_buffer[train_size:]
                y_valid = y_buffer[train_size:]
                ######################################################
                __sum = np.array(y_train).sum()
                if 0 ==  __sum or __sum == len(y_train):
                  y_train[0] = 0 if y_train[0] == 1 else 1
                new_models = ensemble.classifier_induction([
                                        model.clf for model in ensemble.classifiers],
                                        X_train,
                                        y_train,
                                        unselected_features)
                if len(ensemble.classifiers) > maxC:
                    ensemble.model_replacement('time')
                ######################################################
                ensemble.update_program(X_valid, y_valid)
            else:
                buffer.clear()
        return ensemble, buffer, drift, drift_location

In [27]:
def E2SC4ID_STREAM(ensemble, stremdata, y, unselected_features, drift_location, chunk_number,result_save_path_data,key,sample_number=0):
    if not ensemble.fitted:
      y[0] = 0 if y[0] == 1 else 1
      ensemble.fit(stremdata[:200], y[:200])

    drift_detection_obj, drift, buffer = DDM(),False,[]
    if sample_number != 0 :
      drift_location = load_pickle(os.path.join(result_save_path_data, "{}_drift_location.pkl".format(key)))
      drift = load_pickle(os.path.join(result_save_path_data, "{}_drift.pkl".format(key)))
      buffer = load_pickle(os.path.join(result_save_path_data, "{}_buffer.pkl".format(key)))
      ensemble = load_pickle(os.path.join(result_save_path_data, "{}_ensemble.pkl".format(key)))

    for i in tqdm(range(200+sample_number, len(stremdata))):
        X, y_true = stremdata[i], y[i]
        ensemble, buffer, drift, drift_location = E2SC4ID (X,
                                                           y_true,
                                                           sample_index=(i +(chunk_number * 10000)),
                                                           buffer=buffer,
                                                           ensemble=ensemble,
                                                           drift_detection_obj=drift_detection_obj,
                                                           maxC=8,
                                                           n=len(stremdata)-200,
                                                           train_size=0.7,
                                                           drift=drift,
                                                           unselected_features=unselected_features,
                                                           drift_location=drift_location)
        save_pickle(ensemble, os.path.join(result_save_path_data, "{}_ensemble.pkl".format(key)))
        save_pickle(drift_location, os.path.join(result_save_path_data, "{}_drift_location.pkl".format(key)))
        save_pickle(drift, os.path.join(result_save_path_data, "{}_drift.pkl".format(key)))
        save_pickle(buffer, os.path.join(result_save_path_data, "{}_buffer.pkl".format(key)))
    return ensemble, drift_location

In [28]:
def main(f_name, generate_model, train_size=3000,apply_model_replacement=False, transfer_learning=False, feature_selection="random_forest", result_save_path="",ChunkNumber=0,sample_number=0,DFS_results_path=''):
  """
  datasets_paths: list of paths (absolute path for each dataset)
  generate_model: function to generate machine learning model.
  train_size: number of samples to be used in training phase.
  transfer_learning: to determine using of transfer learning in the training phase.
  """
  # load the dataset and then process it
  datasets = {}
  first_chunk=True
  d = prepare_data(f_name)
  d = d.sample(frac=1, random_state=42)
  buffer = d.sample(n=5000)
  d.reset_index(inplace=True)
  d.replace([np.inf], 0, inplace=True)
  datasets[f_name.split('/')[-1]] = d
  results = {}
  drift_locations_in_all_dataset = {}
  ensemble = None ####
  ########################################
  for key in tqdm(datasets.keys()):
      result_save_path_data = os.path.join(result_save_path, key)
      drift_location = {} if ChunkNumber==0 else load_pickle(os.path.join(result_save_path_data, "{}_drift_location.pkl".format(key)))
      results[key] = {'model_result': []}
      # convert dataset from dataframe to numpy array.
      data = datasets[key].values
      # split the data into features array and target array.
      X, Y = data[:, 0:-1], data[:, -1].astype('int')
      if not os.path.exists("{}_evolving_matrix.pkl".format(key)):
        a2 = np.random.randint(low=0, high=X.shape[1], size = X.shape[1] // 6).tolist()
        a3 = np.random.randint(low=0, high=X.shape[1], size = X.shape[1] // 5).tolist()
        a4 = np.random.randint(low=0, high=X.shape[1], size = X.shape[1] // 4).tolist()
        evolving_matrix = [a2, a3, a4]
        save_pickle(evolving_matrix, "{}_evolving_matrix.pkl".format(key))
      else:
        evolving_matrix = load_pickle("{}_evolving_matrix.pkl".format(key))
      ensemble = generate_model(number_of_hidden_neurons=X.shape[1]*3 // 2, apply_model_replacement=apply_model_replacement)
      # split the data into chunks (10 chunks)
      chunks_features = np.array_split(X, 10)
      chunks_labels = np.array_split(Y, 10)

      ################# train on each chunk ####################
      print("===================== dataset : {} ======================".format(key))
      chunk_number = 1
      if ChunkNumber>0:
        ensemble = load_pickle(os.path.join(result_save_path_data, "{}_ensemble.pkl".format(key)))
        results = load_pickle(os.path.join(result_save_path_data, "{}_results.pkl".format(key)))
        drift_locations_in_all_dataset = load_pickle(os.path.join(result_save_path_data, "{}_drift_locations_in_all_dataset.pkl".format(key)))
      for CN,chunk_X, chunk_Y in tqdm(zip([*range(len(chunks_labels))],chunks_features, chunks_labels)):
          if ChunkNumber > CN:
            print("Skip Chunk Number : {}".format(CN))
            continue
          if not first_chunk:
            sample_number=0
            first_chunk = False
          else:first_chunk = False
          try:
            chunk_X, chunk_Y = SMOTE().fit_resample(chunk_X, chunk_Y)
          except:
            if chunk_Y.sum() in [0, 1]:
              new_samples, new_labels = generate_new_samples(buffer, chunk_Y)
              chunk_X = np.concatenate((chunk_X, new_samples))
              chunk_Y = np.concatenate((chunk_Y, new_labels))
          gc.collect()
          unselected_feautres = None
          selected = None
          X_train, X_test, y_train, y_test = chunk_X[:train_size], chunk_X[train_size:], chunk_Y[:train_size], chunk_Y[train_size:]
          if feature_selection[0] == "feature_evolving":
            if feature_selection[1] == "random_forest":
              unselected_feautres = feature_evolving(evolving_matrix=evolving_matrix)
              X_train = np.delete(X_train, unselected_feautres, 1)
              X_test = np.delete(X_test, unselected_feautres, 1)
              selected = random_forest_feature_selection(X_train, y_train)
              unselected_feautres = np.where(selected != 1)[0]
            elif feature_selection[1] == "DFS_feature_selection":
              # selected = dynamic_feature_selection(chunk_X, chunk_Y)
              softmax_results = load_object('softmax_results',DFS_results_path)
              average_results = load_object('average_results',DFS_results_path)
              single_agent_results = load_object('single_agent_results',DFS_results_path)
              random_forest_results = load_object('random_forest_results',DFS_results_path)
              voting_results = load_object('voting_results',DFS_results_path)
              f1_score_for_all_algorithm = [softmax_results['f1'],average_results['f1'],single_agent_results['f1'],random_forest_results['f1'],voting_results['f1']]
              masks = [load_object("softmax_mask_"+str(CN),DFS_results_path),
                        load_object("average_mask_"+str(CN),DFS_results_path),
                        load_object("single_agent_mask_"+str(CN),DFS_results_path),
                        load_object("random_forest_mask_"+str(CN),DFS_results_path),
                        load_object("voting_mask_"+str(CN),DFS_results_path)]
              selected = masks[f1_score_for_all_algorithm.index(max(f1_score_for_all_algorithm))]
              unselected_feautres = np.where(selected != 1)[0]
          else:
            if feature_selection[1] == "random_forest":
              selected = random_forest_feature_selection(X_train, y_train)
              unselected_feautres = np.where(selected != 1)[0]
            elif feature_selection[1] == "DFS_feature_selection":
              # selected = dynamic_feature_selection(X_train, y_train)
              softmax_results = load_object('softmax_results',DFS_results_path)
              average_results = load_object('average_results',DFS_results_path)
              single_agent_results = load_object('single_agent_results',DFS_results_path)
              random_forest_results = load_object('random_forest_results',DFS_results_path)
              voting_results = load_object('voting_results',DFS_results_path)
              f1_score_for_all_algorithm = [softmax_results['f1'][CN],average_results['f1'][CN],single_agent_results['f1'][CN],random_forest_results['f1'][CN],voting_results['f1'][CN]]
              masks = [load_object("softmax_mask_"+str(CN),DFS_results_path),
                        load_object("average_mask_"+str(CN),DFS_results_path),
                        load_object("single_agent_mask_"+str(CN),DFS_results_path),
                        load_object("random_forest_mask_"+str(CN),DFS_results_path),
                        load_object("voting_mask_"+str(CN),DFS_results_path)]
              selected = masks[f1_score_for_all_algorithm.index(max(f1_score_for_all_algorithm))]
              unselected_feautres = np.where(selected != 1)[0]

          if not os.path.exists(result_save_path_data):
            os.mkdir(result_save_path_data)
          if transfer_learning:
            temp = np.squeeze(X_train[:, selected]) if len(list(X_train[:, selected].shape))>2 else X_train[:, selected]
            ensemble, drift_location = E2SC4ID_STREAM(ensemble=ensemble, stremdata=temp, y=y_train, unselected_features=None, drift_location=drift_location,
                                                      chunk_number=chunk_number, result_save_path_data=result_save_path_data,key=key,sample_number=sample_number)
            temp = np.squeeze(X_test[:, selected]) if len(list(X_test[:, selected].shape))>2 else X_test[:, selected]
            ensemble.evaluate(temp, y_test, chunk_number)
          else:
            ensemble, drift_location = E2SC4ID_STREAM(ensemble=generate_model(number_of_hidden_neurons=X.shape[1]*3 // 2,apply_model_replacement=apply_model_replacement),
                                                      stremdata=X_train, y=y_train, unselected_features=unselected_feautres, drift_location=drift_location,
                                                      chunk_number=chunk_number,result_save_path_data=result_save_path_data,key=key,sample_number=sample_number)
            ensemble.evaluate(X_test, y_test, chunk_number)
          save_pickle(drift_location, os.path.join(result_save_path_data, "{}_drift_location.pkl".format(key)))

          temp = np.squeeze(X_test[:, selected]) if len(list(X_test[:, selected].shape))>2 else X_test[:, selected]
          y_pre = ensemble.predict(temp)
          results[key][chunk_number] = {"y_true" : y_test, "y_pred": y_pre}
          results[key]['model_result'].append(ensemble.scores)
          if transfer_learning:
             ensemble.fit(temp, y_test, None)
          chunk_number += 1
          drift_locations_in_all_dataset[key] = drift_location

          save_pickle(ensemble, os.path.join(result_save_path_data, "{}_ensemble.pkl".format(key)))
          save_pickle(results, os.path.join(result_save_path_data, "{}_results.pkl".format(key)))
          save_pickle(drift_locations_in_all_dataset, os.path.join(result_save_path_data, "{}_drift_locations_in_all_dataset.pkl".format(key)))

In [29]:
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']

In [None]:
# # use_know = True
# # set_use_know(use_know)
# # path = os.path.join(results_path,'kpfaoselm_gp_combiner_with_model_replacement_evolving_DFS')
# # os.makedirs(path, exist_ok=True)
# # for f_name,d_name in zip(filenames,data_name):
# #   DFS_results_path = os.path.join(feature_selection_results_evolving,d_name)
# #   main(f_name, generate_oselm_models, apply_model_replacement=True,transfer_learning=True,
# #        feature_selection=["feature_evolving",'DFS_feature_selection'], result_save_path=path,DFS_results_path=DFS_results_path)

In [31]:
DATA_NUMBER=0
ChunkNumber=0
sample_number=sum([0])
use_know = True
set_use_know(use_know)
path = os.path.join(results_path,'kpfaoselm_gp_combiner_with_model_replacement_evolving_DFS')
os.makedirs(path, exist_ok=True)
f_name,d_name = filenames[DATA_NUMBER],data_name[DATA_NUMBER]
DFS_results_path = os.path.join(feature_selection_results_evolving,d_name)
main(f_name, generate_oselm_models, apply_model_replacement=True,transfer_learning=True, feature_selection=["feature_evolving",'DFS_feature_selection'],
     result_save_path=path,DFS_results_path=DFS_results_path,ChunkNumber=ChunkNumber,sample_number=sample_number)

  0%|          | 0/1 [00:00<?, ?it/s]



0it [00:00, ?it/s]

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70         0.895028       81         0.895028         0.895028      0.51s


  0%|          | 0/2800 [00:00<?, ?it/s]

{1: {'accuracy': 0.3167779632721202, 'precision': 0.3167779632721202, 'recall': 1.0, 'f1-score': 0.4811410459587956, 'auc': 0.5}}
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    66.50                0       79                0                0      0.80s
   1   691.70                0     3104                0                0      3.29s
   2   148.20                0      529                0                0      0.59s
   3   611.10                0        9                0                0      1.02s
   4   186.90                0      994                0                0      0.00s


  0%|          | 0/2800 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{1: {'accuracy': 0.3167779632721202, 'precision': 0.3167779632721202, 'recall': 1.0, 'f1-score': 0.4811410459587956, 'auc': 0.5}, 2: {'accuracy': 0.6858443708609272, 'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'auc': 0.5}}
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    47.80                0       95                0                0      0.66s
   1   530.20                0     1942                0                0      2.64s
   2   180.50                0      543                0                0      0.66s
   3   583.40                0        6                0                0      1.04s
   4   193.80                0     1236                0                0      0.00s


  0%|          | 0/2800 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{1: {'accuracy': 0.3167779632721202, 'precision': 0.3167779632721202, 'recall': 1.0, 'f1-score': 0.4811410459587956, 'auc': 0.5}, 2: {'accuracy': 0.6858443708609272, 'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'auc': 0.5}, 3: {'accuracy': 0.6889991728701406, 'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'auc': 0.5}}
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    30.80        0.0440387        9         0.208297         0.208297      0.81s
   1   778.40       0.00356959      142       0.00397088       0.00397088      6.27s
   2   155.70        0.0440392       14         0.208297         0.208297      0.77s
   3   660.60        0.0234228        6         0.206469         0.206469      1.10s
   4   152.00        0.0228125        4         0.208297         0.208297      0.00s


  0%|          | 0/2800 [00:00<?, ?it/s]

{1: {'accuracy': 0.3167779632721202, 'precision': 0.3167779632721202, 'recall': 1.0, 'f1-score': 0.4811410459587956, 'auc': 0.5}, 2: {'accuracy': 0.6858443708609272, 'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'auc': 0.5}, 3: {'accuracy': 0.6889991728701406, 'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'auc': 0.5}, 4: {'accuracy': 0.6681706316652994, 'precision': 0.3520710059171598, 'recall': 0.07839262187088274, 'f1-score': 0.12823275862068964, 'auc': 0.5065876152832676}}
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    41.80                0       92                0                0      0.79s
   1   563.10                0     2629                0                0      2.77s
   2   615.30                0      628                0                0      2.12s
   3   488.

  0%|          | 0/2800 [00:00<?, ?it/s]

{1: {'accuracy': 0.3167779632721202, 'precision': 0.3167779632721202, 'recall': 1.0, 'f1-score': 0.4811410459587956, 'auc': 0.5}, 2: {'accuracy': 0.6858443708609272, 'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'auc': 0.5}, 3: {'accuracy': 0.6889991728701406, 'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'auc': 0.5}, 4: {'accuracy': 0.6681706316652994, 'precision': 0.3520710059171598, 'recall': 0.07839262187088274, 'f1-score': 0.12823275862068964, 'auc': 0.5065876152832676}, 5: {'accuracy': 0.6571899464359291, 'precision': 0.26453488372093026, 'recall': 0.06058588548601864, 'f1-score': 0.09859154929577466, 'auc': 0.49255427925852246}}
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    34.00       0.00381026        3       0.00656168       0.00656168      0.58s
   1   508.80     

  0%|          | 0/2800 [00:00<?, ?it/s]

{1: {'accuracy': 0.3167779632721202, 'precision': 0.3167779632721202, 'recall': 1.0, 'f1-score': 0.4811410459587956, 'auc': 0.5}, 2: {'accuracy': 0.6858443708609272, 'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'auc': 0.5}, 3: {'accuracy': 0.6889991728701406, 'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'auc': 0.5}, 4: {'accuracy': 0.6681706316652994, 'precision': 0.3520710059171598, 'recall': 0.07839262187088274, 'f1-score': 0.12823275862068964, 'auc': 0.5065876152832676}, 5: {'accuracy': 0.6571899464359291, 'precision': 0.26453488372093026, 'recall': 0.06058588548601864, 'f1-score': 0.09859154929577466, 'auc': 0.49255427925852246}, 6: {'accuracy': 0.6507323026851098, 'precision': 0.29894736842105263, 'recall': 0.09305373525557012, 'f1-score': 0.14192903548225885, 'auc': 0.49741182337999745}}
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fi

  0%|          | 0/2800 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
DATA_NUMBER=1
ChunkNumber=0
sample_number=sum([0])
use_know = True
set_use_know(use_know)
path = os.path.join(results_path,'kpfaoselm_gp_combiner_with_model_replacement_evolving_DFS')
os.makedirs(path, exist_ok=True)
f_name,d_name = filenames[DATA_NUMBER],data_name[DATA_NUMBER]
DFS_results_path = os.path.join(feature_selection_results_evolving,d_name)
main(f_name, generate_oselm_models, apply_model_replacement=True,transfer_learning=True, feature_selection=["feature_evolving",'DFS_feature_selection'],
     result_save_path=path,DFS_results_path=DFS_results_path,ChunkNumber=ChunkNumber,sample_number=sample_number)

In [None]:
DATA_NUMBER=2
ChunkNumber=0
sample_number=sum([0])
use_know = True
set_use_know(use_know)
path = os.path.join(results_path,'kpfaoselm_gp_combiner_with_model_replacement_evolving_DFS')
os.makedirs(path, exist_ok=True)
f_name,d_name = filenames[DATA_NUMBER],data_name[DATA_NUMBER]
DFS_results_path = os.path.join(feature_selection_results_evolving,d_name)
main(f_name, generate_oselm_models, apply_model_replacement=True,transfer_learning=True, feature_selection=["feature_evolving",'DFS_feature_selection'],
     result_save_path=path,DFS_results_path=DFS_results_path,ChunkNumber=ChunkNumber,sample_number=sample_number)

In [None]:
DATA_NUMBER=3
ChunkNumber=0
sample_number=sum([0])
use_know = True
set_use_know(use_know)
path = os.path.join(results_path,'kpfaoselm_gp_combiner_with_model_replacement_evolving_DFS')
os.makedirs(path, exist_ok=True)
f_name,d_name = filenames[DATA_NUMBER],data_name[DATA_NUMBER]
DFS_results_path = os.path.join(feature_selection_results_evolving,d_name)
main(f_name, generate_oselm_models, apply_model_replacement=True,transfer_learning=True, feature_selection=["feature_evolving",'DFS_feature_selection'],
     result_save_path=path,DFS_results_path=DFS_results_path,ChunkNumber=ChunkNumber,sample_number=sample_number)

In [None]:
DATA_NUMBER=4
ChunkNumber=0
sample_number=sum([0])
use_know = True
set_use_know(use_know)
path = os.path.join(results_path,'kpfaoselm_gp_combiner_with_model_replacement_evolving_DFS')
os.makedirs(path, exist_ok=True)
f_name,d_name = filenames[DATA_NUMBER],data_name[DATA_NUMBER]
DFS_results_path = os.path.join(feature_selection_results_evolving,d_name)
main(f_name, generate_oselm_models, apply_model_replacement=True,transfer_learning=True, feature_selection=["feature_evolving",'DFS_feature_selection'],
     result_save_path=path,DFS_results_path=DFS_results_path,ChunkNumber=ChunkNumber,sample_number=sample_number)

In [None]:
DATA_NUMBER=5
ChunkNumber=0
sample_number=sum([0])
use_know = True
set_use_know(use_know)
path = os.path.join(results_path,'kpfaoselm_gp_combiner_with_model_replacement_evolving_DFS')
os.makedirs(path, exist_ok=True)
f_name,d_name = filenames[DATA_NUMBER],data_name[DATA_NUMBER]
DFS_results_path = os.path.join(feature_selection_results_evolving,d_name)
main(f_name, generate_oselm_models, apply_model_replacement=True,transfer_learning=True, feature_selection=["feature_evolving",'DFS_feature_selection'],
     result_save_path=path,DFS_results_path=DFS_results_path,ChunkNumber=ChunkNumber,sample_number=sample_number)

In [None]:
DATA_NUMBER=6
ChunkNumber=0
sample_number=sum([0])
use_know = True
set_use_know(use_know)
path = os.path.join(results_path,'kpfaoselm_gp_combiner_with_model_replacement_evolving_DFS')
os.makedirs(path, exist_ok=True)
f_name,d_name = filenames[DATA_NUMBER],data_name[DATA_NUMBER]
DFS_results_path = os.path.join(feature_selection_results_evolving,d_name)
main(f_name, generate_oselm_models, apply_model_replacement=True,transfer_learning=True, feature_selection=["feature_evolving",'DFS_feature_selection'],
     result_save_path=path,DFS_results_path=DFS_results_path,ChunkNumber=ChunkNumber,sample_number=sample_number)

In [None]:
DATA_NUMBER=7
ChunkNumber=0
sample_number=sum([0])
use_know = True
set_use_know(use_know)
path = os.path.join(results_path,'kpfaoselm_gp_combiner_with_model_replacement_evolving_DFS')
os.makedirs(path, exist_ok=True)
f_name,d_name = filenames[DATA_NUMBER],data_name[DATA_NUMBER]
DFS_results_path = os.path.join(feature_selection_results_evolving,d_name)
main(f_name, generate_oselm_models, apply_model_replacement=True,transfer_learning=True, feature_selection=["feature_evolving",'DFS_feature_selection'],
     result_save_path=path,DFS_results_path=DFS_results_path,ChunkNumber=ChunkNumber,sample_number=sample_number)

In [None]:
DATA_NUMBER=8
ChunkNumber=0
sample_number=sum([0])
use_know = True
set_use_know(use_know)
path = os.path.join(results_path,'kpfaoselm_gp_combiner_with_model_replacement_evolving_DFS')
os.makedirs(path, exist_ok=True)
f_name,d_name = filenames[DATA_NUMBER],data_name[DATA_NUMBER]
DFS_results_path = os.path.join(feature_selection_results_evolving,d_name)
main(f_name, generate_oselm_models, apply_model_replacement=True,transfer_learning=True, feature_selection=["feature_evolving",'DFS_feature_selection'],
     result_save_path=path,DFS_results_path=DFS_results_path,ChunkNumber=ChunkNumber,sample_number=sample_number)