In [1]:
from sklearn.metrics import recall_score, precision_score, f1_score, roc_auc_score,accuracy_score
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from multiprocessing.pool import ThreadPool
from sklearn.naive_bayes import GaussianNB
from collections import deque, defaultdict
from imblearn.over_sampling import SMOTE
from warnings import filterwarnings
from xgboost import XGBClassifier
from scipy.special import softmax
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from keras import backend as K
from typing import Callable
import tensorflow as tf
from sklearn import svm
from typing import List
import seaborn as sns
import pandas as pd
import numpy as np
import requests
import pickle
import random
import keras
import copy
import json
import sys
import os
import gc

sns.set(rc = {'figure.figsize':(22,12)}, style="whitegrid")

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
data_path = '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data'
code_path = '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/codes'
results_path = '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/results'
feature_selection_results = '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/feature_selection_results'
feature_selection_results_evolving = '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/feature_selection_results_evolving'

In [4]:
sys.path.insert(0,code_path)
from genetic_programming import SymbolicRegressor
from binirizer import CustomLabelBinirizer
from ensemble import Ensemble, Classifier
from oselm import OSELMClassifier,set_use_know
from DynamicFeatureSelection import dynamic_feature_selection



# 1. Policy Function & DQN Architicture
* 1- epsilon greedy implementaion for make action
* 2- DQN model and compile

In [5]:
def epsilon_greedy(expected_reward, epsilon=0.97) -> int:
    """
    expected_reward: list of expected rewards for each possible action
    epsilon: .
    """
    if np.random.rand() <= epsilon:
        return np.random.choice(list(range(len(expected_reward))))
    else:
        return np.argmax(expected_reward)

PolicyFunction  = Callable[[np.ndarray, float], int]

In [6]:
LEARNING_RATE = 0.001

def create_model(input_dim):
    K.clear_session()
    model = keras.models.Sequential()
    model.add(keras.layers.Input(shape=(input_dim,)))
    model.add(keras.layers.Dense(32, kernel_initializer='he_uniform', activation='relu'))
    model.add(keras.layers.Dense(16, kernel_initializer='he_uniform', activation='relu'))
    model.add(keras.layers.Dense(2))
    model.compile(loss='mse', optimizer='adam')
    return model

## 2. Agents Implementaion

* Agents class will be the parent of 4 types of agents listed as follow:
    * 1- Softmax version (distrbute the total reward between agents using softmax function)
    * 2- Average version (distrbute the total reward between agents using average function)
    * 1- regression version (calcualte the contrbution of each agent using regression model)
    * 1- Single Agent version (only one agent at a time can make action and get the total reawrd as a result)

In [7]:
class Agents:
    def __init__(self, evaluation_network,number_of_featuer, buffer_size: int = 800):

        self.evaluation_network = evaluation_network
        self.target_network = copy.deepcopy(self.evaluation_network)
        self.buffer_size = buffer_size
        self.fitted = False
        self.number_of_featuer = number_of_featuer
        # reply buffer is a list of tuples each tuples contains the following
        # (St, At, St+1, Rt+1)
        # (Current state, Action was made, New state, Reward)
        self.reply_buffer = deque(maxlen=self.buffer_size)
        self.contrbution = np.random.rand()

    def make_action(self, curr_state: np.ndarray, policy_function: PolicyFunction, epsilon) -> int:
        # q_values represents the expected rewards for each possible action
        if self.fitted:
            q_values = self.evaluation_network.predict(curr_state.reshape(-1, self.number_of_featuer),verbose=0)
            action = policy_function(q_values, epsilon)
        else:
            action = policy_function([0, 1], 1)
        return action

    def update_target_network(self):
        self.target_network = copy.deepcopy(self.evaluation_network)
        return

In [8]:
class AgentsSoftmax(Agents):
    # class variable
    agent_count = 0
    def __init__(self, evaluation_network,number_of_featuer, buffer_size=800):
        self.agent_id = AgentsSoftmax.agent_count
        AgentsSoftmax.agent_count += 1
        super().__init__(evaluation_network,number_of_featuer, buffer_size)


    def update_evaluation_network(self, batch_size=32, epochs=5, discount_factor=0.995):
        # select random batch from the reply buffer
        batch = random.sample(self.reply_buffer, batch_size)

        # inintilize some lists to store transition information
        Q1, actions, Q2, rewards = [], [], [], []

        # from each transition extract its values
        for transition in batch:
            Q1.append(transition[0])
            actions.append(transition[1])
            Q2.append(transition[2])
            rewards.append(transition[3])
        # X_train will be the states from
        X_train = np.array(Q1).reshape(-1, self.number_of_featuer)

        expected_reward = self.evaluation_network.predict(np.array(Q1).reshape(-1, self.number_of_featuer),verbose=0)
        Q2 = self.target_network.predict(np.array(Q2).reshape(-1, self.number_of_featuer),verbose=0)

        # update expected rewards using biliman equation

        for i, act in enumerate(actions[:-1]):
            expected_reward[i, act] = rewards[i] + (discount_factor * np.argmax(Q2[i]))

        y_train = expected_reward.copy()

        # calculate the change frequency of the agent decision to use it as its contrbution in get total reward

        change_frequency = 0
        for state, next_state, reward, next_reward in zip(X_train[:-1], X_train[1:], rewards[: -1], rewards[1:]):
            #print(state, next_state, reward, next_reward)
            if np.abs(state[self.agent_id] - next_state[self.agent_id]) == 1:
                self.contrbution += np.abs(reward - next_reward)
                change_frequency += 1

        self.contrbution = 0 if change_frequency==0 else self.contrbution/change_frequency

        # train the DQN evaluation network.
        self.evaluation_network.fit(X_train, y_train, epochs=epochs, verbose=0)
        self.fitted = True
        return

In [9]:
class AgentsRegression(Agents):
    # class variable
    agent_count = 0
    def __init__(self, evaluation_network,number_of_featuer, buffer_size=800):
        self.agent_id = AgentsRegression.agent_count
        AgentsRegression.agent_count += 1
        super().__init__(evaluation_network,number_of_featuer, buffer_size)

    def update_evaluation_network(self, batch_size=32, epochs=5, discount_factor=0.995):
        # select random batch from the reply buffer
        batch = random.sample(self.reply_buffer, batch_size)

        # inintilize some lists to store transition information
        Q1, actions, Q2, rewards = [], [], [], []

        # from each transition extract its values
        for transition in batch:
            Q1.append(transition[0])
            actions.append(transition[1])
            Q2.append(transition[2])
            rewards.append(transition[3])
        # X_train will be the states from
        X_train = np.array(Q1).reshape(-1, self.number_of_featuer)

        expected_reward = self.evaluation_network.predict(np.array(Q1).reshape(-1, self.number_of_featuer),verbose=0)
        Q2 = self.target_network.predict(np.array(Q2).reshape(-1, self.number_of_featuer),verbose=0)

        # update expected rewards using biliman equation
        for i, act in enumerate(actions[:-1]):
            expected_reward[i, act] = rewards[i] + (discount_factor * np.argmax(Q2[i]))

        y_train = expected_reward.copy()
        # train the DQN evaluation network.
        self.evaluation_network.fit(X_train, y_train, epochs=epochs, verbose=0)
        self.fitted = True
        return

In [10]:
class AgentsAverage(Agents):
    # class variable
    agent_count = 0
    def __init__(self, evaluation_network,number_of_featuer, buffer_size=800):
        self.agent_id = AgentsAverage.agent_count
        AgentsAverage.agent_count += 1
        super().__init__(evaluation_network,number_of_featuer, buffer_size)

    def update_evaluation_network(self, batch_size=32, epochs=5, discount_factor=0.995):
        # select random batch from the reply buffer
        batch = random.sample(self.reply_buffer, batch_size)

        # inintilize some lists to store transition information
        Q1, actions, Q2, rewards = [], [], [], []

        # from each transition extract its values
        for transition in batch:
            Q1.append(transition[0])
            actions.append(transition[1])
            Q2.append(transition[2])
            rewards.append(transition[3])
        # X_train will be the states from
        X_train = np.array(Q1).reshape(-1, self.number_of_featuer)

        expected_reward = self.evaluation_network.predict(np.array(Q1).reshape(-1, self.number_of_featuer),verbose=0)
        Q2 = self.target_network.predict(np.array(Q2).reshape(-1, self.number_of_featuer),verbose=0)

        # update expected rewards using biliman equation

        for i, act in enumerate(actions[:-1]):
            expected_reward[i, act] = rewards[i] + (discount_factor * np.argmax(Q2[i]))

        y_train = expected_reward.copy()

        WINDOW_SIZE = 4
        X_train_ = np.zeros((X_train.shape[0] // WINDOW_SIZE, X_train.shape[1]))
        y_train_ = []
        j = 0
        for i in range(0, batch_size, WINDOW_SIZE):
            window_of_states = X_train[i: i + WINDOW_SIZE].sum(axis=0) / WINDOW_SIZE
            window_of_rewards = sum(rewards[i: i + WINDOW_SIZE])
            r = window_of_rewards * window_of_states[self.agent_id]
             # Rounding state
            X_train_[j, :] = np.around(window_of_states)
            if window_of_states[self.agent_id] == 0:
                if window_of_rewards > 0.6:
                    r = window_of_rewards
                else:
                    r = window_of_rewards / WINDOW_SIZE
            y_train_.append(r)
            j += 1

        X_train = X_train_
        y_train = np.array(y_train_)
        # train the DQN evaluation network.
        self.evaluation_network.fit(X_train, y_train, epochs=epochs, verbose=0)
        self.fitted = True
        return

In [11]:
class AgentsSingle(Agents):
    # class variable
    agent_count = 0
    def __init__(self, evaluation_network,number_of_featuer, buffer_size=800):
        self.agent_id = AgentsSingle.agent_count
        AgentsSingle.agent_count += 1
        super().__init__(evaluation_network,number_of_featuer, buffer_size)

    def update_evaluation_network(self, batch_size=32, epochs=5, discount_factor=0.995):
        # select random batch from the reply buffer
        batch = random.sample(self.reply_buffer, batch_size)

        # inintilize some lists to store transition information
        Q1, actions, Q2, rewards = [], [], [], []

        # from each transition extract its values
        for transition in batch:
            Q1.append(transition[0])
            actions.append(transition[1])
            Q2.append(transition[2])
            rewards.append(transition[3])
        # X_train will be the states from
        X_train = np.array(Q1).reshape(-1, self.number_of_featuer)

        expected_reward = self.evaluation_network.predict(np.array(Q1).reshape(-1, self.number_of_featuer),verbose=0)
        Q2 = self.target_network.predict(np.array(Q2).reshape(-1, self.number_of_featuer),verbose=0)

        # update expected rewards using biliman equation
        for i, act in enumerate(actions[:-1]):
            expected_reward[i, act] = rewards[i] + (discount_factor * np.argmax(Q2[i]))

        y_train = expected_reward.copy()
        # train the DQN evaluation network.
        self.evaluation_network.fit(X_train, y_train, epochs=epochs, verbose=0)
        self.fitted = True
        return

# 3. Reward Calculation Method
* 1- get accuracy of selected feature using logistic regression model
* 2- claculate the reward with reward_strategy function using accuracy from last step.

In [12]:
class Classifier:
    def __init__(self, clf, max_number_of_classes:int=2):
        """
        Wrapping sklearn classifiers
        clf: sklearn classifiers like (KNN, LogRegression, DecisionTree, etc...)
        max_number_of_classes: integer, number of unique values in the predicted variable.
        """
        self.clf = clf
        # decision profile contains the prediction probability values.
        self.decision_profile = None
        self.max_number_of_classes = max_number_of_classes


    # fit the classifier
    def fit(self, X_train, y_train, unselected_features=None):
        """
        Call the training function
        X_train: 2d array with shape num_of_samples x num_of_feautres.
        y_train: 1d array with shape (num_of_samples, ) contains the ground truth values.
        """
        # X_train = np.array(X_train) if not type(X_train).__module__ == np.__name__ else X_train
        # y_train = np.array(y_train) if not type(y_train).__module__ == np.__name__ else y_train


        if type(self.clf) == OSELMClassifier:
            self.clf.fit(X_train, y_train, unselected_features)
        else:
            # print("1234")
            self.clf.fit(X_train, y_train)
            # print(type(self.clf))

    def predict_proba(self, X):
        """
        predict the probability of belonging this `sample` to each class
        """
        # sometimes number of unique values in the predicted variable differ from one chunk to another,
        # so that we need to pad the results of probablity prediction to new size equal to `max_number_of_classes`

        pred = self.clf.predict_proba(X)
        return pred

    def build_decision_profile(self, sample):
        """
        add the predict_probability result to the `decision_profile` list
        sample: one example form the dataset
        """
        self.decision_profile = self.predict_proba(sample.reshape((1, -1)))[0].tolist()


class Ensemble:
    def __init__(self, classifiers, program, apply_model_replacement):

        """
        classfiers : list of Classifier objects
        program: result of genetic programming (SymbolicRegressor)
        """
        self.classifiers = classifiers
        self.program = program
        self.program_history = []
        self.fitted = False
        self.scores = {}
        self.apply_model_replacement = apply_model_replacement

    def fit(self, X_train, y_train, unselected_features=None):
        self.classifier_induction(self.classifiers, X_train, y_train, unselected_features=unselected_features)
        self.update_program(X_train, y_train)


    def classifier_induction(self, new_classifiers, X_train:np.array, y_train:np.array, unselected_features:list=None)->list:
        """
        new_classifiers: list of new classifiers to insert them into ensemble classifiers.
        X_train: training dataset .
        y_train: ground truth values.
        unselected_features: indices of unselected features at each chunk
        ----------------------------------------------------------------
        return new_classifiers after training.
        """
        # use classifier_induction_util for multiprocessing
        def classifier_induction_util(classifier):
            clf = Classifier(classifier, 2)
            clf.fit(X_train.copy(), y_train.copy(), unselected_features)
            return clf
        # train each new classifier in parallel
        trained_classifiers = ThreadPool(len(new_classifiers)).map(classifier_induction_util, new_classifiers)
        # add the trained classifiers to the ensemble classifiers.
        if self.apply_model_replacement:
          self.classifiers += trained_classifiers
        else:
          self.classifiers = trained_classifiers
        # return the trained classifiers (new classifiers after training)
        return trained_classifiers

    def model_replacement(self, criteria='best'):
        if criteria == 'best':
          pass
        elif criteria == 'time':
          self.classifiers = self.classifiers[3:]


    def global_support_degree(self, sample):
        for i,clf in enumerate(self.classifiers):
            if not isinstance(clf,Classifier):
              clf = Classifier(clf,2)
              self.classifiers[i] = clf
            clf.build_decision_profile(sample)
        profile = np.array([self.classifiers[i].decision_profile for i in range(len(self.classifiers))])
        return np.argmax(profile.sum(axis=0))

    def update_program(self, X, y):
        # change the fit flag to True.
        self.fitted = True
        profiles = np.array([self.classifiers[i].predict_proba(X) for i in range(len(self.classifiers))])
        self.program.fit(profiles, y)
        self.program_history.append(self.program)


    def predict(self, X_test):
        X_test = np.squeeze(X_test) if len(list(X_test.shape))>2 else X_test
        profiles = np.array([self.classifiers[i].predict_proba(X_test) for i in range(len(self.classifiers))])
        return self.program.predict(profiles)

    def evaluate(self, X_test, y_test, chunk_id=1):
        y_pred = self.predict(X_test)
        # accuracy_score, precision_score, recall_score, f1_score
        try:
          auc = roc_auc_score(y_test, y_pred)
        except:
          auc = 0.5
        self.scores[chunk_id] = {"accuracy": accuracy_score(y_test, y_pred),
                                 "precision": precision_score(y_test, y_pred),
                                 "recall": recall_score(y_test, y_pred),
                                 "f1-score": f1_score(y_test, y_pred),
                                 "auc": auc}

In [13]:
def genetic_programming():
    return SymbolicRegressor(population_size=10,
            generations=5, stopping_criteria=0.85,
            p_crossover=0.7, p_subtree_mutation=0.1,
            p_hoist_mutation=0.05, p_point_mutation=0.1,
            max_samples=0.7, verbose=1,
            parsimony_coefficient=1e-4, random_state=42,
            function_set=['avg2', 'avg3', 'avg5',
                          'median3', 'median5', 'maximum2', 'maximum3', 'maximum5'],
            metric='f1-score')

In [14]:
def generate_oselm_models(number_of_hidden_neurons, apply_model_replacement=False):
    models= [OSELMClassifier(number_of_hidden_neurons, 'relu', binarizer=CustomLabelBinirizer(), random_state=42),
             OSELMClassifier(number_of_hidden_neurons, 'relu', binarizer=CustomLabelBinirizer(), random_state=42),
             OSELMClassifier(number_of_hidden_neurons, 'relu', binarizer=CustomLabelBinirizer(), random_state=42),
             OSELMClassifier(number_of_hidden_neurons, 'relu', binarizer=CustomLabelBinirizer(), random_state=42),
             ]

    ensemble = Ensemble(classifiers=models, program=genetic_programming(), apply_model_replacement=apply_model_replacement)
    return ensemble

def generate_ml_models(number_of_hidden_neurons, apply_model_replacement=False):
    models = [
              KNeighborsClassifier(5),
              KNeighborsClassifier(5),
              # DecisionTreeClassifier(),
              LogisticRegression(),
              LogisticRegression(),
              GaussianNB(),
              GaussianNB(),
              GaussianNB(),
              ]
    ensemble = Ensemble(classifiers=models, program=genetic_programming(), apply_model_replacement=apply_model_replacement)
    return ensemble

In [15]:
# test set percentage
TESTSIZE=0.2
def get_reward(X,Y ,subset_features,apply_model_replacement=True):
    global TESTSIZE
    # index of selected features
    subset_features = np.where(np.array(subset_features) == 1)[0]
    if subset_features.shape[0] == 0:return 0
    # train test split
    X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42, test_size=TESTSIZE)

    # classifier = LogisticRegression()

    ensemble1 = generate_oselm_models(number_of_hidden_neurons=X.shape[1]*3 // 2, apply_model_replacement=apply_model_replacement)

    ensemble1.fit(X_train, y_train,np.where(subset_features != 1)[0])

    y_pred1 = ensemble1.predict(X_test)

    acc1 = accuracy_score(y_test, y_pred1)

    return acc1

In [16]:
def reward_strategy(time_step: int, accuracy: float, accuracy_history: list, subset_features: list, error_rate: float,
                    beta: float = 0.99):

    if sum(subset_features) == len(subset_features):
        return -5

    elif sum(subset_features) == 0:
        return -10

    elif accuracy > max(accuracy_history):
        return 0.5

    elif accuracy < max(accuracy_history):
        return -0.1

    else:
        return -1 * (beta * error_rate + ((1 - beta) * (sum(subset_features) / len(subset_features))))

# 4. Some helper function
* 1- Object to store all information about the result like dataset name, accuracy, precision and etc...
* 2- Telegram api to send the result to a chat room
* 3- Send result to the chat room

In [17]:
class Results:
    def __init__(self, method_name, dataset_name, chunk_id, feature_space):
        self.dataset_name = dataset_name
        self.method_name = method_name
        self.chunk_id = chunk_id
        self.feature_space = feature_space
        self.feature_space_size = len(feature_space)
        self.result_information = {}

    def set_chunk_id(self, chunk_id: int):
        self.chunk_id = chunk_id

    def set_feature_space(self, feature_space: list):
        self.feature_space = feature_space
        self.feature_space_size = len(feature_space)

    def add_result(self, model_type:str, result:dict):
        self.result_information[model_type] = result

    def save(self, path='feature_selection_results'):
        file_name = self.method_name + '_' + self.dataset_name + '_' + '{}'.format(self.chunk_id) + '.pkl'
        with open(os.path.join(path, file_name), 'wb') as file_:
            pickle.dump(self, file_, pickle.HIGHEST_PROTOCOL)
        return

In [18]:
class Telegram:

    def __init__(self, bot_token):
        self.end_point = 'https://api.telegram.org/bot'
        self.token = bot_token
        self.full_endpoint = self.end_point + self.token + '/'

    def __repr__(self):
        return 'your token is {}'.format(self.full_endpoint)

    def send_message(self, chat_id, message):
        send_text = self.full_endpoint + 'sendMessage'
        data = {'chat_id': chat_id, 'text': message}
        response = requests.get(send_text, data=data)
        return response

    def send_photo(self, chat_id, photo):
        url = self.full_endpoint + 'sendPhoto'
        data = {'chat_id': chat_id}
        files = {'photo': open(photo, 'rb')}
        response = requests.post(url, data=data, files=files)
        return response

    def send_document(self, chat_id, file):
        url = self.full_endpoint + 'sendDocument'
        data = {'chat_id': chat_id}
        files = {'document':open(file, 'rb')}
        response = requests.post(url, data = data, files = files)
        return response
    def get_updates(self):
        url = self.full_endpoint + 'getUpdates'
        response = requests.get(url)
        return response

    def get_file_information(self, file_id):
        url = f'https://api.telegram.org/bot{self.token}/getFile'
        response = requests.post(url,data = {"file_id":file_id})
        if response.status_code != 200:
            return {"ok":"False"}
        json_response = response.json()
        if json_response['ok'] == False:
            return {"ok":"False"}
        file_path = json_response['result']['file_path']
        file_information = requests.get(f'https://api.telegram.org/file/bot{self.token}/{file_path}')
        return file_information.text

In [19]:
def send_results(telegram_api, results):
    telegram.send_message(1021388563, 'dataset name : {}'.format(results.dataset_name))
    telegram.send_message(1021388563, 'chunk id : {}'.format(results.chunk_id))
    telegram.send_message(1021388563, 'selected features : {}'.format(results.feature_space))
    telegram.send_message(1021388563, 'results')
    for key in  results.result_information.keys():
        telegram.send_message(1021388563, 'model tpye : {}'.format(key))
        telegram.send_message(1021388563, '{}'.format(results.result_information[key]))

In [20]:
def save_object(obj, filename,path):
    """
    _ INPUT (obj) THE OBJECT WE NEED SAVW IT (filename) THE NAME OF OBJECT
    """
    filename = os.path.join(path,filename)
    with open(filename+".pkl", 'wb') as outp:
        pickle.dump(obj, outp, pickle.HIGHEST_PROTOCOL)
    outp.close()
def load_object(filename,path):
    """
    _ INPUT THE NAME OF OBJECT WE NEED LOAD IT
    """
    filename = os.path.join(path,filename)
    with open(filename+".pkl", 'rb') as outp:
        loaded_object = pickle.load(outp)
    outp.close()
    return loaded_object

# 5. Feature Selection Main Algorithm
* For each type of agents we need a specific feature selection algorithm with little difference between them.
* Evaluation function using different machine learning model based on selected feature
* Result Visulization

In [21]:
def softmax_distrbution(agents):
    contrbutions = []
    for agent in agents:
        contrbutions.append(agent.contrbution)
    return softmax(contrbutions)

def random_forest_distrbution(X_train,Y_train,num_of_agents, num_of_samples=1000):#10000
    X = []
    y = []
    for i in range(num_of_samples):
        features_space = np.random.choice([0, 1], size=(num_of_agents,)).tolist()
        accuracy = get_reward(X_train,Y_train, features_space)
        X.append(features_space)
        y.append(accuracy)
    X = np.array(X)
    y = np.array(y)
    rf = RandomForestRegressor(n_estimators=15)
    rf.fit(X, y)
    return rf.feature_importances_.tolist()

In [22]:
def prepare_data(csv_filename, target_column_name='class'):
    # read csv file
    df = pd.read_csv(csv_filename)
    df = df.iloc[:80000, :]
    column_names = df.columns.tolist()
    if target_column_name not in column_names:
        target_column_name = column_names[-1]
    # get unique value in target column
    unique_vlaues = sorted(df[target_column_name].unique().tolist())
    df[target_column_name] = df[target_column_name].apply(lambda x: 0 if x == unique_vlaues[0] else 1)
    df[target_column_name] = df[target_column_name].astype('int')
    # rename the column of the dataframe
    num_of_columns = len(column_names)
    df.columns = list(range(num_of_columns))
    return df

In [23]:
def feature_selection(algo_type, agents, X, Y, eposide=100):

    # column_names = list(range(dataset.shape[1]))
    # column_names[-1] = 'class'
    # dataset.columns = column_names
    epsilon = 0.01
    features_space = []
    NUMBER_OF_AGENTS = X.shape[1]

    # get contrbutions
    contrbutions = []
    if algo_type == 'random_forest':
        contrbutions = random_forest_distrbution(X,Y,NUMBER_OF_AGENTS)
    elif algo_type in ['single_agent', 'average']:
        contrbutions = [1] * NUMBER_OF_AGENTS

    for i in tqdm(range(eposide)):
        # define the initial space
        features_space = np.random.choice([0, 1], size=(NUMBER_OF_AGENTS,)).tolist()
        # rewards history
        rewards = [0]

        # get action of each agent to create new feature space
        next_feature_space = []
        # contrbution of each agent

        if algo_type == 'softmax':
            contrbutions = softmax_distrbution(agents)

        for t in range(0, NUMBER_OF_AGENTS):
            action = agents[t].make_action(np.array(features_space.copy()), epsilon_greedy, epsilon)
            next_feature_space.append(action)
            if algo_type == 'single_agent':
                features_space[t] = action


        # calculate the total accuracy of new state (new feature space) and distrbute it using softmax

        # 1- get the accuracy using machine learning model trained in the current subset feature
        reward_as_accuracy = get_reward(X,Y, next_feature_space)

        # 2- using the reward strategy map the accuracy value (reward_as_accuracy) to new reward value (reward_at_time_t)
        reward_at_time_t = reward_strategy(t, reward_as_accuracy, rewards, next_feature_space, 1 - reward_as_accuracy)
        # add the accuray of machine learning model to rewards list to use it in the mapping reward strategy.
        rewards.append(reward_as_accuracy)

        # total reward = reward after mapping
        total_reward = reward_at_time_t



        # add state and actions to agent buffer reply  and the reward which equals to contrbution of the agent*total reward
        transition = []
        for t in range(0, NUMBER_OF_AGENTS):
            transition.clear()
            # add current state (current feature space )
            feature_space_copy = features_space.copy()
            transition.append(feature_space_copy)
            # add agent's action to the transition
            action = next_feature_space[t]
            transition.append(action)
            # add new state (new feature space) into transition
            transition.append(next_feature_space)
            # add distrbuted reward to the transition
            transition.append(total_reward * contrbutions[t])
            # add the transition to reply buffer
            agents[t].reply_buffer.append(transition)
            if len(agents[t].reply_buffer) > 32 and i % 32 == 0:
                agents[t].update_evaluation_network()
        if i % 64 == 0:
            for agent in agents:
                if agent.fitted:
                    agent.update_target_network()
        epsilon = 0.97 * epsilon
    return next_feature_space

In [24]:
def generate_new_samples(buffer, y_values, n=500, y_col='label'):
    if not y_col in buffer.columns.tolist():
      y_col = buffer.columns.tolist()[-1]
    if y_values.sum() == 0:
       return buffer[buffer[y_col] == 1].sample(n, random_state=41)[:, :-1].values, np.array([1] * n)
    else:
      return buffer[buffer[y_col] == 0].sample(n,random_state=41)[:, :-1].values, np.array([0] * n)

In [25]:
def feature_evolving(evolving_matrix):
    """
    evolving_matrix : list of random list
    """
    random_index = np.random.randint(0, len(evolving_matrix), 1)[0]
    return evolving_matrix[random_index]

In [26]:
def save_pickle(obj, file_name):
  with open(file_name, 'wb') as f:
    pickle.dump(obj, f)
def load_pickle(file_name):
  with open(file_name, 'rb') as f:
    d = pickle.load(f)
  return d

In [27]:
# model_evaluation(algorithm_type, f_name, chunk, result, 'target', chunk_id)
def model_evaluation(chunk_X, chunk_Y, selected_features):
    # ('SVM', svm.SVC(kernel='rbf', max_iter=8000, C=0.2, probability=True)),
    # ('KNN', KNeighborsClassifier(5)),
    # ('DecisionTree', DecisionTreeClassifier(random_state=42)),
    # ('RandomForest', RandomForestClassifier()),
    # ('LogRegression', LogisticRegression(max_iter=500))


    # index of selected features
    subset_features = np.where(np.array(selected_features) == 1)[0]
    if subset_features.shape[0] == 0:return 0
    # train test split
    X, Y = chunk_X, chunk_Y
    X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42, test_size=TESTSIZE)

    # data normalization.
    # std = StandardScaler()
    # X_train = std.fit_transform(X_train)
    # X_test = std.transform(X_test)

    model = generate_oselm_models(number_of_hidden_neurons=X_train.shape[1]*3 // 2, apply_model_replacement=True)

    # classification and model evaluation
    # y_predictes, f1, recall ,precision ,accuracy = [],[],[],[],[]
    # for model in models:
    # model_name, model_obj = model[0], model[1]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_predictes = y_pred
    f1 = f1_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    # roc_auc.append(roc_auc_score(y_test, y_pred))
    accuracy = accuracy_score(y_test, y_pred)
    print(f1,recall,precision,accuracy)
    return f1,recall,precision,accuracy,y_predictes # ,np.mean(roc_auc)

In [28]:
def dynamic_feature_selection(chunk_X, chunk_Y,save_path,algorithm_type=['softmax','average','single_agent','random_forest'],chunk_number=0):
  AgentsSoftmax.agent_count,AgentsAverage.agent_count,AgentsSingle.agent_count,AgentsRegression.agent_count = 0,0,0,0
  softmax_agents,average_agents,single_agent_agents,random_forest_agents,result,over_all=[],[],[],[],[],[]
  NUM_OF_FEATURES = chunk_X.shape[1]
  for i in range(NUM_OF_FEATURES):
    if 'random_forest' in algorithm_type:
      softmax_agents.append(AgentsSoftmax(create_model(NUM_OF_FEATURES),NUM_OF_FEATURES))
    if 'average' in algorithm_type:
      average_agents.append(AgentsAverage(create_model(NUM_OF_FEATURES),NUM_OF_FEATURES))
    if 'single_agent' in algorithm_type:
      single_agent_agents.append(AgentsSingle(create_model(NUM_OF_FEATURES),NUM_OF_FEATURES))
    if 'softmax' in algorithm_type:
      random_forest_agents.append(AgentsRegression(create_model(NUM_OF_FEATURES),NUM_OF_FEATURES))


  softmax_results,average_results,single_agent_results,random_forest_results,voting_results = results_dic(save_path,chunk_number)

  if 'softmax' in algorithm_type:
    softmax_result = feature_selection('softmax', softmax_agents,chunk_X, chunk_Y)
    while len(softmax_result)==0:
      softmax_result = feature_selection('softmax', softmax_agents,chunk_X, chunk_Y)
    f1, recall ,precision ,accuracy ,y_predicte = model_evaluation(chunk_X=chunk_X,chunk_Y=chunk_Y,selected_features=softmax_result)
    over_all.append(f1)
    save_object(softmax_result, "softmax_mask_"+str(chunk_number),save_path)
    softmax_results["f1"].append(f1)
    softmax_results["recall"].append(recall)
    softmax_results["precision"].append(precision)
    softmax_results["accuracy"].append(accuracy)
    softmax_results["y_predicte"].append(y_predicte)
    save_object(softmax_results, "softmax_results",save_path)


  if 'average' in algorithm_type:
    average_result = feature_selection('average', average_agents,chunk_X, chunk_Y)
    while len(average_result)==0:
      average_result = feature_selection('average', average_agents,chunk_X, chunk_Y)
    f1, recall ,precision ,accuracy ,y_predicte = model_evaluation(chunk_X=chunk_X,chunk_Y=chunk_Y,selected_features=average_result)
    over_all.append(f1)
    save_object(average_result, "average_mask_"+str(chunk_number),save_path)
    average_results["f1"].append(f1)
    average_results["recall"].append(recall)
    average_results["precision"].append(precision)
    average_results["accuracy"].append(accuracy)
    average_results["y_predicte"].append(y_predicte)
    save_object(average_results, "average_results",save_path)

  if 'single_agent' in algorithm_type:
    single_agent_result = feature_selection('single_agent', single_agent_agents,chunk_X, chunk_Y)
    while len(single_agent_result)==0:
      single_agent_result = feature_selection('single_agent', single_agent_agents,chunk_X, chunk_Y)
    f1, recall ,precision  ,accuracy ,y_predictes = model_evaluation(chunk_X=chunk_X,chunk_Y=chunk_Y,selected_features=single_agent_result)
    over_all.append(f1)
    save_object(single_agent_result, "single_agent_mask_"+str(chunk_number),save_path)
    single_agent_results["f1"].append(f1)
    single_agent_results["recall"].append(recall)
    single_agent_results["precision"].append(precision)
    single_agent_results["accuracy"].append(accuracy)
    single_agent_results["y_predicte"].append(y_predicte)
    save_object(single_agent_results, "single_agent_results",save_path)

  if 'random_forest' in algorithm_type:
    random_forest_result = feature_selection('random_forest', random_forest_agents,chunk_X, chunk_Y)
    while len(random_forest_result)==0:
      random_forest_result = feature_selection('random_forest', random_forest_agents,chunk_X, chunk_Y)
    f1, recall ,precision  ,accuracy ,y_predicte = model_evaluation(chunk_X=chunk_X,chunk_Y=chunk_Y,selected_features=random_forest_result)
    over_all.append(f1)
    save_object(random_forest_result, "random_forest_mask_"+str(chunk_number),save_path)
    random_forest_results["f1"].append(f1)
    random_forest_results["recall"].append(recall)
    random_forest_results["precision"].append(precision)
    random_forest_results["accuracy"].append(accuracy)
    random_forest_results["y_predicte"].append(y_predicte)
    save_object(random_forest_results, "random_forest_results",save_path)

  for softmax,average,single,random in zip(softmax_result,average_result,single_agent_result,random_forest_result):
    sum_votes = sum([softmax,average,single,random])
    if sum_votes > (len(algorithm_type) // 2):result.append(1)
    elif sum_votes == (len(algorithm_type) // 2):
      rand = np.random.uniform(low=0,high=1)
      if rand >0.5:result.append(1)
      else:result.append(0)
    else:result.append(0)


  f1, recall ,precision  ,accuracy ,y_predicte = model_evaluation(chunk_X, chunk_Y, result)
  over_all.append(f1)
  save_object(result, "voting_mask_"+str(chunk_number),save_path)
  voting_results["f1"].append(f1)
  voting_results["recall"].append(recall)
  voting_results["precision"].append(precision)
  voting_results["accuracy"].append(accuracy)
  voting_results["y_predicte"].append(y_predicte)
  save_object(voting_results, "voting_results",save_path)


  re_all = [softmax_result,average_result,single_agent_result,random_forest_result,result]
  print(re_all[over_all.index(max(over_all))])
  return re_all[over_all.index(max(over_all))]

In [29]:
def main(f_name, result_save_path="",ChunkNumber=0,feature_selection_type=''):
  # load the dataset and then process it
  datasets = {}

  d = prepare_data(f_name)
  d = d.sample(frac=1, random_state=42)
  buffer = d.sample(n=5000)
  d.reset_index(inplace=True)
  d.replace([np.inf], 0, inplace=True)
  datasets[f_name.split('/')[-1]] = d
  results = {}

  for key in tqdm(datasets.keys()):
      drift_location = {}
      results[key] = {'model_result': []}
      # convert dataset from dataframe to numpy array.
      data = datasets[key].values
      # split the data into features array and target array.
      X, Y = data[:, 0:-1], data[:, -1].astype('int')
      if not os.path.exists("{}_evolving_matrix.pkl".format(result_save_path)):
        a2 = np.random.randint(low=0, high=X.shape[1], size = X.shape[1] // 6).tolist()
        a3 = np.random.randint(low=0, high=X.shape[1], size = X.shape[1] // 5).tolist()
        a4 = np.random.randint(low=0, high=X.shape[1], size = X.shape[1] // 4).tolist()
        evolving_matrix = [a2, a3, a4]
        save_pickle(evolving_matrix, "{}_evolving_matrix.pkl".format(result_save_path))
      else:
        evolving_matrix = load_pickle("{}_evolving_matrix.pkl".format(result_save_path))
      chunks_features = np.array_split(X, 10)
      chunks_labels = np.array_split(Y, 10)


      # result_save_path_data = os.path.join(result_save_path, key)

      ################# train on each chunk ####################
      print("===================== dataset : {} ======================".format(key))
      for CN,chunk_X, chunk_Y in tqdm(zip([*range(len(chunks_labels))],chunks_features, chunks_labels)):
          if ChunkNumber > CN:
            print("We Skip Chunk Number : {}".format(CN))
            continue
          try:
            chunk_X, chunk_Y = SMOTE().fit_resample(chunk_X, chunk_Y)
          except:
            if chunk_Y.sum() in [0, 1]:
              new_samples, new_labels = generate_new_samples(buffer, chunk_Y)
              chunk_X = np.concatenate((chunk_X, new_samples))
              chunk_Y = np.concatenate((chunk_Y, new_labels))
          gc.collect()
          if feature_selection_type == "feature_evolving":
            unselected_feautres = feature_evolving(evolving_matrix=evolving_matrix)
            chunk_X = np.delete(chunk_X, unselected_feautres, 1)
          selected = dynamic_feature_selection(chunk_X=chunk_X, chunk_Y=chunk_Y,save_path=result_save_path,chunk_number=CN)

In [30]:
filenames = ['kddcup99_csv.csv','ISCX2012.csv','CSE-CIC2018.csv','CICIDS2017.csv','7recurrentDrift.csv', 'blip.csv', 'incrementalDrift.csv',
             '7gradualDrift.csv', '7suddenDrift.csv']
filenames = list(map(lambda x: os.path.join(data_path, x), filenames))
filenames

['/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/kddcup99_csv.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/ISCX2012.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/CSE-CIC2018.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/CICIDS2017.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/7recurrentDrift.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/blip.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/incrementalDrift.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/7gradualDrift.csv',
 '/content/drive/My Drive/Colab Notebooks/Muawiya/Genetic Programming Combiner with DFS/data/7suddenDrift.csv']

In [31]:
def results_dic(path,chunk_number=0):
  if chunk_number == 0 :
    softmax_results = {"f1":[],"recall":[],"precision":[],"accuracy":[],"y_predicte":[]}
    average_results = {"f1":[],"recall":[],"precision":[],"accuracy":[],"y_predicte":[]}
    single_agent_results = {"f1":[],"recall":[],"precision":[],"accuracy":[],"y_predicte":[]}
    random_forest_results = {"f1":[],"recall":[],"precision":[],"accuracy":[],"y_predicte":[]}
    voting_results = {"f1":[],"recall":[],"precision":[],"accuracy":[],"y_predicte":[]}
  else:
    softmax_results = load_object('softmax_results',path)
    average_results = load_object('average_results',path)
    single_agent_results = load_object('single_agent_results',path)
    random_forest_results = load_object('random_forest_results',path)
    voting_results = load_object('voting_results',path)
  return softmax_results,average_results,single_agent_results,random_forest_results,voting_results

In [None]:
# ChunkNumber = 0
i=0
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,ChunkNumber=7)

  0%|          | 0/1 [00:00<?, ?it/s]



0it [00:00, ?it/s]

We Skip Chunk Number : 0
We Skip Chunk Number : 1
We Skip Chunk Number : 2
We Skip Chunk Number : 3
We Skip Chunk Number : 4
We Skip Chunk Number : 5




  0%|          | 0/100 [00:00<?, ?it/s]

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70        0.0298507       81        0.0298507        0.0298507      1.70s
   1   672.00        0.0298507     2437        0.0298507        0.0298507      5.66s
   2   187.60        0.0298507      297        0.0298507        0.0298507      1.61s
   3   779.50        0.0298507        9        0.0298507        0.0298507      2.90s
   4   183.40        0.0298507      789        0.0298507        0.0298507      0.00s
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70        0.0392749       81        0.0392749        0.0392749      2.61s




    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70         0.996051       81         0.996051         0.996051      1.79s
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70         0.451881       81         0.451881         0.451881      2.58s
   1   672.00         0.451881     2437         0.451881         0.451881      6.31s
   2   187.60         0.451881      297         0.451881         0.451881      1.02s
   3   779.50         0.451881        9         0.451881         0.451881      1.76s
   4   183.40         0.451881      789         0.451881         0.451881      0.00s


  0%|          | 0/100 [00:00<?, ?it/s]

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70         0.451723       81         0.451723         0.451723      1.64s
   1   672.00         0.451723     2437         0.451723         0.451723      5.01s
   2   187.60         0.451723      297         0.451723         0.451723      1.04s
   3   779.50         0.451723        9         0.451723         0.451723      1.80s
   4   183.40         0.451723      789         0.451723         0.451723      0.00s
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70        0.0392749       81        0.0392749        0.0392749      2.42s


  0%|          | 0/100 [00:00<?, ?it/s]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70       0.00501725       81       0.00501725       0.00501725      1.75s
   1   672.00       0.00501725     2437       0.00501725       0.00501725      5.05s
   2   187.60       0.00501725      297       0.00501725       0.00501725      1.06s
   3   779.50       0.00501725        9       0.00501725       0.00501725      1.86s
   4   183.40       0.00501725      789       0.00501725       0.00501725      0.00s
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70       

  0%|          | 0/100 [00:00<?, ?it/s]

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70                0       81                0                0      1.70s
   1   672.00                0     2437                0                0      5.08s
   2   187.60                0      297                0                0      1.05s
   3   779.50                0        9                0                0      1.86s
   4   183.40                0      789                0                0      0.00s
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70        0.0392749       81        0.0392749        0.0392749      1.70s




  0%|          | 0/100 [00:00<?, ?it/s]

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70        0.0571257       81        0.0571257        0.0571257      6.77s
   1   672.00        0.0571257     2437        0.0571257        0.0571257     16.89s
   2   187.60        0.0571257      297        0.0571257        0.0571257      3.63s
   3   779.50        0.0571257        9        0.0571257        0.0571257      7.40s
   4   183.40        0.0571257      789        0.0571257        0.0571257      0.00s
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70       0.00190718       81       0.00190718       0.00190718      5.15s


  0%|          | 0/100 [00:00<?, ?it/s]

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70                0       81                0                0      4.47s
   1   672.00                0     2437                0                0     18.16s
   2   187.60                0      297                0                0      3.09s
   3   779.50                0        9                0                0      5.85s
   4   183.40                0      789                0                0      0.00s
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70       0.00190718       81       0.00190718       0.00190718      6.06s


  0%|          | 0/100 [00:00<?, ?it/s]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70      0.000636132       81      0.000636132      0.000636132      1.87s
   1   672.00      0.000636132     2437      0.000636132      0.000636132      5.70s
   2   187.60      0.000636132      297      0.000636132      0.000636132      1.17s
   3   779.50      0.000636132        9      0.000636132      0.000636132      2.20s
   4   183.40      0.000636132      789      0.000636132      0.000636132      0.00s
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70      0

  0%|          | 0/100 [00:00<?, ?it/s]

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70       0.00190718       81       0.00190718       0.00190718      1.83s
   1   672.00       0.00190718     2437       0.00190718       0.00190718      5.66s
   2   187.60       0.00190718      297       0.00190718       0.00190718      1.16s
   3   779.50       0.00190718        9       0.00190718       0.00190718      2.04s
   4   183.40       0.00190718      789       0.00190718       0.00190718      0.00s
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70       0.00190718       81       0.00190718       0.00190718      2.05s




  0%|          | 0/100 [00:00<?, ?it/s]

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70        0.0113457       81        0.0113457        0.0113457      3.80s
   1   672.00        0.0113457     2437        0.0113457        0.0113457     12.27s
   2   187.60        0.0113457      297        0.0113457        0.0113457      3.08s
   3   779.50        0.0113457        9        0.0113457        0.0113457      5.29s
   4   183.40        0.0113457      789        0.0113457        0.0113457      0.00s
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0   128.70        0.0113457       81        0.0113457        0.0113457      3.88s


In [None]:
# ChunkNumber = 0
i=1
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,ChunkNumber=0)

In [None]:
# ChunkNumber = 0
i=2
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,ChunkNumber=0)

In [None]:
# ChunkNumber = 0
i=3
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,ChunkNumber=0)

In [None]:
# ChunkNumber = 0
i=4
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,ChunkNumber=0)

In [None]:
# ChunkNumber = 0
i=5
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,ChunkNumber=0)

In [None]:
# ChunkNumber = 0
i=6
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,ChunkNumber=0)

In [None]:
# ChunkNumber = 0
i=7
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,ChunkNumber=0)

In [None]:
# ChunkNumber = 0
i=8
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,ChunkNumber=0)

With Evolving

In [None]:
# ChunkNumber = 0
i=0
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results_evolving,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,feature_selection_type="feature_evolving",ChunkNumber=0)

In [None]:
# ChunkNumber = 0
i=1
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results_evolving,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,feature_selection_type="feature_evolving",ChunkNumber=0)

In [None]:
# ChunkNumber = 0
i=2
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results_evolving,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,feature_selection_type="feature_evolving",ChunkNumber=0)

In [None]:
# ChunkNumber = 0
i=3
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results_evolving,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,feature_selection_type="feature_evolving",ChunkNumber=0)

In [None]:
# ChunkNumber = 0
i=4
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results_evolving,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,feature_selection_type="feature_evolving",ChunkNumber=0)

In [None]:
# ChunkNumber = 0
i=5
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results_evolving,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,feature_selection_type="feature_evolving")

In [None]:
# ChunkNumber = 0
i=6
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results_evolving,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,feature_selection_type="feature_evolving")

In [None]:
# ChunkNumber = 0
i=7
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results_evolving,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,feature_selection_type="feature_evolving",ChunkNumber=0)

In [None]:
# ChunkNumber = 0
i=8
data_name = ['kddcup99','ISCX2012','CSE-CIC2018','CICIDS2017','7recurrentDrift', 'blip', 'incrementalDrift','7gradualDrift', '7suddenDrift']
for f_name,d_name in zip([filenames[i]],[data_name[i]]):
  path = os.path.join(feature_selection_results_evolving,d_name)
  os.makedirs(path, exist_ok=True)
  main(f_name, result_save_path=path,feature_selection_type="feature_evolving")