**Google drive, directory change, imports**

In [None]:
#google drive
# Mount Google Drive at /content/drive. Later cells read the dataset CSVs and
# change into the algorithm folder under /content/drive/MyDrive, so this cell
# must run first. Only works inside Google Colab (google.colab is Colab-only).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#title dir-change
# Switch the working directory to the MH_Algorithms folder on the mounted
# Drive and display its contents (the bare os.listdir() is the cell output).
# NOTE(review): the model-selection cells import modules by name ('pso',
# 'sca', ...); presumably those files live in this folder and are found via
# the working directory -- confirm.
import os
folder_path = "/content/drive/MyDrive/MH_Algorithms"
os.chdir(folder_path)
os.listdir()

In [None]:
#necessary imports
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing

**Training data process**

In [None]:
#load training data:: UNSW_NB15_training-set.csv
data_read = pd.read_csv('/content/drive/MyDrive/datasets/UNSW_NB15_training-set.csv')

#drop 'id', 'attack_cat', 'label' columns
# drop() returns a new frame, so data_read keeps the raw file contents.
data_1 = data_read.drop(['id', 'attack_cat', 'label'], axis = 1)

#convert 'proto', 'state' and 'service' column values to numeric values
# pop() removes the column and the assignment re-adds it at the end of the
# frame, so the final column order is [..., proto, state, service]. The
# feature indices produced by later cells refer to this order.
le = LabelEncoder()
for cat_col in ['proto', 'state', 'service']:
    data_1[cat_col] = le.fit_transform(data_1.pop(cat_col))

#take only the values without column names
data_1 = data_1.values
train_feat = np.asarray(data_1)

#process only labels
# Select the target by column name instead of "last column of the raw frame":
# this does not depend on column order and avoids converting the whole
# mixed-type frame into an object array just to read one column.
train_label = data_read['label'].to_numpy().astype('int')

#perform data normalization
# NOTE: despite its name, train_minmax_feat holds StandardScaler output
# (per-feature standardization), not min-max scaled values. The name is kept
# because later cells reference it.
scaler = preprocessing.StandardScaler().fit(train_feat)
train_minmax_feat = scaler.transform(train_feat)

**Test Data Process**

In [None]:
#load test data:: UNSW_NB15_testing-set.csv
test_data_read = pd.read_csv('/content/drive/MyDrive/datasets/UNSW_NB15_testing-set.csv')

#drop 'id', 'attack_cat', 'label' columns
# drop() returns a new frame, so test_data_read keeps the raw file contents.
test_data_1 = test_data_read.drop(['id', 'attack_cat', 'label'], axis = 1)

#convert 'proto', 'state' and 'service' column values to numeric values
# pop() + re-assignment moves each encoded column to the end of the frame,
# giving the same [..., proto, state, service] column order as the training
# features, which is what lets training-derived feature indices be applied here.
# NOTE(review): the encoder is fit on the test file itself, so the integer
# assigned to a category can differ from the training cell whenever the two
# files do not contain the same set of category values.
test_le = LabelEncoder()
for cat_col in ['proto', 'state', 'service']:
    test_data_1[cat_col] = test_le.fit_transform(test_data_1.pop(cat_col))

#take only the values without column names
test_data_1 = test_data_1.values
test_feat = np.asarray(test_data_1)

#process only labels
# Select the target by column name instead of "last column of the raw frame":
# this does not depend on column order and avoids converting the whole
# mixed-type frame into an object array just to read one column.
test_label = test_data_read['label'].to_numpy().astype('int')

#perform data normalization
# NOTE(review): this scaler is fit on the test features, independently of the
# scaler fit on the training features -- confirm that is intended.
test_scaler = preprocessing.StandardScaler().fit(test_feat)
test_scaled_feat = test_scaler.transform(test_feat)

**There are four metaheuristic models available.**

> * **Particle Swarm Optimization** (use `pso` as short form)
> * **Sine Cosine Algorithm** (use `sca` as short form)
> * **Flower Pollination Algorithm** (use `fpa` as short form)
> * **Differential Evolution** (use `de` as short form)










**First model selection and feature selection**

In [None]:
#import the first MH model
#@title # Select First Model
# model_selected is the module name of the metaheuristic used for the first
# feature-selection stage. The module is imported dynamically by that name and
# bound to `var`; a later cell calls var.jfs(...) on it.
# NOTE(review): presumably the modules are resolved from the working directory
# set by the dir-change cell -- confirm.
import importlib
model_selected = 'sca' #@param ['pso', 'sca', 'fpa', 'de'] {allow-input: true}
var = importlib.import_module(model_selected)

In [None]:
# split data into train & validation (80 -- 20)
# The split is stratified on the label. random_state pins the shuffle so the
# same rows land in each part on every run (same seed value as the classifiers
# in the final cell); without it the selected features change with every
# re-run of this cell.
xtrain, xtest, ytrain, ytest = train_test_split(train_minmax_feat, train_label, test_size=0.2, stratify=train_label, random_state=42)
# 'xt'/'yt' hold the training part, 'xv'/'yv' the validation part; this dict is
# passed to the feature-selection routine through opts['fold'].
fold = {'xt':xtrain, 'yt':ytrain, 'xv':xtest, 'yv':ytest}

**Parameter List**

> **PSO contains 3 extra parameters.**
*   c1  = 2     # cognitive factor
* c2  = 2         # social factor 
* w   = 0.9       # inertia weight

> **SCA contains 1 extra parameter**
* alpha  = 2    # constant

> **FPA contains 1 extra parameter**
* P  = 0.8      # switch probability

> **CS contains 1 extra parameter** (note: CS is not among the four models listed above)
* Pa  = 0.25   # discovery rate

> **DE contains 2 extra parameters**
* CR = 0.9    # crossover rate
* F  = 0.5    # constant factor


In [None]:
# Options for the first feature-selection run. Edit the values here; they are
# collected into the "opts" dictionary handed to the selected model.
k = 5     # k-value in KNN
N = 10    # number of particles
T = 50    # maximum number of iterations
# 'alpha' is the extra constant listed for SCA in the parameter list above.
opts = dict(k=k, fold=fold, N=N, T=T, alpha=2)

In [None]:
# perform feature selection (first model)
# jfs() belongs to the module picked in the model-selection cell. It receives
# the scaled training features, the training labels and the options dict
# (which also carries the train/validation fold). The returned mapping is read
# by later cells through the keys 'sf', 'nf' and 'c'.
fmdl = var.jfs(train_minmax_feat, train_label, opts)

In [None]:
# Report which features the first model kept ('sf') and how many ('nf').
sel_feat, num_feat = fmdl['sf'], fmdl['nf']
print("Selected Features:", sel_feat)
print("Feature Size:", num_feat)

In [None]:
# Convergence plot for the first model: fitness value per iteration.
curve = fmdl['c']
curve = curve.reshape(curve.shape[1])        # flatten to one value per iteration
x = np.arange(opts['T'], dtype=float) + 1.0  # iteration numbers 1..T

fig, ax = plt.subplots()
ax.plot(x, curve, 'o-')
ax.set(
    xlabel='Number of Iterations',
    ylabel='Cost as Fitness',
    title=model_selected,
)
ax.grid()
plt.show()

**Second model selection and Feature Selection**

In [None]:
# Keep only the columns chosen by the first model; this reduced training
# split is the input to the second feature-selection stage.
num_train = xtrain.shape[0]
y_train = ytrain.reshape(num_train)
x_train = xtrain[:, sel_feat]

In [None]:
#import the second MH model
#@title # Select Second Model
# model2_selected is the module name of the metaheuristic used for the second
# feature-selection stage. The module is imported dynamically by that name and
# bound to `var2`; a later cell calls var2.jfs(...) on it.
# NOTE(review): presumably the modules are resolved from the working directory
# set by the dir-change cell -- confirm.
import importlib
model2_selected = 'pso' #@param ['pso', 'sca', 'fpa', 'de'] {allow-input: true}
var2 = importlib.import_module(model2_selected)

In [None]:
# split data with reduced feature set into train & validation (80 -- 20)
# The split is stratified on the label. random_state pins the shuffle so the
# same rows land in each part on every run (same seed value as the classifiers
# in the final cell); without it the second-stage result changes with every
# re-run of this cell.
fxtrain, fxtest, fytrain, fytest = train_test_split(x_train, y_train, test_size=0.2, stratify=y_train, random_state=42)
# Rebinds `fold` to the reduced-feature split; it is passed to the second model
# through smdl_opts['fold'].
fold = {'xt':fxtrain, 'yt':fytrain, 'xv':fxtest, 'yv':fytest}

In [None]:
# set parameters:: change the values here; "smdl_opts" is built from these
# variables, so an edit takes effect without touching the dictionary.
# (Previously w/c1/c2 were assigned but the dictionary used separate hard-coded
# literals, so changing the variables had no effect. The values below are the
# ones that were actually being passed.)
k    = 5     # k-value in KNN
N    = 10    # number of particles
T    = 50    # maximum number of iterations
w    = 0.9   # inertia weight
c1   = 1.5   # cognitive factor
c2   = 2     # social factor
smdl_opts = {'k':k, 'fold':fold, 'N':N, 'T':T, 'c1':c1, 'c2':c2, 'w':w}

In [None]:
# perform feature selection (second model)
# jfs() belongs to the module picked in the second model-selection cell. It is
# run on the training split already reduced to the first model's features, so
# the feature indices it returns under 'sf' are positions within that reduced
# set, not within the original feature matrix.
smdl = var2.jfs(x_train, y_train, smdl_opts)

In [None]:
# Report which features the second model kept ('sf') and how many ('nf').
smdl_sel_feat, smdl_num_feat = smdl['sf'], smdl['nf']
print("Selected Features:", smdl_sel_feat)
print("Feature Size:", smdl_num_feat)

In [None]:
# Convergence plot for the second model: fitness value per iteration.
smdl_curve = smdl['c']
smdl_curve = smdl_curve.reshape(smdl_curve.shape[1])   # flatten to one value per iteration
smdl_x = np.arange(smdl_opts['T'], dtype=float) + 1.0  # iteration numbers 1..T

fig, ax = plt.subplots()
ax.plot(smdl_x, smdl_curve, 'o-')
ax.set(
    xlabel='Number of Iterations',
    ylabel='Cost as Fitness',
    title=model2_selected,
)
ax.grid()
plt.show()

**Classification results on Test DataSet**

In [None]:
# Load test data with finally selected features
num_valid = np.size(test_scaled_feat, 0)

# The second model ran on xtrain[:, sel_feat], so smdl_sel_feat addresses
# positions inside the first model's selection, not columns of the full
# feature matrix. Compose both selections to recover the original column
# indices before slicing the (full-width) test matrix. Indexing an arange
# works whether the selections are integer index arrays or boolean masks.
all_feat_idx   = np.arange(np.size(test_scaled_feat, 1))
final_sel_feat = all_feat_idx[sel_feat][smdl_sel_feat]

x_valid   = test_scaled_feat[:, final_sel_feat]
y_valid   = test_label.reshape(num_valid)

In [None]:
#classification with 5-fold cross validation on the selected test features,
#reporting accuracy, precision, recall and F-score plus wall-clock time.
#J48 (decision tree) and Random Forest are always evaluated; SVC is opt-in.
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate
import time

clf1 = DecisionTreeClassifier(criterion = "entropy", random_state = 42, max_depth=3, min_samples_leaf=5)
clf2 = RandomForestClassifier(n_estimators=50, random_state=42)
clf3 = SVC(kernel='linear', probability=True)

scoring = {'accuracy' : make_scorer(accuracy_score), 
           'precision' : make_scorer(precision_score),
           'recall' : make_scorer(recall_score), 
           'f1_score' : make_scorer(f1_score)}

# Keep each classifier next to its label. The previous zip() of a 2-item list
# with a 3-item list silently dropped 'SVM', which hid the fact that clf3 was
# never evaluated. The default below keeps that effective behaviour.
# NOTE(review): SVM was presumably left out because of its run time -- confirm;
# set run_svm = True to include it.
run_svm = False
classifiers = [(clf1, 'J48'), (clf2, 'Random Forest')]
if run_svm:
  classifiers.append((clf3, 'SVM'))

for clf, label in classifiers:
  start_time = time.time()
  scores = cross_validate(clf, x_valid, y_valid, scoring=scoring, cv=5)
  # Stop the clock before printing so only the cross-validation is timed.
  end_time = time.time()
  exec_time = end_time - start_time
  print("Accuracy: %0.4f Precision: %0.4f Recall: %0.4f F-score: %0.4f [%s]" % (scores['test_accuracy'].mean(), scores['test_precision'].mean(), scores['test_recall'].mean(), scores['test_f1_score'].mean(), label))
  print("Time:", exec_time)