In [None]:
import pandas as pd
import seaborn as sns
import numpy as np

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.feature_selection import RFE
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Read the two UNSW-NB15 partition files (comma-separated, first row is the header).
test = pd.read_csv("UNSW_NB15_testing-set.csv", sep=',', header=0)
train = pd.read_csv("UNSW_NB15_training-set.csv", sep=',', header=0)

# Stack both partitions into one frame and discard the `id` column.
# ignore_index=True rebuilds a unique 0..N-1 index: each CSV is indexed from 0,
# so a plain concat would leave duplicated row labels, which makes later
# label-based alignment and .loc lookups ambiguous.
combined_trainTest = (
    pd.concat([train, test], ignore_index=True)
      .drop(columns=['id'])
)

In [None]:
# Preview the first five rows of the combined frame.
combined_trainTest.head(n=5)

In [None]:
# Columns that are replaced by integer codes.
cols = ['proto', 'service', 'state', 'attack_cat']
le = preprocessing.LabelEncoder()

# The single encoder is re-fitted on each column in turn, so after the loop
# `le` only remembers the classes of the last column in `cols`.
for column_name in cols:
    combined_trainTest[column_name] = le.fit_transform(combined_trainTest[column_name])

# Show the frame again now that the listed columns hold integer codes.
combined_trainTest.head()

In [None]:
# Features: every column except the binary target `label` and `attack_cat`.
# `attack_cat` is excluded because, in UNSW-NB15, it names the attack category
# of each record (the multi-class ground truth); leaving it in X lets the model
# read the answer to "attack or not" directly, i.e. target leakage that
# inflates the reported accuracy.
X = combined_trainTest.drop(columns=['label', 'attack_cat'])

# Target stays a single-column DataFrame, as the rest of the notebook expects.
y = combined_trainTest.loc[:, ['label']]

# 80/20 hold-out split; the fixed seed keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.20, random_state=1
)

In [None]:
# Number of features recursive feature elimination should keep.
n = 40

# n_features_to_select is passed by keyword: passing it positionally is
# deprecated and rejected by current scikit-learn releases.
# The tree is seeded so the selected feature set is the same on every run.
rfe = RFE(
    DecisionTreeClassifier(random_state=1),
    n_features_to_select=n,
).fit(X_train, y_train)

# rfe.support_ is a boolean mask over the training columns; use it to pick the
# retained column names (named so it does not shadow the builtin `list`).
selected_features = X_train.columns[rfe.support_]
X_train_RFE, X_test_RFE = X_train[selected_features], X_test[selected_features]
print('new shape', X_train_RFE.shape)

In [None]:
# Hyper-parameter grid explored for the decision tree.
params = {'max_depth': [2, 4, 6, 8, 10],
          'min_samples_split': [2, 3, 4],
          'min_samples_leaf': [1, 2, 3, 4]}

# Seeded so that cross-validation scores and the final model are reproducible.
clf = DecisionTreeClassifier(random_state=1)
gs = GridSearchCV(estimator=clf, param_grid=params, scoring="accuracy",
                  cv=10, return_train_score=True, verbose=1)
gs.fit(X_train_RFE, y_train)

# GridSearchCV uses refit=True by default, so best_estimator_ has already been
# trained on the full training set with the best parameters; fitting it again
# is redundant and, for an unseeded tree, could even produce a different model
# than the one that was selected.
y_pred = gs.best_estimator_.predict(X_test_RFE)
y_true = y_test

In [None]:
# Fraction of held-out rows whose predicted label equals the true label.
test_accuracy = metrics.accuracy_score(y_test, y_pred)
print("Test accuracy:", test_accuracy)