In [None]:
import pandas as pd

# Read the training split from disk and preview its first rows
# (DataFrame.head() shows 5 rows by default).
train_df = pd.read_csv("train.csv")
train_df.head()

In [None]:
# Read the held-out test split from disk and preview its first rows
# (DataFrame.head() shows 5 rows by default).
test_df = pd.read_csv("test.csv")
test_df.head()

## Data Analysis

In [None]:
# Print the column names, non-null counts and dtypes of the training frame.
train_df.info()

In [None]:
# Print the column names, non-null counts and dtypes of the test frame.
test_df.info()

## Find any NULL values in datasets.

In [None]:
# Number of missing entries in each column of the training frame
# (DataFrame.sum reduces down the rows by default, giving one count per column).
train_df.isna().sum()

In [None]:
# Number of missing entries in each column of the test frame
# (DataFrame.sum reduces down the rows by default, giving one count per column).
test_df.isna().sum()

In [None]:
# Descriptive statistics (count, mean, spread, quartiles) for the training frame.
train_df.describe()

In [None]:
# Descriptive statistics (count, mean, spread, quartiles) for the test frame.
test_df.describe()

In [None]:
# Distinct values of the 'Label' column (used below as the target) in the training data.
train_df['Label'].unique()

In [None]:
# Distinct values of the 'Label' column (used below as the target) in the test data.
test_df['Label'].unique()

## Splitting Data

In [None]:
# Separate each frame into a feature matrix (every column except 'Label')
# and a target vector (the 'Label' column). The source frames are left untouched.
X_train, y_train = train_df.drop(columns='Label'), train_df['Label']
X_test, y_test = test_df.drop(columns='Label'), test_df['Label']

In [None]:
# Preview the first five rows of the training features.
X_train.head()

In [None]:
# Preview the first five rows of the test features.
X_test.head()

In [None]:
# Preview the first five training labels.
y_train.head()

In [None]:
# Preview the first five test labels.
y_test.head()

## Gradient Boosting

In [None]:
from sklearn.ensemble import  GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Gradient-boosted trees: 400 boosting stages, each tree limited to depth 4,
# with learning_rate=1 (no shrinkage of each stage's contribution).
# random_state is pinned so the fit is reproducible: scikit-learn randomly
# permutes the features at every split, so equally good splits can otherwise
# be resolved differently from one run to the next.
boost = GradientBoostingClassifier(
    n_estimators=400,
    learning_rate=1,
    max_depth=4,
    random_state=42,
)
# fit() returns the fitted estimator, so `boostmodel` and `boost` are the same object.
boostmodel = boost.fit(X_train, y_train)

In [None]:
# Predict a label for every test row, then peek at the first five predictions.
pred_boost = boostmodel.predict(X_test)
pred_boost[:5]

In [None]:
from sklearn import metrics
from sklearn.metrics import classification_report

# Test-set accuracy of the gradient-boosting predictions, as a percentage
# rounded to two decimals.
# NOTE(review): despite the `dt1_` prefix this is the boosting model's score;
# the name is kept unchanged in case later cells refer to it.
dt1_acc = round(100 * metrics.accuracy_score(y_test, pred_boost), 2)
print('Accuracy:', dt1_acc)

In [None]:
# Text report of per-class precision, recall, F1 and support for the
# gradient-boosting predictions on the test set.
class_rep_dt = classification_report(y_true=y_test, y_pred=pred_boost)
print(class_rep_dt)

In [None]:

from sklearn.metrics import confusion_matrix
import itertools
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt
# 1. Confusion matrix.
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix',cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it as an annotated heat map.

    The matrix is drawn on the current pyplot figure with rows labelled
    "True label" and columns labelled "Predicted label".

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix to display. Anything ``np.asarray`` accepts.
    classes : sequence
        Tick labels for both axes, in the same order as the rows/columns of ``cm``.
    normalize : bool, default False
        If True, each row is divided by its row total before display, so every
        cell shows the fraction of that true class. Rows whose total is zero
        (a class with no true samples) are shown as all zeros instead of NaN.
    title : str
        Title placed above the plot.
    cmap : matplotlib colormap
        Colormap used for the heat map.

    Returns
    -------
    None
    """
    cm = np.asarray(cm)

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Dividing an empty row by its zero total would give NaN and a runtime
        # warning; `where` skips those rows and `out` leaves them at 0.
        cm = np.divide(
            cm.astype('float'),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text so the number stays readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # tight_layout only accounts for artists that exist when it is called, so it
    # must come after the axis labels or they can be clipped.
    plt.tight_layout()

In [None]:
# Compute confusion matrix
cnf_matrix = confusion_matrix(y_test, pred_boost, labels=['BENIGN', 'DoS slowloris', 'DoS Hulk'])
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=['BENIGN', 'DoS slowloris', 'DoS Hulk'],normalize= False,  title='Confusion matrix')