In [1]:
import os
import random
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.naive_bayes import MultinomialNB
from sklearn import svm
from sklearn.model_selection import cross_val_score
#from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Directory (relative to the notebook) holding the pre-computed .npy arrays.
DATA_DIR = '../local_data/features/'

# Feature array; classify() splits it along its first axis.
X = np.load(os.path.join(DATA_DIR, 'features-new.npy'))
# Only the first entry along axis 0 of the stored labels array is used.
# NOTE(review): presumably the file has shape (1, n_samples) -- confirm
# against the step that wrote 'labels-new.npy'.
y = np.load(os.path.join(DATA_DIR, 'labels-new.npy'))[0]

# Fail fast with a readable message if features and labels do not line up,
# instead of hitting an opaque error later inside scikit-learn.
assert np.ndim(X) >= 1 and np.ndim(y) >= 1 and len(X) == len(y), (
    "features/labels mismatch: X has shape {}, y has shape {}".format(
        np.shape(X), np.shape(y)
    )
)

In [2]:
def classify(clf, n_fold=False, cv=3, test_size=0.25, random_state=None,
             stratify=False):
    """Evaluate ``clf`` on the notebook-level ``X`` / ``y`` arrays and print the result.

    Two evaluation modes are available:

    * ``n_fold`` truthy -- cross-validation through ``cross_val_score``.
      Prints the mean fold accuracy and twice the standard deviation of the
      fold accuracies, both scaled to percent.
    * ``n_fold`` falsy (default) -- one random hold-out split. ``clf`` is
      fitted on the training part, then the accuracy (in percent) and the
      per-class classification report for the held-out part are printed.

    Parameters
    ----------
    clf : estimator
        Object accepted by ``cross_val_score`` (cross-validation mode) or
        exposing ``fit`` / ``predict`` (hold-out mode). In hold-out mode the
        estimator is fitted in place.
    n_fold : bool, default False
        Select cross-validation instead of a single hold-out split.
        Any truthy value enables it.
    cv : int, default 3
        Value forwarded as ``cv`` to ``cross_val_score``; only used when
        ``n_fold`` is truthy.
    test_size : float, default 0.25
        Value forwarded as ``test_size`` to ``train_test_split``; only used
        in hold-out mode.
    random_state : int or None, default None
        Seed for the hold-out split. ``None`` keeps the previous behaviour
        (a different split on every call); pass an integer to make the split,
        and therefore the printed numbers, repeatable.
    stratify : bool, default False
        When True the hold-out split is stratified on ``y`` so every class
        keeps its share in both parts. Off by default to preserve the
        previous behaviour.

    Returns
    -------
    None
        Results are printed only.
    """
    if n_fold:
        scores = cross_val_score(clf, X, y, cv=cv)
        # std * 200 == 2 * std expressed in percent.
        print("Accuracy: %0.4f (+/- %0.4f)" % (scores.mean()*100, scores.std() * 200))
        return

    # Metrics are only needed for the hold-out path; kept as a single local
    # import so the function does not rely on the notebook's import cell.
    from sklearn.metrics import accuracy_score, classification_report

    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=test_size,
        random_state=random_state,
        stratify=y if stratify else None,
    )

    # Fit the model
    clf.fit(X_train, y_train)

    # Perform the predictions
    y_predicted = clf.predict(X_test)

    print("Accuracy = {} %".format(accuracy_score(y_test, y_predicted)*100))
    print("Classification Report \n{}".format(classification_report(y_test, y_predicted)))

In [3]:
%%time
## Decision Tree Classifier w/ Max Depth of 3
# Depth-limited tree, scored through classify() with its default arguments,
# i.e. a single random hold-out split rather than cross-validation.
clf = tree.DecisionTreeClassifier(max_depth=3)
classify(clf)

Accuracy = 98.17170771265555 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       0.99      0.99      0.99    500742
    brute_force       0.00      0.00      0.00       524
           ddos       0.00      0.00      0.00       931
       internal       0.48      0.59      0.53      4921
irc_botnet_ddos       0.92      0.89      0.90      9471
ssh_brute_force       0.00      0.00      0.00      1326

    avg / total       0.98      0.98      0.98    517915

CPU times: user 7.34 s, sys: 756 ms, total: 8.09 s
Wall time: 8.47 s


  'precision', 'predicted', average, warn_for)


In [4]:
%%time
## Decision Tree Classifier w/ no Max Depth
# Same evaluation as the depth-3 tree, but max_depth is left at the
# library default so the two runs can be compared.

clf = tree.DecisionTreeClassifier()
classify(clf)

Accuracy = 99.69357906220134 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       1.00      1.00      1.00    500829
    brute_force       0.98      0.99      0.99       524
           ddos       0.87      0.62      0.73       960
       internal       0.93      0.92      0.92      5066
irc_botnet_ddos       0.96      0.96      0.96      9274
ssh_brute_force       0.99      0.99      0.99      1262

    avg / total       1.00      1.00      1.00    517915

CPU times: user 17.9 s, sys: 896 ms, total: 18.8 s
Wall time: 21 s


In [5]:
%%time
## Random Forests 5,10,25,100 Trees Classifier

# Forest sizes to compare; one forest is trained and scored per entry.
num_trees = [5, 10, 25, 100]
for n in num_trees:
    print("N = {} trees".format(n))
    # n_jobs=-1: let scikit-learn use every available processor.
    clf = RandomForestClassifier(n_estimators=n, n_jobs=-1)
    # classify() draws a fresh random hold-out split on each call, so the
    # forests are not scored on the same test rows.
    classify(clf)

N = 5 trees
Accuracy = 99.70941177606363 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       1.00      1.00      1.00    500569
    brute_force       0.98      0.99      0.99       510
           ddos       0.88      0.63      0.73       922
       internal       0.93      0.92      0.93      5064
irc_botnet_ddos       0.97      0.96      0.96      9517
ssh_brute_force       0.99      0.99      0.99      1333

    avg / total       1.00      1.00      1.00    517915

N = 10 trees
Accuracy = 99.71230800420918 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       1.00      1.00      1.00    500548
    brute_force       0.97      1.00      0.99       533
           ddos       0.90      0.65      0.75       952
       internal       0.94      0.92      0.93      5142
irc_botnet_ddos       0.97      0.96      0.96      9438
ssh_brute_force       0.99      0.99      0.99      1302

    avg

In [6]:
%%time
## Naive Bayes
# Multinomial naive Bayes, scored with the same default hold-out procedure
# as the tree-based models.
# NOTE(review): the recorded run reached only ~11 % accuracy. MultinomialNB
# is aimed at count-like, non-negative features -- confirm that X fits that
# assumption before reading anything into this number.

clf = MultinomialNB()
classify(clf)

Accuracy = 11.365378488748155 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       1.00      0.10      0.18    500740
    brute_force       0.01      0.97      0.02       539
           ddos       0.00      0.00      0.00       956
       internal       0.00      0.06      0.01      5066
irc_botnet_ddos       0.11      0.99      0.19      9351
ssh_brute_force       0.01      0.58      0.01      1263

    avg / total       0.97      0.11      0.17    517915

CPU times: user 5.58 s, sys: 894 ms, total: 6.48 s
Wall time: 6.85 s
