In [9]:
import os
import random

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.naive_bayes import MultinomialNB
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, classification_report
#from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Directory (relative to the notebook) that holds the saved feature/label arrays.
DATA_DIR = '../local_data/features/'

# Seed the global RNGs once so that Restart Kernel -> Run All yields the same
# train/test splits and the same randomised estimators on every run.
# scikit-learn falls back to numpy's global RNG whenever random_state is None.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

X = np.load(os.path.join(DATA_DIR, 'features.npy'))
# Only the first entry along the leading axis of labels.npy is used as the target.
# NOTE(review): presumably the file stores the labels as a 1 x N array - confirm.
y = np.load(os.path.join(DATA_DIR, 'labels.npy'))[0]

# Fail fast with a clear message; the split/CV helpers require one label per sample.
assert len(X) == len(y), "features.npy and labels.npy disagree on the number of samples"

In [14]:
def classify(clf, n_fold=False, test_size=0.25, cv=3, random_state=None):
    """Evaluate ``clf`` on the notebook-level ``X`` / ``y`` arrays and print the result.

    Parameters
    ----------
    clf : estimator
        Unfitted classifier exposing ``fit`` and ``predict``.
    n_fold : bool, default False
        When truthy, score the classifier with ``cv``-fold cross-validation and
        print the mean accuracy together with a +/- two-standard-deviation band
        (both expressed in percent). When falsy, fit on a single random
        train/test split and print the accuracy and the per-class
        classification report.
    test_size : float, default 0.25
        Fraction of the samples held out for testing in the single-split mode.
    cv : int, default 3
        Number of folds used in the cross-validation mode.
    random_state : int or None, default None
        Seed forwarded to ``train_test_split``. ``None`` keeps the previous
        behaviour, i.e. the split is drawn from numpy's global RNG.

    Returns
    -------
    None
        Results are printed, nothing is returned.
    """
    if n_fold:
        scores = cross_val_score(clf, X, y, cv=cv)
        # std * 200 == two standard deviations, converted to percent.
        print("Accuracy: %0.4f (+/- %0.4f)" % (scores.mean()*100, scores.std() * 200))
        return

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit on the training portion, then predict the held-out portion.
    clf.fit(X_train, y_train)
    y_predicted = clf.predict(X_test)

    print("Accuracy = {} %".format(accuracy_score(y_test, y_predicted)*100))
    print("Classification Report \n{}".format(classification_report(y_test, y_predicted)))

In [15]:
%%time
## Decision Tree Classifier w/ Max Depth of 3
# Builds a DecisionTreeClassifier capped at max_depth=3 and hands it to
# classify() with its default n_fold=False (single train/test split).
# The %%time cell magic reports CPU and wall time for the whole cell.
clf = tree.DecisionTreeClassifier(max_depth=3)
classify(clf)

Accuracy = 98.19082281841615 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       0.99      0.99      0.99    500626
    brute_force       0.00      0.00      0.00       523
           ddos       0.00      0.00      0.00       919
       internal       0.49      0.59      0.53      5085
irc_botnet_ddos       0.92      0.90      0.91      9522
ssh_brute_force       0.00      0.00      0.00      1240

    avg / total       0.98      0.98      0.98    517915

CPU times: user 6.3 s, sys: 355 ms, total: 6.65 s
Wall time: 6.8 s


  'precision', 'predicted', average, warn_for)


In [4]:
%%time
## Decision Tree Classifier w/ no Max Depth
# Builds a DecisionTreeClassifier with all-default parameters (no max_depth
# argument is passed) and evaluates it through classify() on a single
# train/test split. The %%time cell magic reports the cell's run time.

clf = tree.DecisionTreeClassifier()
classify(clf)

Accuracy = 99.4091694583088 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       1.00      1.00      1.00    500607
    brute_force       1.00      0.98      0.99       525
           ddos       0.94      0.53      0.68       960
       internal       0.83      0.74      0.79      5022
irc_botnet_ddos       0.96      0.95      0.96      9445
ssh_brute_force       0.98      0.98      0.98      1356

    avg / total       0.99      0.99      0.99    517915

CPU times: user 17.3 s, sys: 468 ms, total: 17.8 s
Wall time: 17.9 s


In [5]:
%%time
## Random Forests 5,10,25,100 Trees Classifier
# Sweeps the forest size: for each entry of num_trees a fresh
# RandomForestClassifier is built and evaluated with classify().
# %%time measures the whole sweep, not each forest individually.

# Forest sizes (n_estimators) to compare.
num_trees = [5, 10, 25, 100]
for n in num_trees:
    # Header line so each report in the output can be matched to its forest size.
    print("N = {} trees".format(n))
    # n_jobs=-1 asks scikit-learn to use all available CPU cores.
    clf = RandomForestClassifier(n_estimators=n, n_jobs=-1)
    classify(clf)

N =  0
Accuracy = 99.40009461011941 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       1.00      1.00      1.00    500732
    brute_force       0.99      0.98      0.98       513
           ddos       0.94      0.50      0.65       920
       internal       0.89      0.68      0.77      5139
irc_botnet_ddos       0.96      0.96      0.96      9308
ssh_brute_force       0.99      0.97      0.98      1303

    avg / total       0.99      0.99      0.99    517915

N =  1
Accuracy = 99.40608014828688 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       1.00      1.00      1.00    500633
    brute_force       0.99      0.97      0.98       520
           ddos       0.96      0.52      0.67       984
       internal       0.83      0.73      0.78      5108
irc_botnet_ddos       0.97      0.96      0.96      9358
ssh_brute_force       0.99      0.98      0.98      1312

    avg / total   

In [6]:
%%time
## Naive Bayes
# Evaluates a MultinomialNB classifier with all-default parameters through
# classify() on a single train/test split; %%time reports the cell's run time.
# NOTE(review): MultinomialNB is designed for non-negative, count-like features -
# confirm that the columns of X satisfy this before trusting the score.

clf = MultinomialNB()
classify(clf)

Accuracy = 23.545176332023594 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       1.00      0.22      0.36    500818
    brute_force       0.00      0.96      0.01       520
           ddos       0.01      0.61      0.01       949
       internal       0.00      0.00      0.00      5067
irc_botnet_ddos       0.09      0.99      0.17      9291
ssh_brute_force       0.01      0.30      0.01      1270

    avg / total       0.97      0.24      0.35    517915

CPU times: user 5.03 s, sys: 641 ms, total: 5.67 s
Wall time: 5.91 s
