In [1]:
import os
import random

import numpy as np
from sklearn import svm
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
#from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Directory holding the saved feature / label arrays.
DATA_DIR = '../local_data/features/'

# Resolve both file locations first, then load them in the same order as before.
features_path = os.path.join(DATA_DIR, 'features-new.npy')
labels_path = os.path.join(DATA_DIR, 'labels-new.npy')

# X: feature array passed to the estimators by classify().
X = np.load(features_path)
# y: only index 0 along the first axis of the stored labels array is kept.
# Presumably the labels file holds a single row of labels - TODO confirm.
y = np.load(labels_path)[0]

# Switch read by classify(): when True it also reports cross-validated accuracy.
N_FOLD = True

In [2]:
def classify(clf, test_size=0.25, cv=3, random_state=None):
    """Evaluate ``clf`` on the module-level ``X`` / ``y`` arrays and print the results.

    Two evaluations are performed:

    1. If the module-level flag ``N_FOLD`` is truthy, ``cross_val_score`` is run
       with ``cv`` folds and the mean accuracy (in percent) is printed together
       with twice the standard deviation of the fold scores (also in percent).
    2. A single random train/test split is made, ``clf`` is fitted on the
       training part, and the hold-out accuracy plus a per-class
       ``classification_report`` are printed.

    Parameters
    ----------
    clf : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in place
        on the training part of the split.
    test_size : float or int, default 0.25
        Forwarded to ``train_test_split``; the default matches the previously
        hard-coded value.
    cv : int or cross-validation generator, default 3
        Forwarded to ``cross_val_score``; the default matches the previously
        hard-coded value.
    random_state : int or None, default None
        Forwarded to ``train_test_split``. ``None`` keeps the original
        behaviour (a different split on every call); pass an integer to make
        the hold-out numbers reproducible between runs.

    Returns
    -------
    None
        All results are written to stdout.
    """
    if N_FOLD:
        scores = cross_val_score(clf, X, y, cv=cv)
        # mean * 100 -> percent; std * 200 -> two standard deviations, in percent.
        print("Accuracy: %0.4f (+/- %0.4f)" % (scores.mean() * 100, scores.std() * 200))

    # Simple hold-out evaluation on a single random split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit on the training part, then predict the held-out part.
    clf.fit(X_train, y_train)
    y_predicted = clf.predict(X_test)

    print("Accuracy = {} %".format(accuracy_score(y_test, y_predicted) * 100))
    print("Classification Report \n{}".format(classification_report(y_test, y_predicted)))

In [3]:
%%time
## Decision Tree Classifier w/ Max Depth of 3
clf = tree.DecisionTreeClassifier(max_depth=3)
classify(clf)

Accuracy: 97.8361 (+/- 0.6994)
Accuracy = 98.1771140051939 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       0.99      0.99      0.99    500783
    brute_force       0.00      0.00      0.00       505
           ddos       0.00      0.00      0.00       958
       internal       0.49      0.58      0.53      5073
irc_botnet_ddos       0.93      0.89      0.91      9313
ssh_brute_force       0.00      0.00      0.00      1283

    avg / total       0.98      0.98      0.98    517915

CPU times: user 18.9 s, sys: 1.3 s, total: 20.2 s
Wall time: 20.3 s


  'precision', 'predicted', average, warn_for)


In [4]:
%%time
## Decision Tree Classifier w/ no Max Depth

clf = tree.DecisionTreeClassifier()
classify(clf)

Accuracy: 99.1584 (+/- 0.4450)
Accuracy = 99.69222748906674 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       1.00      1.00      1.00    500668
    brute_force       0.99      0.99      0.99       520
           ddos       0.88      0.64      0.74      1001
       internal       0.93      0.93      0.93      5033
irc_botnet_ddos       0.96      0.95      0.96      9370
ssh_brute_force       1.00      0.99      0.99      1323

    avg / total       1.00      1.00      1.00    517915

CPU times: user 45.8 s, sys: 1.6 s, total: 47.4 s
Wall time: 50.1 s


In [5]:
%%time
## Random Forests 5,10,25,100 Trees Classifier

num_trees = [5, 10, 25, 100]
for n in num_trees:
    print("N = {} trees".format(n))
    clf = RandomForestClassifier(n_estimators=n, n_jobs=-1)
    classify(clf)

N = 5 trees
Accuracy: 99.1743 (+/- 0.4333)
Accuracy = 99.68913817904482 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       1.00      1.00      1.00    500722
    brute_force       0.99      0.98      0.98       525
           ddos       0.88      0.59      0.70       970
       internal       0.93      0.92      0.92      5093
irc_botnet_ddos       0.96      0.96      0.96      9327
ssh_brute_force       0.99      0.99      0.99      1278

    avg / total       1.00      1.00      1.00    517915

N = 10 trees
Accuracy: 99.1752 (+/- 0.4571)
Accuracy = 99.70883253043453 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       1.00      1.00      1.00    500702
    brute_force       0.98      1.00      0.99       535
           ddos       0.90      0.62      0.74       947
       internal       0.93      0.92      0.93      5073
irc_botnet_ddos       0.97      0.96      0.96      9337
ssh

In [6]:
%%time
## Naive Bayes

clf = MultinomialNB()
classify(clf)

Accuracy: 26.4746 (+/- 14.1993)
Accuracy = 24.97243756214823 %
Classification Report 
                 precision    recall  f1-score   support

         Normal       1.00      0.24      0.38    500644
    brute_force       0.01      0.97      0.03       525
           ddos       0.01      0.82      0.02       937
       internal       0.00      0.07      0.01      5084
irc_botnet_ddos       0.09      0.99      0.17      9407
ssh_brute_force       0.01      0.40      0.02      1318

    avg / total       0.97      0.25      0.37    517915

CPU times: user 14.9 s, sys: 1.92 s, total: 16.8 s
Wall time: 16.5 s
