In [1]:
from main import train_and_test_with, train_with_tensorflow
from utils import *

# Handy list of all the binetflow files; every evaluate_ml call in the
# cells below is run over this same list.
binet_files = get_binetflow_files()


def print_result(file, data):
    """ Print a small report for one file.

        Emits a "For <file>" header, a 30-character rule of "=", one
        "key = value" line per entry of ``data`` (entries ordered by
        sorting ``data.items()``), and a trailing blank line.
    """
    rule = "=" * 30
    print("For %s" % file)
    print(rule)
    for entry in sorted(data.items()):
        # entry is the (key, value) pair, which fills both %s slots
        print("%s \t\t= %s" % entry)
    print("")


def evaluate_ml(interval, files, ml):
    """ Given the parameters, run the machine learning algorithm
        on every file and print out the metrics.

        interval -- passed to get_saved_data to select the saved data to load
        files    -- iterable of files, evaluated one at a time in order
        ml       -- 'tf' trains with train_with_tensorflow and reports its
                    return value under the key 'accuracy'; any other value
                    is forwarded to train_and_test_with, whose result is
                    handed to print_result unchanged

        Returns nothing; the results are printed with print_result.
    """
    for file in files:
        # The start time is not used here; the call is kept only in case
        # the helper validates the file -- TODO confirm and drop it.
        get_start_time_for(file)
        features, labels = get_feature_labels(get_saved_data(interval, file))
        # Only the training step differs between algorithms; loading the
        # data and printing the result are shared.
        if ml == 'tf':
            result = {'accuracy': train_with_tensorflow(features, labels)}
        else:
            result = train_and_test_with(features, labels, ml)
        print_result(file, result)


Using TensorFlow backend.


# Evaluating Machine Learning Algorithms

The point of this notebook is to compare how different machine learning algorithms perform on all the files. Change the **interval** to see how they perform at other intervals, but leave the machine learning algorithm the same within each section.


## Decision Trees

In [2]:
# interval=1 over every binetflow file; 'dt' is not 'tf', so evaluate_ml
# forwards it to train_and_test_with.
evaluate_ml(1, binet_files, 'dt')


saved_data/saved_1s_10.pk1
For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110810.binetflow
0 		= 6617, 6640
1 		= 4396, 4374
attacks 		= 8770
f1 score 		= 0.6430949374354265
normal count 		= 13257
precision 		= 0.645838136961033
recall 		= 0.6403749428440787
training size 		= 11013

saved_data/saved_1s_11.pk1
For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110811.binetflow
0 		= 5053, 5085
1 		= 2373, 2341
attacks 		= 4714
f1 score 		= 0.5449050086355786
normal count 		= 10138
precision 		= 0.550851156700131
recall 		= 0.5390858607432721
training size 		= 7426

saved_data/saved_1s_12.pk1
For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110812.binetflow
0 		= 109669, 109471
1 		= 10142, 10340
attacks 		= 20482
f1 score 		= 0.7326105252735283
normal count 		= 219140
precision 		= 0.750481793285323
recall 		= 0.7155705996131528
training size 		= 119811

saved_data/saved_1s_

## Random Forest

In [3]:
# interval=1 over every binetflow file; 'rf' is not 'tf', so evaluate_ml
# forwards it to train_and_test_with.
evaluate_ml(1, binet_files, 'rf')


saved_data/saved_1s_10.pk1
For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110810.binetflow
0 		= 6617, 6640
1 		= 4396, 4374
attacks 		= 8770
f1 score 		= 0.6835844567803331
normal count 		= 13257
precision 		= 0.8101503759398496
recall 		= 0.5912208504801097
training size 		= 11013

saved_data/saved_1s_11.pk1
For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110811.binetflow
0 		= 5053, 5085
1 		= 2373, 2341
attacks 		= 4714
f1 score 		= 0.5679287305122496
normal count 		= 10138
precision 		= 0.815347721822542
recall 		= 0.43571123451516447
training size 		= 7426

saved_data/saved_1s_12.pk1
For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110812.binetflow
0 		= 109669, 109471
1 		= 10142, 10340
attacks 		= 20482
f1 score 		= 0.813783388416319
normal count 		= 219140
precision 		= 0.9252186768510533
recall 		= 0.7263056092843327
training size 		= 119811

saved_data/saved_1

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110816-2.binetflow
0 		= 608, 608
1 		= 11, 12
attacks 		= 23
f1 score 		= 0.0
normal count 		= 1216
precision 		= 0.0
recall 		= 0.0
training size 		= 619

saved_data/saved_1s_16-3.pk1
For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110816-3.binetflow
0 		= 33708, 33683
1 		= 1209, 1234
attacks 		= 2443
f1 score 		= 0.25017667844522967
normal count 		= 67391
precision 		= 0.9779005524861878
recall 		= 0.14343598055105347
training size 		= 34917

saved_data/saved_1s_17.pk1
For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110817.binetflow
0 		= 4653, 4676
1 		= 4675, 4653
attacks 		= 9328
f1 score 		= 0.9350822587816808
normal count 		= 9329
precision 		= 0.968454985033387
recall 		= 0.903932946486138
training size 		= 9328

saved_data/saved_1s_18.pk1
For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/r

## TensorFlow

In [4]:
# interval=1 over every binetflow file; 'tf' selects the
# train_with_tensorflow path, which reports only an 'accuracy' entry.
evaluate_ml(1, binet_files, 'tf')


saved_data/saved_1s_10.pk1
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



  'precision', 'predicted', average, warn_for)


For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110810.binetflow
accuracy 		= (0.0, 0.6028691)

saved_data/saved_1s_11.pk1
For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110811.binetflow
accuracy 		= (0.0, 0.6847563)

saved_data/saved_1s_12.pk1
For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110812.binetflow
accuracy 		= (0.0, 0.9136974)

saved_data/saved_1s_15.pk1
For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110815.binetflow
accuracy 		= (0.0, 0.9295681)

saved_data/saved_1s_15-2.pk1
For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110815-2.binetflow
accuracy 		= (0.0, 0.69777775)

saved_data/saved_1s_16.pk1
For /media/thiago/ubuntu/datasets/network/stratosphere_botnet_2011/ctu_13/raw/capture20110816.binetflow
accuracy 		= (0.0, 0.90254796)

saved_data/saved_1s_16-2.pk1
For /media/thiago/ub