In [2]:
import numpy as np
import pandas as pd

In [3]:
import tensorflow as tf

In [4]:
combined_set = pd.read_csv('TimeBasedFeatures-Dataset-15s-AllinOne.csv')

In [5]:
for feature in combined_set.columns: # Loop through all columns in the dataframe
    if combined_set[feature].dtype == 'object': # Only apply for columns with categorical strings
        combined_set[feature] = pd.Categorical(combined_set[feature]).codes

In [6]:
data2 = combined_set

In [7]:
shuffled = data2.sample(frac=1)

In [8]:
predictors = shuffled.iloc[:,:-1]

In [9]:
targets = shuffled.iloc[:,-1]

In [10]:
trainsize = int(len(shuffled['class1']) * 0.8)

In [11]:
testsize = len(shuffled['class1']) - trainsize
npredictors = len(predictors.columns)
noutputs = 1
numiter = 10000
modelfile = '/tmp/trained_model'

In [12]:
from __future__ import print_function
from six.moves import cPickle as pickle
from six.moves import range

In [13]:
num_labels = 7
image_size = 23
def reformat(dataset, labels):
  dataset = dataset.values
  # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...]
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels
train_dataset, train_labels = reformat(predictors[:trainsize], targets[:trainsize])
valid_dataset, valid_labels = reformat(predictors[trainsize:trainsize + testsize / 2], targets[trainsize:trainsize + testsize / 2])
test_dataset, test_labels = reformat(predictors[trainsize + testsize / 2:], targets[trainsize + testsize / 2:])
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (15006, 23) (15006, 7)
Validation set (1876, 23) (1876, 7)
Test set (1876, 23) (1876, 7)
Training set (15006, 23) (15006, 7)
Validation set (1876, 23) (1876, 7)
Test set (1876, 23) (1876, 7)


In [14]:
train_subset = 15006
graph = tf.Graph()
with graph.as_default():
  tf_train_dataset = tf.constant(train_dataset[:train_subset, :].astype(np.float32))
  tf_train_labels = tf.constant(train_labels[:train_subset])
  tf_valid_dataset = tf.constant(valid_dataset.astype(np.float32))
  tf_test_dataset = tf.constant(test_dataset.astype(np.float32))
  weights = tf.Variable(
    tf.truncated_normal([image_size, num_labels]))
  biases = tf.Variable(tf.zeros([num_labels]))
  logits = tf.matmul(tf_train_dataset, weights) + biases
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(
    tf.matmul(tf_valid_dataset, weights) + biases)
  test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)

In [15]:
num_steps = 801
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])
with tf.Session(graph=graph) as session: 
  tf.initialize_all_variables().run()
  print('Initialized')
  for step in range(num_steps):
    _, l, predictions = session.run([optimizer, loss, train_prediction])
    if (step % 100 == 0):
      print('Loss at step %d: %f' % (step, l))
      print('Training accuracy: %.1f%%' % accuracy(
        predictions, train_labels[:train_subset, :]))
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), valid_labels))
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Loss at step 0: 15968755.000000
Training accuracy: 30.9%
Validation accuracy: 26.4%
Initialized
Loss at step 0: 15968755.000000
Training accuracy: 30.9%
Validation accuracy: 26.4%
Loss at step 100: 3065381126144.000000
Training accuracy: 47.1%
Validation accuracy: 32.5%
Loss at step 100: 3065381126144.000000
Training accuracy: 47.1%
Validation accuracy: 32.5%
Loss at step 200: 15248485515264.000000
Training accuracy: 35.5%
Validation accuracy: 41.6%
Loss at step 200: 15248485515264.000000
Training accuracy: 35.5%
Validation accuracy: 41.6%
Loss at step 300: 4114921029632.000000
Training accuracy: 48.7%
Validation accuracy: 48.3%
Loss at step 300: 4114921029632.000000
Training accuracy: 48.7%
Validation accuracy: 48.3%
Loss at step 400: 13390432960512.000000
Training accuracy: 55.1%
Validation accuracy: 24.1%
Loss at step 400: 13390432960512.000000
Training accuracy: 55.1%
Validation accuracy: 24.1%
Loss at step 500: 4474676969472.000000
Training accuracy: 50.0%
Validation a

In [16]:
batch_size = 128
graph = tf.Graph()
train_dataset = train_dataset.astype(np.float32)
valid_dataset = valid_dataset.astype(np.float32)
test_dataset = test_dataset.astype(np.float32)
with graph.as_default():
  tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(batch_size, image_size))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  weights = tf.Variable(
    tf.truncated_normal([image_size, num_labels]))
  biases = tf.Variable(tf.zeros([num_labels]))
  logits = tf.matmul(tf_train_dataset, weights) + biases
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(
    tf.matmul(tf_valid_dataset, weights) + biases)
  test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)

In [17]:
num_steps = 3001
with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(num_steps):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 500 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(
        valid_prediction.eval(), valid_labels))
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 41860256.000000
Minibatch accuracy: 10.2%
Validation accuracy: 48.9%
Initialized
Minibatch loss at step 0: 41860256.000000
Minibatch accuracy: 10.2%
Validation accuracy: 48.9%
Minibatch loss at step 500: 26188967837696.000000
Minibatch accuracy: 25.0%
Validation accuracy: 47.7%
Minibatch loss at step 500: 26188967837696.000000
Minibatch accuracy: 25.0%
Validation accuracy: 47.7%
Minibatch loss at step 1000: 14405175607296.000000
Minibatch accuracy: 50.0%
Validation accuracy: 24.8%
Minibatch loss at step 1000: 14405175607296.000000
Minibatch accuracy: 50.0%
Validation accuracy: 24.8%
Minibatch loss at step 1500: 5069009321984.000000
Minibatch accuracy: 52.3%
Validation accuracy: 42.1%
Minibatch loss at step 1500: 5069009321984.000000
Minibatch accuracy: 52.3%
Validation accuracy: 42.1%
Minibatch loss at step 2000: 7049229369344.000000
Minibatch accuracy: 54.7%
Validation accuracy: 47.8%
Minibatch loss at step 2000: 7049229369344.000000
Minibatch acc

In [18]:
d1 = pd.read_csv('TimeBasedFeatures-Dataset-15s-AllinOne.csv')

In [19]:
d1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18758 entries, 0 to 18757
Data columns (total 24 columns):
duration              18758 non-null int64
total_fiat            18758 non-null int64
total_biat            18758 non-null int64
min_fiat              18758 non-null int64
min_biat              18758 non-null int64
max_fiat              18758 non-null float64
max_biat              18758 non-null float64
mean_fiat             18758 non-null float64
mean_biat             18758 non-null float64
flowPktsPerSecond     18758 non-null float64
flowBytesPerSecond    18758 non-null float64
min_flowiat           18758 non-null int64
max_flowiat           18758 non-null int64
mean_flowiat          18758 non-null float64
std_flowiat           18758 non-null float64
min_active            18758 non-null int64
mean_active           18758 non-null float64
max_active            18758 non-null int64
std_active            18758 non-null float64
min_idle              18758 non-null int64
mean_idle  

In [20]:
d1.describe()

Unnamed: 0,duration,total_fiat,total_biat,min_fiat,min_biat,max_fiat,max_biat,mean_fiat,mean_biat,flowPktsPerSecond,...,mean_flowiat,std_flowiat,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle
count,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,...,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0
mean,9791705.0,608678.3,626780.2,3314204.0,2861716.0,1003929.0,921825.0,844828.6,613357.8,2073.780095,...,476816.7,1035025.0,3253846.0,3635654.0,4091204.0,502500.1,2983753.0,3325142.0,3732466.0,453138.9
std,14384580.0,2243195.0,2326442.0,10621150.0,9652561.0,3141855.0,2495984.0,4336559.0,2516395.0,19115.713624,...,1404428.0,3534085.0,13429550.0,13561620.0,14002110.0,2813959.0,13343230.0,13474300.0,13901550.0,2785573.0
min,0.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
25%,334669.0,7.0,2.0,25970.5,24542.25,11543.33,5626.786,114.5469,0.0,2.59919,...,9577.449,5574.602,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
50%,11092630.0,62.0,21.0,282143.0,286580.0,51049.59,35262.5,11649.16,9605.91,11.68122,...,84126.02,75181.75,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
75%,14945370.0,1645.0,1651.0,4289454.0,4265465.0,526030.2,484372.6,605594.0,534037.0,100.671695,...,311933.0,765921.3,2767727.0,4248512.0,5690602.0,0.0,1943930.0,3333185.0,4696002.0,0.0
max,601405000.0,37680790.0,43002380.0,303595700.0,600109700.0,152000000.0,43000000.0,215000000.0,98000000.0,1000000.0,...,60700000.0,136000000.0,601405000.0,601000000.0,601405000.0,168000000.0,600109700.0,600000000.0,600109700.0,168000000.0


Unnamed: 0,duration,total_fiat,total_biat,min_fiat,min_biat,max_fiat,max_biat,mean_fiat,mean_biat,flowPktsPerSecond,...,mean_flowiat,std_flowiat,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle
count,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,...,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0
mean,9791705.0,608678.3,626780.2,3314204.0,2861716.0,1003929.0,921825.0,844828.6,613357.8,2073.780095,...,476816.7,1035025.0,3253846.0,3635654.0,4091204.0,502500.1,2983753.0,3325142.0,3732466.0,453138.9
std,14384580.0,2243195.0,2326442.0,10621150.0,9652561.0,3141855.0,2495984.0,4336559.0,2516395.0,19115.713624,...,1404428.0,3534085.0,13429550.0,13561620.0,14002110.0,2813959.0,13343230.0,13474300.0,13901550.0,2785573.0
min,0.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
25%,334669.0,7.0,2.0,25970.5,24542.25,11543.33,5626.786,114.5469,0.0,2.59919,...,9577.449,5574.602,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
50%,11092630.0,62.0,21.0,282143.0,286580.0,51049.59,35262.5,11649.16,9605.91,11.68122,...,84126.02,75181.75,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
75%,14945370.0,1645.0,1651.0,4289454.0,4265465.0,526030.2,484372.6,605594.0,534037.0,100.671695,...,311933.0,765921.3,2767727.0,4248512.0,5690602.0,0.0,1943930.0,3333185.0,4696002.0,0.0
max,601405000.0,37680790.0,43002380.0,303595700.0,600109700.0,152000000.0,43000000.0,215000000.0,98000000.0,1000000.0,...,60700000.0,136000000.0,601405000.0,601000000.0,601405000.0,168000000.0,600109700.0,600000000.0,600109700.0,168000000.0
