In [2]:
import numpy as np
import pandas as pd

In [3]:
import tensorflow as tf

In [4]:
combined_set = pd.read_csv('TimeBasedFeatures-Dataset-15s-AllinOne.csv')

In [5]:
for feature in combined_set.columns: # Loop through all columns in the dataframe
    if combined_set[feature].dtype == 'object': # Only apply for columns with categorical strings
        combined_set[feature] = pd.Categorical(combined_set[feature]).codes

In [6]:
data2 = combined_set

In [7]:
shuffled = data2.sample(frac=1)

In [8]:
predictors = shuffled.iloc[:,:-1]

In [9]:
targets = shuffled.iloc[:,-1]

In [10]:
trainsize = int(len(shuffled['class1']) * 0.8)

In [11]:
testsize = len(shuffled['class1']) - trainsize
npredictors = len(predictors.columns)
noutputs = 1
numiter = 10000
modelfile = '/tmp/trained_model'

In [12]:
from __future__ import print_function
from six.moves import cPickle as pickle
from six.moves import range

In [13]:
num_labels = 7
image_size = 23
def reformat(dataset, labels):
  dataset = dataset.values
  # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...]
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels
train_dataset, train_labels = reformat(predictors[:trainsize], targets[:trainsize])
valid_dataset, valid_labels = reformat(predictors[trainsize:trainsize + testsize / 2], targets[trainsize:trainsize + testsize / 2])
test_dataset, test_labels = reformat(predictors[trainsize + testsize / 2:], targets[trainsize + testsize / 2:])
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (15006, 23) (15006, 7)
Validation set (1876, 23) (1876, 7)
Test set (1876, 23) (1876, 7)
Training set (15006, 23) (15006, 7)
Validation set (1876, 23) (1876, 7)
Test set (1876, 23) (1876, 7)


In [14]:
train_subset = 15006
graph = tf.Graph()
with graph.as_default():
  tf_train_dataset = tf.constant(train_dataset[:train_subset, :].astype(np.float32))
  tf_train_labels = tf.constant(train_labels[:train_subset])
  tf_valid_dataset = tf.constant(valid_dataset.astype(np.float32))
  tf_test_dataset = tf.constant(test_dataset.astype(np.float32))
  weights = tf.Variable(
    tf.truncated_normal([image_size, num_labels]))
  biases = tf.Variable(tf.zeros([num_labels]))
  logits = tf.matmul(tf_train_dataset, weights) + biases
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(
    tf.matmul(tf_valid_dataset, weights) + biases)
  test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)

In [15]:
num_steps = 801
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])
with tf.Session(graph=graph) as session: 
  tf.initialize_all_variables().run()
  print('Initialized')
  for step in range(num_steps):
    _, l, predictions = session.run([optimizer, loss, train_prediction])
    if (step % 100 == 0):
      print('Loss at step %d: %f' % (step, l))
      print('Training accuracy: %.1f%%' % accuracy(
        predictions, train_labels[:train_subset, :]))
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), valid_labels))
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Loss at step 0: 15968755.000000
Training accuracy: 30.9%
Validation accuracy: 26.4%
Initialized
Loss at step 0: 15968755.000000
Training accuracy: 30.9%
Validation accuracy: 26.4%
Loss at step 100: 3065381126144.000000
Training accuracy: 47.1%
Validation accuracy: 32.5%
Loss at step 100: 3065381126144.000000
Training accuracy: 47.1%
Validation accuracy: 32.5%
Loss at step 200: 15248485515264.000000
Training accuracy: 35.5%
Validation accuracy: 41.6%
Loss at step 200: 15248485515264.000000
Training accuracy: 35.5%
Validation accuracy: 41.6%
Loss at step 300: 4114921029632.000000
Training accuracy: 48.7%
Validation accuracy: 48.3%
Loss at step 300: 4114921029632.000000
Training accuracy: 48.7%
Validation accuracy: 48.3%
Loss at step 400: 13390432960512.000000
Training accuracy: 55.1%
Validation accuracy: 24.1%
Loss at step 400: 13390432960512.000000
Training accuracy: 55.1%
Validation accuracy: 24.1%
Loss at step 500: 4474676969472.000000
Training accuracy: 50.0%
Validation a

In [16]:
batch_size = 128
graph = tf.Graph()
train_dataset = train_dataset.astype(np.float32)
valid_dataset = valid_dataset.astype(np.float32)
test_dataset = test_dataset.astype(np.float32)
with graph.as_default():
  tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(batch_size, image_size))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  weights = tf.Variable(
    tf.truncated_normal([image_size, num_labels]))
  biases = tf.Variable(tf.zeros([num_labels]))
  logits = tf.matmul(tf_train_dataset, weights) + biases
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(
    tf.matmul(tf_valid_dataset, weights) + biases)
  test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)

In [17]:
num_steps = 3001
with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(num_steps):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 500 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(
        valid_prediction.eval(), valid_labels))
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 41860256.000000
Minibatch accuracy: 10.2%
Validation accuracy: 48.9%
Initialized
Minibatch loss at step 0: 41860256.000000
Minibatch accuracy: 10.2%
Validation accuracy: 48.9%
Minibatch loss at step 500: 26188967837696.000000
Minibatch accuracy: 25.0%
Validation accuracy: 47.7%
Minibatch loss at step 500: 26188967837696.000000
Minibatch accuracy: 25.0%
Validation accuracy: 47.7%
Minibatch loss at step 1000: 14405175607296.000000
Minibatch accuracy: 50.0%
Validation accuracy: 24.8%
Minibatch loss at step 1000: 14405175607296.000000
Minibatch accuracy: 50.0%
Validation accuracy: 24.8%
Minibatch loss at step 1500: 5069009321984.000000
Minibatch accuracy: 52.3%
Validation accuracy: 42.1%
Minibatch loss at step 1500: 5069009321984.000000
Minibatch accuracy: 52.3%
Validation accuracy: 42.1%
Minibatch loss at step 2000: 7049229369344.000000
Minibatch accuracy: 54.7%
Validation accuracy: 47.8%
Minibatch loss at step 2000: 7049229369344.000000
Minibatch acc

In [18]:
d1 = pd.read_csv('TimeBasedFeatures-Dataset-15s-AllinOne.csv')

In [19]:
d1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18758 entries, 0 to 18757
Data columns (total 24 columns):
duration              18758 non-null int64
total_fiat            18758 non-null int64
total_biat            18758 non-null int64
min_fiat              18758 non-null int64
min_biat              18758 non-null int64
max_fiat              18758 non-null float64
max_biat              18758 non-null float64
mean_fiat             18758 non-null float64
mean_biat             18758 non-null float64
flowPktsPerSecond     18758 non-null float64
flowBytesPerSecond    18758 non-null float64
min_flowiat           18758 non-null int64
max_flowiat           18758 non-null int64
mean_flowiat          18758 non-null float64
std_flowiat           18758 non-null float64
min_active            18758 non-null int64
mean_active           18758 non-null float64
max_active            18758 non-null int64
std_active            18758 non-null float64
min_idle              18758 non-null int64
mean_idle  

In [20]:
d1.describe()

Unnamed: 0,duration,total_fiat,total_biat,min_fiat,min_biat,max_fiat,max_biat,mean_fiat,mean_biat,flowPktsPerSecond,...,mean_flowiat,std_flowiat,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle
count,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,...,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0
mean,9791705.0,608678.3,626780.2,3314204.0,2861716.0,1003929.0,921825.0,844828.6,613357.8,2073.780095,...,476816.7,1035025.0,3253846.0,3635654.0,4091204.0,502500.1,2983753.0,3325142.0,3732466.0,453138.9
std,14384580.0,2243195.0,2326442.0,10621150.0,9652561.0,3141855.0,2495984.0,4336559.0,2516395.0,19115.713624,...,1404428.0,3534085.0,13429550.0,13561620.0,14002110.0,2813959.0,13343230.0,13474300.0,13901550.0,2785573.0
min,0.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
25%,334669.0,7.0,2.0,25970.5,24542.25,11543.33,5626.786,114.5469,0.0,2.59919,...,9577.449,5574.602,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
50%,11092630.0,62.0,21.0,282143.0,286580.0,51049.59,35262.5,11649.16,9605.91,11.68122,...,84126.02,75181.75,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
75%,14945370.0,1645.0,1651.0,4289454.0,4265465.0,526030.2,484372.6,605594.0,534037.0,100.671695,...,311933.0,765921.3,2767727.0,4248512.0,5690602.0,0.0,1943930.0,3333185.0,4696002.0,0.0
max,601405000.0,37680790.0,43002380.0,303595700.0,600109700.0,152000000.0,43000000.0,215000000.0,98000000.0,1000000.0,...,60700000.0,136000000.0,601405000.0,601000000.0,601405000.0,168000000.0,600109700.0,600000000.0,600109700.0,168000000.0


Unnamed: 0,duration,total_fiat,total_biat,min_fiat,min_biat,max_fiat,max_biat,mean_fiat,mean_biat,flowPktsPerSecond,...,mean_flowiat,std_flowiat,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle
count,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,...,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0,18758.0
mean,9791705.0,608678.3,626780.2,3314204.0,2861716.0,1003929.0,921825.0,844828.6,613357.8,2073.780095,...,476816.7,1035025.0,3253846.0,3635654.0,4091204.0,502500.1,2983753.0,3325142.0,3732466.0,453138.9
std,14384580.0,2243195.0,2326442.0,10621150.0,9652561.0,3141855.0,2495984.0,4336559.0,2516395.0,19115.713624,...,1404428.0,3534085.0,13429550.0,13561620.0,14002110.0,2813959.0,13343230.0,13474300.0,13901550.0,2785573.0
min,0.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
25%,334669.0,7.0,2.0,25970.5,24542.25,11543.33,5626.786,114.5469,0.0,2.59919,...,9577.449,5574.602,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
50%,11092630.0,62.0,21.0,282143.0,286580.0,51049.59,35262.5,11649.16,9605.91,11.68122,...,84126.02,75181.75,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
75%,14945370.0,1645.0,1651.0,4289454.0,4265465.0,526030.2,484372.6,605594.0,534037.0,100.671695,...,311933.0,765921.3,2767727.0,4248512.0,5690602.0,0.0,1943930.0,3333185.0,4696002.0,0.0
max,601405000.0,37680790.0,43002380.0,303595700.0,600109700.0,152000000.0,43000000.0,215000000.0,98000000.0,1000000.0,...,60700000.0,136000000.0,601405000.0,601000000.0,601405000.0,168000000.0,600109700.0,600000000.0,600109700.0,168000000.0


In [21]:
d1.head()

Unnamed: 0,duration,total_fiat,total_biat,min_fiat,min_biat,max_fiat,max_biat,mean_fiat,mean_biat,flowPktsPerSecond,...,std_flowiat,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,class1
0,9368711,16,4,1564818,1549373,190205.285714,203290.456522,389822.391917,370323.719754,10.353612,...,267600.198443,1871488,1983656.0,2195089,183219.7,1234883,1420565.0,1523088,161096.539275,CHAT
1,7340238,18,4,1567554,1527893,165686.977273,186914.846154,317267.548742,304370.651301,11.580006,...,221462.862028,1491627,3572433.0,5653239,2942704.0,1131498,1324636.0,1517774,273138.379008,CHAT
2,4644225,29,15,1270547,1079974,165865.178571,195302.130435,329473.126261,300492.588227,11.412022,...,217475.425246,1758922,1758922.0,1758922,0.0,1079974,1079974.0,1079974,0.0,CHAT
3,4978735,19,8,2492050,2457286,239543.25,276596.388889,612435.304238,628339.573544,8.034169,...,436959.716436,1710925,2382905.0,3054885,950323.2,1346073,1894031.5,2441990,774930.342317,CHAT
4,11838189,19,10,3094089,3093543,243766.5,295954.725,599721.781709,625632.703972,7.602514,...,436129.639296,1747431,2400446.0,3240696,623274.4,1394455,1983227.0,3042717,725987.829075,CHAT


Unnamed: 0,duration,total_fiat,total_biat,min_fiat,min_biat,max_fiat,max_biat,mean_fiat,mean_biat,flowPktsPerSecond,...,std_flowiat,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,class1
0,9368711,16,4,1564818,1549373,190205.285714,203290.456522,389822.391917,370323.719754,10.353612,...,267600.198443,1871488,1983656.0,2195089,183219.7,1234883,1420565.0,1523088,161096.539275,CHAT
1,7340238,18,4,1567554,1527893,165686.977273,186914.846154,317267.548742,304370.651301,11.580006,...,221462.862028,1491627,3572433.0,5653239,2942704.0,1131498,1324636.0,1517774,273138.379008,CHAT
2,4644225,29,15,1270547,1079974,165865.178571,195302.130435,329473.126261,300492.588227,11.412022,...,217475.425246,1758922,1758922.0,1758922,0.0,1079974,1079974.0,1079974,0.0,CHAT
3,4978735,19,8,2492050,2457286,239543.25,276596.388889,612435.304238,628339.573544,8.034169,...,436959.716436,1710925,2382905.0,3054885,950323.2,1346073,1894031.5,2441990,774930.342317,CHAT
4,11838189,19,10,3094089,3093543,243766.5,295954.725,599721.781709,625632.703972,7.602514,...,436129.639296,1747431,2400446.0,3240696,623274.4,1394455,1983227.0,3042717,725987.829075,CHAT


In [22]:
d2 = pd.read_csv('TimeBasedFeatures-Dataset-30s-AllinOne.csv')

In [23]:
d2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14651 entries, 0 to 14650
Data columns (total 24 columns):
duration              14651 non-null int64
total_fiat            14651 non-null int64
total_biat            14651 non-null int64
min_fiat              14651 non-null int64
min_biat              14651 non-null int64
max_fiat              14651 non-null float64
max_biat              14651 non-null float64
mean_fiat             14651 non-null float64
mean_biat             14651 non-null float64
flowPktsPerSecond     14651 non-null float64
flowBytesPerSecond    14651 non-null float64
min_flowiat           14651 non-null int64
max_flowiat           14651 non-null int64
mean_flowiat          14651 non-null float64
std_flowiat           14651 non-null float64
min_active            14651 non-null int64
mean_active           14651 non-null float64
max_active            14651 non-null int64
std_active            14651 non-null float64
min_idle              14651 non-null int64
mean_idle  

In [24]:
d2.describe()

Unnamed: 0,duration,total_fiat,total_biat,min_fiat,min_biat,max_fiat,max_biat,mean_fiat,mean_biat,flowPktsPerSecond,...,mean_flowiat,std_flowiat,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle
count,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,...,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0
mean,16296780.0,1150692.0,1116319.0,5764142.0,5091318.0,1848039.0,1710895.0,1449869.0,1179029.0,3017.38368,...,1032363.0,1922161.0,5176729.0,6049654.0,7150029.0,1081910.0,4871742.0,5655804.0,6650770.0,982847.6
std,19255520.0,4111790.0,4017208.0,13443200.0,12185000.0,4959793.0,4295170.0,5477288.0,3541896.0,22490.275511,...,2855625.0,4853864.0,16130450.0,16304650.0,17065730.0,4046128.0,16025100.0,16193320.0,16910060.0,3956407.0
min,0.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
25%,274174.0,7.0,1.0,26884.0,24678.5,9557.019,2727.024,22.25135,0.0,1.509018,...,9562.078,6291.811,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
50%,16232620.0,57.0,21.0,420458.0,451705.0,83635.27,61258.5,18167.45,18840.18,8.729889,...,116547.6,144042.9,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
75%,29778020.0,1105.0,467.5,8848114.0,8907444.0,915080.5,904470.9,1048425.0,1007539.0,100.541837,...,515142.7,1602566.0,4204065.0,7529336.0,10039730.0,445.4761,3239750.0,6463314.0,9902900.0,404.4638
max,601405000.0,37680790.0,43002380.0,303595700.0,600109700.0,152000000.0,43000000.0,215000000.0,98000000.0,1000000.0,...,60700000.0,136000000.0,601405000.0,601000000.0,601405000.0,168000000.0,600109700.0,600000000.0,600109700.0,168000000.0


Unnamed: 0,duration,total_fiat,total_biat,min_fiat,min_biat,max_fiat,max_biat,mean_fiat,mean_biat,flowPktsPerSecond,...,mean_flowiat,std_flowiat,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle
count,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,...,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0,14651.0
mean,16296780.0,1150692.0,1116319.0,5764142.0,5091318.0,1848039.0,1710895.0,1449869.0,1179029.0,3017.38368,...,1032363.0,1922161.0,5176729.0,6049654.0,7150029.0,1081910.0,4871742.0,5655804.0,6650770.0,982847.6
std,19255520.0,4111790.0,4017208.0,13443200.0,12185000.0,4959793.0,4295170.0,5477288.0,3541896.0,22490.275511,...,2855625.0,4853864.0,16130450.0,16304650.0,17065730.0,4046128.0,16025100.0,16193320.0,16910060.0,3956407.0
min,0.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
25%,274174.0,7.0,1.0,26884.0,24678.5,9557.019,2727.024,22.25135,0.0,1.509018,...,9562.078,6291.811,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
50%,16232620.0,57.0,21.0,420458.0,451705.0,83635.27,61258.5,18167.45,18840.18,8.729889,...,116547.6,144042.9,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
75%,29778020.0,1105.0,467.5,8848114.0,8907444.0,915080.5,904470.9,1048425.0,1007539.0,100.541837,...,515142.7,1602566.0,4204065.0,7529336.0,10039730.0,445.4761,3239750.0,6463314.0,9902900.0,404.4638
max,601405000.0,37680790.0,43002380.0,303595700.0,600109700.0,152000000.0,43000000.0,215000000.0,98000000.0,1000000.0,...,60700000.0,136000000.0,601405000.0,601000000.0,601405000.0,168000000.0,600109700.0,600000000.0,600109700.0,168000000.0


In [25]:
d2.head()

Unnamed: 0,duration,total_fiat,total_biat,min_fiat,min_biat,max_fiat,max_biat,mean_fiat,mean_biat,flowPktsPerSecond,...,std_flowiat,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,class1
0,22930417,18,4,10995965,10960941,314115.30137,360993.15873,1306745.0,1389355.0,6.018207,...,952994.511204,1491627,5011278.0,11141326,4507600.0,1079974,3668800.0,10945954,4855368.0,CHAT
1,25199304,16,4,11037969,10851858,359269.9,387414.676923,1373362.0,1392885.0,5.436658,...,976395.38809,1710925,4164396.0,14269600,4973926.0,1234883,3150269.0,10851858,3797336.0,CHAT
2,29772995,17,7,4093396,3956161,261815.256637,310135.364583,644542.2,651271.7,7.086959,...,462297.94793,1363299,2706523.0,6192566,1445372.0,1147658,1955468.0,3956161,893848.3,CHAT
3,29609149,22,6,3954677,3943698,271180.963303,287136.446602,668547.4,659046.0,7.227496,...,472527.270988,1443143,2937555.0,4176130,876966.9,1065834,2022585.0,3895269,930381.3,CHAT
4,7863522,30,8,2067690,2068091,244213.125,252051.483871,571806.2,562812.3,8.266016,...,400581.170736,2092947,2535199.0,3282729,650992.8,1707343,1897912.0,2017289,166786.9,CHAT


Unnamed: 0,duration,total_fiat,total_biat,min_fiat,min_biat,max_fiat,max_biat,mean_fiat,mean_biat,flowPktsPerSecond,...,std_flowiat,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,class1
0,22930417,18,4,10995965,10960941,314115.30137,360993.15873,1306745.0,1389355.0,6.018207,...,952994.511204,1491627,5011278.0,11141326,4507600.0,1079974,3668800.0,10945954,4855368.0,CHAT
1,25199304,16,4,11037969,10851858,359269.9,387414.676923,1373362.0,1392885.0,5.436658,...,976395.38809,1710925,4164396.0,14269600,4973926.0,1234883,3150269.0,10851858,3797336.0,CHAT
2,29772995,17,7,4093396,3956161,261815.256637,310135.364583,644542.2,651271.7,7.086959,...,462297.94793,1363299,2706523.0,6192566,1445372.0,1147658,1955468.0,3956161,893848.3,CHAT
3,29609149,22,6,3954677,3943698,271180.963303,287136.446602,668547.4,659046.0,7.227496,...,472527.270988,1443143,2937555.0,4176130,876966.9,1065834,2022585.0,3895269,930381.3,CHAT
4,7863522,30,8,2067690,2068091,244213.125,252051.483871,571806.2,562812.3,8.266016,...,400581.170736,2092947,2535199.0,3282729,650992.8,1707343,1897912.0,2017289,166786.9,CHAT


In [26]:
d3 = pd.read_csv('TimeBasedFeatures-Dataset-60s-AllinOne.csv')

In [27]:
d3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15515 entries, 0 to 15514
Data columns (total 24 columns):
duration              15515 non-null int64
min_fiat              15515 non-null int64
min_biat              15515 non-null int64
max_fiat              15515 non-null int64
max_biat              15515 non-null int64
mean_fiat             15515 non-null float64
mean_biat             15515 non-null float64
std_fiat              15515 non-null float64
std_biat              15515 non-null float64
flowPktsPerSecond     15515 non-null float64
flowBytesPerSecond    15515 non-null float64
min_flowiat           15515 non-null int64
max_flowiat           15515 non-null int64
mean_flowiat          15515 non-null float64
std_flowiat           15515 non-null float64
min_active            15515 non-null int64
mean_active           15515 non-null float64
max_active            15515 non-null int64
std_active            15515 non-null float64
min_idle              15515 non-null int64
mean_idle  

In [28]:
d3.describe()

Unnamed: 0,duration,min_fiat,min_biat,max_fiat,max_biat,mean_fiat,mean_biat,std_fiat,std_biat,flowPktsPerSecond,...,mean_flowiat,std_flowiat,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle
count,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,...,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0
mean,32397930.0,3143556.0,2661148.0,13850630.0,10559090.0,4849028.0,3918406.0,3720701.0,2569899.0,3693.324,...,2090817.0,4447224.0,11704060.0,13152540.0,15150710.0,1821138.0,11356160.0,12688010.0,14498310.0,1687020.0
std,42998950.0,9316081.0,9038970.0,37717730.0,19189930.0,11132880.0,9435423.0,15161730.0,6483652.0,29167.58,...,5027181.0,12386970.0,38645630.0,38691600.0,39320290.0,5889846.0,38536820.0,38593330.0,39202370.0,5767688.0
min,0.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
25%,239015.5,7.0,-1.0,26656.5,-1.0,6705.191,0.0,0.0,0.0,0.4342195,...,11262.58,5550.071,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
50%,31589010.0,81.0,25.0,974161.0,475688.0,206885.7,69944.56,27553.69,17494.53,5.090224,...,184229.4,242901.6,1123088.0,1733315.0,1841247.0,0.0,1010755.0,1265097.0,1300435.0,0.0
75%,58943020.0,32586.0,368.0,16825570.0,12401820.0,3772793.0,2437667.0,1534908.0,1277478.0,61.97419,...,1836120.0,5180522.0,10038420.0,11300000.0,17806020.0,8625.629,9976622.0,10000000.0,15444840.0,6161.843
max,646566000.0,59999850.0,59922420.0,646287700.0,600109700.0,152000000.0,59900000.0,264000000.0,98000000.0,2000000.0,...,64700000.0,204000000.0,646566000.0,647000000.0,646566000.0,184000000.0,646287700.0,646000000.0,646287700.0,185000000.0


Unnamed: 0,duration,min_fiat,min_biat,max_fiat,max_biat,mean_fiat,mean_biat,std_fiat,std_biat,flowPktsPerSecond,...,mean_flowiat,std_flowiat,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle
count,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,...,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0,15515.0
mean,32397930.0,3143556.0,2661148.0,13850630.0,10559090.0,4849028.0,3918406.0,3720701.0,2569899.0,3693.324,...,2090817.0,4447224.0,11704060.0,13152540.0,15150710.0,1821138.0,11356160.0,12688010.0,14498310.0,1687020.0
std,42998950.0,9316081.0,9038970.0,37717730.0,19189930.0,11132880.0,9435423.0,15161730.0,6483652.0,29167.58,...,5027181.0,12386970.0,38645630.0,38691600.0,39320290.0,5889846.0,38536820.0,38593330.0,39202370.0,5767688.0
min,0.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
25%,239015.5,7.0,-1.0,26656.5,-1.0,6705.191,0.0,0.0,0.0,0.4342195,...,11262.58,5550.071,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0
50%,31589010.0,81.0,25.0,974161.0,475688.0,206885.7,69944.56,27553.69,17494.53,5.090224,...,184229.4,242901.6,1123088.0,1733315.0,1841247.0,0.0,1010755.0,1265097.0,1300435.0,0.0
75%,58943020.0,32586.0,368.0,16825570.0,12401820.0,3772793.0,2437667.0,1534908.0,1277478.0,61.97419,...,1836120.0,5180522.0,10038420.0,11300000.0,17806020.0,8625.629,9976622.0,10000000.0,15444840.0,6161.843
max,646566000.0,59999850.0,59922420.0,646287700.0,600109700.0,152000000.0,59900000.0,264000000.0,98000000.0,2000000.0,...,64700000.0,204000000.0,646566000.0,647000000.0,646566000.0,184000000.0,646287700.0,646000000.0,646287700.0,185000000.0


In [29]:
d3.head()

Unnamed: 0,duration,min_fiat,min_biat,max_fiat,max_biat,mean_fiat,mean_biat,std_fiat,std_biat,flowPktsPerSecond,...,std_flowiat,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,class1
0,76,-1,-1,-1,-1,0.0,0.0,0.0,0.0,26315.789474,...,0.0,-1,0.0,-1,0.0,-1,0.0,-1,0.0,CHAT
1,54,-1,-1,-1,-1,0.0,0.0,0.0,0.0,37037.037037,...,0.0,-1,0.0,-1,0.0,-1,0.0,-1,0.0,CHAT
2,67,-1,-1,-1,-1,0.0,0.0,0.0,0.0,29850.746269,...,0.0,-1,0.0,-1,0.0,-1,0.0,-1,0.0,CHAT
3,75,-1,-1,-1,-1,0.0,0.0,0.0,0.0,26666.666667,...,0.0,-1,0.0,-1,0.0,-1,0.0,-1,0.0,CHAT
4,14512571,14512571,-1,14512571,-1,14500000.0,0.0,0.0,0.0,0.206717,...,10300000.0,14512571,14500000.0,14512571,0.0,14512502,14500000.0,14512502,0.0,CHAT


Unnamed: 0,duration,min_fiat,min_biat,max_fiat,max_biat,mean_fiat,mean_biat,std_fiat,std_biat,flowPktsPerSecond,...,std_flowiat,min_active,mean_active,max_active,std_active,min_idle,mean_idle,max_idle,std_idle,class1
0,76,-1,-1,-1,-1,0.0,0.0,0.0,0.0,26315.789474,...,0.0,-1,0.0,-1,0.0,-1,0.0,-1,0.0,CHAT
1,54,-1,-1,-1,-1,0.0,0.0,0.0,0.0,37037.037037,...,0.0,-1,0.0,-1,0.0,-1,0.0,-1,0.0,CHAT
2,67,-1,-1,-1,-1,0.0,0.0,0.0,0.0,29850.746269,...,0.0,-1,0.0,-1,0.0,-1,0.0,-1,0.0,CHAT
3,75,-1,-1,-1,-1,0.0,0.0,0.0,0.0,26666.666667,...,0.0,-1,0.0,-1,0.0,-1,0.0,-1,0.0,CHAT
4,14512571,14512571,-1,14512571,-1,14500000.0,0.0,0.0,0.0,0.206717,...,10300000.0,14512571,14500000.0,14512571,0.0,14512502,14500000.0,14512502,0.0,CHAT


In [30]:
d4 = pd.read_csv('TimeBasedFeatures-Dataset-120s-AllinOne.csv')