In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import time
import matplotlib.pyplot as plt
from keras.callbacks import ModelCheckpoint
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Activation, Dropout, Input, Masking, TimeDistributed, LSTM, Conv1D
from keras.layers import GRU, Bidirectional, BatchNormalization, Reshape
from keras.optimizers import Adam

Tensorflow version: 2.1.3


Using TensorFlow backend.


In [3]:
# Load data
# Read the test-set feature and label matrices from CSV and report how long
# the read took.
tic = time.time()

# Directory containing the cleaned CSV files, relative to the working
# directory (adjust when running on a different machine).
path = "clean_data/"

X_test, Y_test = (
    np.genfromtxt(path + csv_name, delimiter=',')
    for csv_name in ('X_test.csv', 'Y_test.csv')
)

toc = time.time()
print("time elapsed: " + str(toc-tic) + " sec's")

time elapsed: 1.231572151184082 sec's


In [4]:
# Data dimensions
# Show the shapes of the loaded test arrays, one per line.
print("Test set shapes: ",
      "   X: " + str(X_test.shape),
      "   Y: " + str(Y_test.shape),
      sep="\n")

# Second axis of each array: number of input features / number of label columns.
nx, ny = X_test.shape[1], Y_test.shape[1]
print("# of X features:",nx)
print("# of Y labels:", ny)

Test set shapes: 
   X: (11720, 77)
   Y: (11720, 15)
# of X features: 77
# of Y labels: 15


In [5]:
# Create labels
# Class names, one per label column; position in this array is the class index.
labels = np.array(
    [
        'BENIGN',
        'Bot',
        'DDoS',
        'DoS GoldenEye',
        'DoS Hulk',
        'DoS Slowhttptest',
        'DoS slowloris',
        'FTP-Patator',
        'Heartbleed',
        'Infiltration',
        'PortScan',
        'SSH-Patator',
        'Web Attack - Brute Force',
        'Web Attack - Sql Injection',
        'Web Attack - XSS',
    ],
    dtype=object,
)

print("Unique label count: "+ str(len(labels)))

Unique label count: 15


In [37]:
# Metrics to calculate, by label type
# Two bookkeeping columns, followed by four confusion-matrix counts
# (TP, FP, TN, FN) for every label, grouped label by label.
metrics_cols = ["Model_Name", "Runtime"] + [
    label_name + suffix
    for label_name in labels
    for suffix in (" TP", " FP", " TN", " FN")
]

# Store metrics in a pandas dataframe (starts empty; rows are added per model)
metrics = pd.DataFrame(columns=metrics_cols)

In [38]:
def sort_by_sorter(sorter, a, b):
    """Reorder ``a`` and ``b`` so their rows follow the ascending order of ``sorter``.

    Row ``k`` of each returned array is the input row whose key is the k-th
    smallest entry of ``sorter``. Rows with equal keys keep their original
    relative order (stable sort).

    Parameters:
        sorter: 1-D sequence of sort keys, one per row of ``a`` and ``b``.
        a, b:   array-likes whose first dimension has the same length as ``sorter``.

    Returns:
        (sorted_a, sorted_b): new arrays with the same shape and dtype as the
        inputs; the inputs themselves are not modified.

    Raises:
        AssertionError: if the three inputs do not have the same length.
    """
    assert len(sorter) == len(a)
    assert len(a) == len(b)

    a = np.asarray(a)
    b = np.asarray(b)

    # argsort yields, for each OUTPUT position, the index of the INPUT row that
    # belongs there, so the result is gathered with a[order]. Writing
    # out[order[k]] = a[k] instead would apply the inverse permutation and
    # leave the rows unsorted.
    order = np.argsort(sorter, kind="stable")
    return a[order], b[order]

In [8]:
# Helper functions for analyzing the model

def logits_to_OH(y):
    """Convert class scores to one-hot vectors by marking the highest-scoring class.

    Parameters:
        y: array-like of shape (classes,) or (samples, classes). Arrays with
           more leading dimensions are also accepted; the last axis is always
           treated as the class axis.

    Returns:
        A float array of zeros with the same shape as ``y`` and a single 1 per
        class vector at the position of its maximum. Ties resolve to the
        lowest index (``np.argmax`` behaviour). An input with no elements
        yields an all-zero array of the same shape.
    """
    y = np.asarray(y)
    one_hot = np.zeros(y.shape)

    # Nothing to mark; also avoids argmax raising on an empty class axis.
    if y.size == 0:
        return one_hot

    # View everything as (rows, classes) so one vectorized assignment covers
    # both the single-vector case and the batch case.
    scores = y.reshape(-1, y.shape[-1])
    flat_one_hot = one_hot.reshape(scores.shape)  # view into one_hot
    flat_one_hot[np.arange(scores.shape[0]), np.argmax(scores, axis=1)] = 1
    return one_hot

def confusion_matrix(y_true, y_pred, negative_index):
    """Count binary attack / no-attack outcomes from one-hot label matrices.

    Given true and predicted one-hot matrices of shape (samples, classes), a
    sample is 'negative' when its 1 sits at ``negative_index`` and 'positive'
    otherwise.

    Note: this is only meant for this application, where 'BENIGN' is the only
    negative class. Predicting *some* attack for a sample that is truly an
    attack counts as a true positive even when the attack type is wrong.

    Returns:
        (TP, FP, TN, FN) as a tuple of ints.

    Raises:
        AssertionError: if the two matrices differ in shape.
    """
    assert y_true.shape == y_pred.shape

    n_samples = y_pred.shape[0]
    tally = {"TP": 0, "FP": 0, "TN": 0, "FN": 0}

    for row in range(n_samples):
        predicted, actual = y_pred[row], y_true[row]

        if np.argmax(predicted) != negative_index:
            # Predicted an attack: correct as long as the sample is not benign,
            # regardless of which attack type was predicted.
            outcome = "FP" if np.argmax(actual) == negative_index else "TP"
        else:
            # Predicted benign: correct only if the whole row matches the truth.
            outcome = "TN" if np.all(predicted == actual) else "FN"

        tally[outcome] += 1

    # Every sample must land in exactly one bucket.
    assert sum(tally.values()) == n_samples
    return tally["TP"], tally["FP"], tally["TN"], tally["FN"]

# Testing
# Hand-worked example with class 0 as the negative class:
#   rows 0-1: attack predicted as benign        -> 2 FN
#   row  2  : benign predicted as benign         -> 1 TN
#   row  3  : attack predicted as another attack -> 1 TP
true = np.array([[0, 1, 0, 0], [0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 0, 1]])
pred = np.array([[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0]])
smoke_counts = confusion_matrix(true, pred, 0)
print(smoke_counts) # should be 1,0,1,2

# Fail loudly on a regression instead of relying on someone reading the output.
assert smoke_counts == (1, 0, 1, 2), "confusion_matrix self-test failed: " + str(smoke_counts)

(1, 0, 1, 2)


In [9]:
# First, reorder the test data by label.
# Dotting each one-hot row of Y_test with [0, 1, ... 14] collapses it to its
# class index, giving one sort key per sample.
sorter = np.dot(Y_test, list(range(len(labels))))
X_test, Y_test = sort_by_sorter(sorter, X_test, Y_test)

# Capture the # of samples by label. `sorter` still holds the keys in the
# pre-sort row order, which is fine here because counting is order-independent.
samples_of_label = np.array(
    [np.sum(sorter == class_index) for class_index in range(len(labels))],
    dtype=float,
)
assert np.sum(samples_of_label) == len(X_test)

In [35]:
# Models to analyze
models = ['210603_4_L_25e.h5', '210603_4_L_50e.h5']

# Directory holding the saved models (the same for every model, so set once)
path = 'saved_models/'

# Row boundaries of each label's slice of the test set:
# bounds[j]:bounds[j+1] spans the samples counted in samples_of_label[j].
# These do not depend on the model, so they are computed once up front.
# NOTE(review): this assumes X_test / Y_test are ordered by label so that each
# slice holds exactly one label -- confirm the sorting step ran first.
bounds = np.concatenate(([0], np.cumsum(samples_of_label))).astype(int)

# Analyze metrics for all models
for i, model_name in enumerate(models):
    # Load pre-trained model and turn its outputs into one-hot predictions
    model = load_model(path + model_name)
    Y_pred = logits_to_OH(model.predict(X_test))

    metrics.loc[i, "Model_Name"] = model_name

    # NOTE(review): the timer starts after predict(), so "Runtime" covers only
    # the confusion-matrix bookkeeping below, not model inference -- confirm
    # that this is the intended measurement.
    tic = time.time()

    # Iterate through every label type
    for j, label in enumerate(labels):
        start_index, end_index = bounds[j], bounds[j + 1]

        # Store confusion matrix in dataframe (index 0 = 'BENIGN' is the negative class)
        TP, FP, TN, FN = confusion_matrix(Y_test[start_index:end_index, :],
                                          Y_pred[start_index:end_index, :], 0)
        metrics.loc[i, label + " TP"] = TP
        metrics.loc[i, label + " FP"] = FP
        metrics.loc[i, label + " TN"] = TN
        metrics.loc[i, label + " FN"] = FN

    toc = time.time()
    metrics.loc[i, "Runtime"] = toc - tic
    print(model_name, "runtime:", toc - tic, "seconds")
    
# Run check sums
# For every model, the four confusion-matrix counts of each label must add up
# to the number of test samples carrying that label.
checks = np.zeros((len(models), len(labels)))

for i in range(len(models)):
    for j in range(len(labels)):
        for suffix in (" TP", " FP", " TN", " FN"):
            # Read row i (this model's row); reading row 0 here would only
            # ever validate the first model.
            checks[i, j] += metrics.loc[i, str(labels[j] + suffix)]
    assert np.all(checks[i] == samples_of_label), \
        "check sum mismatch for model " + str(models[i])

210603_4_L_25e.h5 runtime: 0.08439493179321289 seconds
210603_4_L_50e.h5 runtime: 0.08257126808166504 seconds


In [36]:
# Export metrics to CSV for further analytics
# Output directory and file name are kept separate so either can be changed
# on its own; the dataframe index is not written.
path, name = "model_stats/", "210603_1719_stats.csv"
metrics.to_csv(path + name, index=False)