In [1]:
import pandas as pd
import numpy as np
import warnings
from scipy import stats
from IPython.display import display, HTML
from sklearn import metrics as me

warnings.filterwarnings('ignore')
pd.set_option("display.max_rows",10)
pd.set_option('precision', 2)


%matplotlib inline

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    np.set_printoptions(precision=4)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        #print("Normalized confusion matrix")
    else:
        #print('Confusion matrix, without normalization')
        pass
    
    #print(cm)

    label = [["\n True Negative", "\n False Positive \n Type II Error"],
             ["\n False Negative \n Type I Error", "\n True Positive"]
            ]
    
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        
        plt.text(j, i, "{} {}".format(cm[i, j].round(4), label[i][j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

def plot(actual_value, pred_value):
    from sklearn.metrics import confusion_matrix

    cm_2labels = confusion_matrix(y_pred = pred_value, y_true = actual_value)
    plt.figure(figsize=[6,6])
    plot_confusion_matrix(cm_2labels, ['Normal', 'Attack'], normalize = False)


In [3]:
all_scenarios = pd.DataFrame(columns=['Scenarios', 'Number of Features', 'Accuracy', 'F1 Score', 'Precision', 'Recall'])

def evaluate(model, past_scores, past_scores_20, predictions, predictions_, predictions_20, predictions_20_):
    global all_scenarios
    
    def get_best_result(past_scores):
        psg = past_scores.sort_values(by='f1_score', ascending=False).groupby(by=['no_of_features', 'hidden_layers'])
        df = psg.first().sort_values(by='f1_score', ascending=False).head(5)
        #epoch_nof_hidden
        epoch = int(df.iloc[0]['epoch'])
        nof = df.iloc[0].name[0]
        hidden = df.iloc[0].name[1]
        
        return "{}_{}_{}".format(epoch, nof, hidden), nof, df

    def view_first(name, past_scores):
        _, _, df = get_best_result(past_scores)
        display(name)
        display(df)
        
    display("Individual Results for each Scenario")    
    view_first("Results for {} Train+".format(model),past_scores)
    view_first("Results for {} Train-".format(model),past_scores_20)
        
    def get_score(y_true, y_pred):
        f1 = me.f1_score(y_true, y_pred)
        pre = me.precision_score(y_true, y_pred)
        rec = me.recall_score(y_true, y_pred)
        acc = me.accuracy_score(y_true, y_pred)
        return {"F1 Score":f1, "Precision":pre, "Recall":rec, "Accuracy":acc}
    
    display("Combined Results from all Scenarios for {}".format(model))

    
    
    def accumulate_scenarios(predictions, past_scores):
        key, nof, df = get_best_result(past_scores)
        y_true = predictions[key]["Actual"]
        y_pred = predictions[key]["Prediction"]
        scores = get_score(y_true, y_pred)
        scores.update({"Scenarios":scenario,"Number of Features":nof})
        
        return pd.DataFrame(scores, index=[1])
    
    scenario = "Train+_Test+"
    all_scenarios = all_scenarios.append(accumulate_scenarios(predictions, past_scores))
    
    scenario = "Train+_Test-"
    all_scenarios = all_scenarios.append(accumulate_scenarios(predictions_, past_scores))
    
    scenario = "Train-_Test+"
    all_scenarios = all_scenarios.append(accumulate_scenarios(predictions_20, past_scores_20))
    
    scenario = "Train-_Test-"
    all_scenarios = all_scenarios.append(accumulate_scenarios(predictions_20_, past_scores_20))
    
    
    display(all_scenarios.set_index(['Scenarios','Number of Features']) * 100)
    
    
    
    
    
    

In [4]:
past_scores = pd.read_pickle("dataset/scores/tf_dense_only_nsl_kdd_scores_all.pkl")
past_scores_20 = pd.read_pickle("dataset/scores/tf_dense_only_nsl_kdd_scores_all-.pkl")
predictions = pd.read_pickle("dataset/tf_dense_only_nsl_kdd_predictions.pkl")
predictions_ = pd.read_pickle("dataset/tf_dense_only_nsl_kdd_predictions__.pkl")
predictions_20 = pd.read_pickle("dataset/tf_dense_only_nsl_kdd_predictions-.pkl")
predictions_20_ = pd.read_pickle("dataset/tf_dense_only_nsl_kdd_predictions-__.pkl")


In [5]:
evaluate("Fully Connected Network", past_scores, past_scores_20, predictions, predictions_, predictions_20, predictions_20_)

'Individual Results for each Scenario'

'Results for Fully Connected Network Train+'

Unnamed: 0_level_0,Unnamed: 1_level_0,epoch,train_score,test_score,f1_score,test_score_20,f1_score_20,time_taken
no_of_features,hidden_layers,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
12,3,10,0.86,0.89,0.91,0.83,0.9,13.06
48,3,3,0.83,0.89,0.91,0.83,0.9,3.29
122,3,2,0.86,0.89,0.9,0.82,0.89,2.02
24,3,6,0.86,0.87,0.89,0.81,0.88,7.62
122,1,12,0.92,0.87,0.87,0.76,0.83,11.08


'Results for Fully Connected Network Train-'

Unnamed: 0_level_0,Unnamed: 1_level_0,epoch,train_score,test_score,f1_score,test_score_20,f1_score_20,time_taken
no_of_features,hidden_layers,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
122,3,10,0.9,0.9,0.92,0.84,0.91,8.97
12,1,6,0.89,0.87,0.89,0.78,0.86,2.84
48,3,12,0.89,0.85,0.87,0.74,0.83,8.74
24,1,4,0.84,0.83,0.85,0.74,0.83,1.67
24,3,12,0.87,0.84,0.85,0.72,0.8,8.58


'Combined Results from all Scenarios for Fully Connected Network'

Unnamed: 0_level_0,Unnamed: 1_level_0,Accuracy,F1 Score,Precision,Recall
Scenarios,Number of Features,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Train+_Test+,12,89.46,91.09,87.77,94.66
Train+_Test-,12,83.09,90.0,87.23,92.94
Train-_Test+,122,90.42,91.89,88.66,95.36
Train-_Test-,122,84.27,90.71,87.77,93.86


In [6]:
psg = past_scores_20.sort_values(by='f1_score', ascending=False).groupby(by=['no_of_features', 'hidden_layers'])
df = psg.first().sort_values(by='f1_score', ascending=False)

In [7]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,epoch,train_score,test_score,f1_score,test_score_20,f1_score_20,time_taken
no_of_features,hidden_layers,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
122,3,10,0.9,0.9,0.92,0.84,0.91,8.97
12,1,6,0.89,0.87,0.89,0.78,0.86,2.84
48,3,12,0.89,0.85,0.87,0.74,0.83,8.74
24,1,4,0.84,0.83,0.85,0.74,0.83,1.67
24,3,12,0.87,0.84,0.85,0.72,0.8,8.58
48,1,12,0.9,0.8,0.8,0.64,0.73,5.75
12,3,5,0.78,0.79,0.78,0.62,0.72,3.36
1,1,12,0.87,0.77,0.76,0.58,0.66,6.34
122,1,12,0.9,0.75,0.72,0.53,0.61,5.94
1,3,2,0.54,0.43,0.0,0.18,0.0,0.9


In [8]:
predictions_20

<class 'pandas.core.panel.Panel'>
Dimensions: 62 (items) x 22544 (major_axis) x 4 (minor_axis)
Items axis: 10_122_1 to 8_48_3
Major_axis axis: 0 to 22543
Minor_axis axis: Actual to Prediction

In [9]:
hidden = df['hidden_layers']
epoch = df['epoch']
nof = df['no_of_features']


KeyError: 'hidden_layers'