In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import glob
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, f1_score
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.outliers_influence import variance_inflation_factor  
import warnings
import pickle
import os

warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)

In [2]:
cols = [ 'expiration_id', 'protocol', 'src_port', 'dst_port',
       'ip_version',   'bidirectional_duration_ms', 'bidirectional_packets',
       'bidirectional_bytes', 'src2dst_duration_ms', 'src2dst_packets',
       'src2dst_bytes', 'dst2src_duration_ms', 'dst2src_packets', 'dst2src_bytes',
       'bidirectional_min_ps', 'bidirectional_mean_ps',
       'bidirectional_stddev_ps', 'bidirectional_max_ps',
       'src2dst_min_ps', 'src2dst_mean_ps', 'src2dst_stddev_ps',
       'src2dst_max_ps', 'dst2src_min_ps', 'dst2src_mean_ps',
       'dst2src_stddev_ps', 'dst2src_max_ps', 'bidirectional_min_piat_ms',
       'bidirectional_mean_piat_ms', 'bidirectional_stddev_piat_ms',
       'bidirectional_max_piat_ms', 'src2dst_min_piat_ms',
       'src2dst_mean_piat_ms', 'src2dst_stddev_piat_ms',
       'src2dst_max_piat_ms', 'dst2src_min_piat_ms',
       'dst2src_mean_piat_ms', 'dst2src_stddev_piat_ms',
       'dst2src_max_piat_ms', 'bidirectional_syn_packets', 'bidirectional_ack_packets',
       'bidirectional_psh_packets', 'bidirectional_rst_packets',
       'bidirectional_fin_packets', 'src2dst_syn_packets', 'src2dst_ack_packets',
       'src2dst_psh_packets', 'src2dst_rst_packets',
       'src2dst_fin_packets', 'dst2src_syn_packets', 'dst2src_ack_packets',
       'dst2src_psh_packets', 'dst2src_rst_packets',
       'dst2src_fin_packets','application_name',
       'application_category_name', 'application_is_guessed',
       'application_confidence', 'content_type', 'udps.num_pkts_up_to_128_bytes',
       'udps.num_pkts_128_to_256_bytes', 'udps.num_pkts_256_to_512_bytes',
       'udps.num_pkts_512_to_1024_bytes',
       'udps.num_pkts_1024_to_1514_bytes', 'udps.min_ttl', 'udps.max_ttl',
       'udps.min_ip_pkt_len', 'udps.max_ip_pkt_len', 'udps.src2dst_flags',
       'udps.dst2src_flags', 'udps.tcp_flags', 'udps.tcp_win_max_in',
       'udps.tcp_win_max_out', 'udps.icmp_type', 'udps.icmp_v4_type',
       'udps.dns_query_id', 'udps.dns_query_type', 'udps.dns_ttl_answer',
       'udps.ftp_command_ret_code', 'udps.retransmitted_in_packets',
       'udps.retransmitted_out_packets', 'udps.retransmitted_in_bytes',
       'udps.retransmitted_out_bytes', 'udps.src_to_dst_second_bytes',
       'udps.dst_to_src_second_bytes', 'udps.src_to_dst_avg_throughput',
       'udps.dst_to_src_avg_throughput', 'udps.src_to_dst_second_bytes2',
       'udps.dst_to_src_second_bytes2', 'udps.src_to_dst_avg_throughput2',
       'udps.dst_to_src_avg_throughput2', 'udps.tcp_init_ms',
       'udps.tcp_synack_ack_ms', 'udps.tcp_half_closed_time_ms',
       'udps.num_pkts_after_termination',
       'udps.src2dst_first_packet_payload_len',
       'udps.dst2src_first_packet_payload_len',
       'udps.bidirectional_transport_bytes',
       'udps.bidirectional_payload_bytes', 'udps.src2dst_transport_bytes',
       'udps.src2dst_payload_bytes', 'udps.dst2src_transport_bytes',
       'udps.dst2src_payload_bytes',
       'udps.src2dst_most_freq_payload_ratio',
       'udps.src2dst_most_freq_payload_len',
       'udps.dst2src_most_freq_payload_ratio',
       'udps.dst2src_most_freq_payload_len',
       'udps.bidirectional_mean_packet_relative_times',
       'udps.bidirectional_stddev_packet_relative_times',
       'udps.bidirectional_variance_packet_relative_times',
       'udps.bidirectional_coeff_of_var_packet_relative_times',
       'udps.bidirectional_skew_from_median_packet_relative_times',
       'udps.src2dst_mean_packet_relative_times',
       'udps.src2dst_stddev_packet_relative_times',
       'udps.src2dst_variance_packet_relative_times',
       'udps.src2dst_coeff_of_var_packet_relative_times',
       'udps.src2dst_skew_from_median_packet_relative_times',
       'udps.dst2src_mean_packet_relative_times',
       'udps.dst2src_stddev_packet_relative_times',
       'udps.dst2src_variance_packet_relative_times',
       'udps.dst2src_coeff_of_var_packet_relative_times',
       'udps.dst2src_skew_from_median_packet_relative_times',
       'udps.min_req_res_time_diff', 'udps.max_req_res_time_diff',
       'udps.mean_req_res_time_diff', 'udps.stddev_req_res_time_diff',
       'udps.variance_req_res_time_diff',
       'udps.coeff_of_var_req_res_time_diff',
       'udps.skew_from_median_req_res_time_diff',
       'udps.src2dst_small_packet_payload_packets',
       'udps.src2dst_small_packet_payload_ratio',
       'udps.dst2src_small_packet_payload_packets',
       'udps.dst2src_small_packet_payload_ratio',
       'udps.sent_recv_packet_ratio',
       'udps.bidirectional_ps_first_quartile',
       'udps.bidirectional_ps_second_quartile',
       'udps.bidirectional_ps_third_quartile',
       'udps.bidirectional_ps_median_absoulte_deviation',
       'udps.bidirectional_ps_skewness', 'udps.bidirectional_ps_kurtosis',
       'udps.bidirectional_piat_first_quartile',
       'udps.bidirectional_piat_second_quartile',
       'udps.bidirectional_piat_third_quartile',
       'udps.bidirectional_piat_median_absoulte_deviation',
       'udps.bidirectional_piat_skewness',
       'udps.bidirectional_piat_kurtosis',
       'udps.median_req_res_time_diff', 'Attack']

In [2]:
cols = [ 'expiration_id', 'protocol',
       'ip_version',   'bidirectional_duration_ms', 'bidirectional_packets',
       'bidirectional_bytes', 'src2dst_duration_ms', 'src2dst_packets',
       'src2dst_bytes', 'dst2src_duration_ms', 'dst2src_packets', 'dst2src_bytes',
       'bidirectional_min_ps', 'bidirectional_mean_ps',
       'bidirectional_stddev_ps', 'bidirectional_max_ps',
       'src2dst_min_ps', 'src2dst_mean_ps', 'src2dst_stddev_ps',
       'src2dst_max_ps', 'dst2src_min_ps', 'dst2src_mean_ps',
       'dst2src_stddev_ps', 'dst2src_max_ps', 'bidirectional_min_piat_ms',
       'bidirectional_mean_piat_ms', 'bidirectional_stddev_piat_ms',
       'bidirectional_max_piat_ms', 'src2dst_min_piat_ms',
       'src2dst_mean_piat_ms', 'src2dst_stddev_piat_ms',
       'src2dst_max_piat_ms', 'dst2src_min_piat_ms',
       'dst2src_mean_piat_ms', 'dst2src_stddev_piat_ms',
       'dst2src_max_piat_ms', 'bidirectional_syn_packets', 'bidirectional_ack_packets',
       'bidirectional_psh_packets', 'bidirectional_rst_packets',
       'bidirectional_fin_packets', 'src2dst_syn_packets', 'src2dst_ack_packets',
       'src2dst_psh_packets', 'src2dst_rst_packets',
       'src2dst_fin_packets', 'dst2src_syn_packets', 'dst2src_ack_packets',
       'dst2src_psh_packets', 'dst2src_rst_packets',
       'dst2src_fin_packets','application_name',
       'application_category_name', 'application_is_guessed',
       'application_confidence', 'content_type', 'Attack']

In [3]:
def port_feature(port):
    if port < 1024:
        return 1
    elif port < 49152 and port >= 1024:
        return 2
    else:
        return 3        

In [4]:
def encode(df, cols):
    """
    @param df pandas DataFrame
    @param cols a list of columns to encode 
    @return a DataFrame with one-hot encoding
    """
    les = {}
    for each in cols:
        le_col = LabelEncoder()
        df[each] = le_col.fit_transform(df[each])
        les[each] = le_col
       
    return df, les


In [5]:
def save_predictions(predictions, targets, timeout, save_path= "results/ET"):
    name_file_pred = 'predictions_idle_' + str(timeout[0]) + "_active_" + str(timeout[1]) + ".p"
    name_file_y = 'targets_idle_' + str(timeout[0]) + "_active_" + str(timeout[1]) + ".p"

    pickle.dump(predictions, open(os.path.join(save_path, name_file_pred), 'wb') )
    pickle.dump(targets, open(os.path.join(save_path, name_file_y), 'wb') )
    
def load_predictions(timeout, save_path= "results/ET"):
    name_file_pred = 'predictions_idle_' + str(timeout[0]) + "_active_" + str(timeout[1]) + ".p"
    name_file_y = 'targets_idle_' + str(timeout[0]) + "_active_" + str(timeout[1]) + ".p"
    
    predictions =  pickle.load(open(os.path.join(save_path, name_file_pred), 'rb') )
    targets =  pickle.load(open(os.path.join(save_path, name_file_y), 'rb') )
    return predictions, targets

In [6]:
timeouts = [(0.5,2), (1, 2), (2,2), (0.5,3), (1,3), (2, 3), (3,3), (0.5,4), (1, 4), (2,4), (3,4), (4,4), (0.5,5), (1,5), (2,5), (3,5), (4,5), (5,5), (0.5, 30), (1, 30), (2,30), (3,30), (4,30), (5,30), (10, 30), (0.5, 60), (1, 60), (2,60), (3,60), (4,60), (5,60), (10, 60)]

In [7]:
file = open('../results/ET/unsw-nb15_results_ET.txt', "w")

In [8]:
def split_data(data):
    train_idx, test_idx = next(StratifiedKFold(n_splits=3).split(data, data['Attack']))
    train, test = data.iloc[train_idx].reset_index(drop=True), data.iloc[test_idx].reset_index(drop=True)
    return train, test

# Training

In [10]:
best_f1 = 0
best_report = None
best_timeout = None
best_pred = None
best_y = None
for timeout in timeouts:
    print("Processing timeout : ", timeout)
    idle, active = timeout
    out_dir = f'/home/meryem.janati/lustre/nlp_team-um6p-st-sccs-id7fz1zvotk/IDS/janati/IDS/project2/extractions/extractions/new_idle_{idle}min_active_{active}min/UNSW-NB15'
    df = pd.read_csv(out_dir+"/UNSW-NB15.csv")
    file.write("\n==========================================================\n")
    file.write(f" active: {active} min   idle: {idle} min \n")
    file.write(str(df.Attack.value_counts()))
    file.write("\n*******************************************\n")
    file.write(str(df.Attack.value_counts() / df.shape[0] * 100))
    file.write("\n*******************************************\n")
    file.write(str(df['bidirectional_duration_ms'].describe()))
    file.write("\n*******************************************\n")
    df = df[~df.Attack.str.contains('direction_flip')]
    df = df.sort_values(by=['bidirectional_last_seen_ms']).reset_index(drop=True)
    df_new = df[cols]
    df_new['application_name'] = df_new['application_name'].apply(lambda x: x.split(".")[0])
    df_new['content_type'] = df_new['content_type'].fillna("unkown/unkown")
    df_new['content_sub_type'] = df_new['content_type'].apply(lambda x: x.split("/")[1])
    df_new['content_type'] = df_new['content_type'].apply(lambda x: x.split("/")[0])
    #df_new['src_port'] = df_new['src_port'].apply(lambda x: port_feature(x))
    #df_new['dst_port'] = df_new['dst_port'].apply(lambda x: port_feature(x))
    df_new = df_new.fillna(0)
    categ_cols = ["application_name", "application_category_name", "content_sub_type", "content_type" ]
    df_new, lbl_encoders = encode(df_new,categ_cols)    
    train, test = split_data(df_new)
    y_train = train['Attack']
    X_train = train.drop('Attack', axis=1)
    y_test = test['Attack']
    X_test = test.drop('Attack', axis=1)
    le = LabelEncoder()
    y_train = le.fit_transform(y_train)
    y_test = le.transform(y_test)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    clf = ExtraTreesClassifier(n_estimators=100, random_state=42)#, class_weight='balanced'
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    print("------------------ classes : ", le.classes_,)
    exit(0)
    report = classification_report(y_test, pred,  target_names=le.classes_, digits=4)
    file.write(report)
    #save_predictions(y_test, pred, timeout, save_path='results/ET/')
    f1 = f1_score(y_true=y_test, y_pred=pred, average='macro')
    if f1 > best_f1: 
        best_timeout = timeout
        best_f1 = f1
        best_report= report
        best_pred=pred
        best_y=y_test
    file.write("==========================================================\n\n")
    file.flush()
print("--------------- DONE ---------------------")
#save_predictions(best_pred, best_y, save_path='results/ET/')

Processing timeout :  (0.5, 2)
------------------ classes :  ['Analysis' 'Backdoors' 'Benign' 'DoS' 'Exploits' 'Fuzzers' 'Generic'
 'Reconnaissance' 'Shellcode' 'Worms']
Processing timeout :  (1, 2)
------------------ classes :  ['Analysis' 'Backdoors' 'Benign' 'DoS' 'Exploits' 'Fuzzers' 'Generic'
 'Reconnaissance' 'Shellcode' 'Worms']
Processing timeout :  (2, 2)
------------------ classes :  ['Analysis' 'Backdoors' 'Benign' 'DoS' 'Exploits' 'Fuzzers' 'Generic'
 'Reconnaissance' 'Shellcode' 'Worms']
Processing timeout :  (0.5, 3)
------------------ classes :  ['Analysis' 'Backdoors' 'Benign' 'DoS' 'Exploits' 'Fuzzers' 'Generic'
 'Reconnaissance' 'Shellcode' 'Worms']
Processing timeout :  (1, 3)
------------------ classes :  ['Analysis' 'Backdoors' 'Benign' 'DoS' 'Exploits' 'Fuzzers' 'Generic'
 'Reconnaissance' 'Shellcode' 'Worms']
Processing timeout :  (2, 3)
------------------ classes :  ['Analysis' 'Backdoors' 'Benign' 'DoS' 'Exploits' 'Fuzzers' 'Generic'
 'Reconnaissance' 'Shellcod

KeyboardInterrupt: 

In [27]:
best_f1 = 0
best_report = None
best_timeout = None
best_pred = None
best_y = None
for timeout in timeouts:
    print("Processing timeout : ", timeout)
    idle, active = timeout
    out_dir = f'/home/meryem.janati/lustre/nlp_team-um6p-st-sccs-id7fz1zvotk/IDS/janati/IDS/project2/extractions/extractions/new_idle_{idle}min_active_{active}min/UNSW-NB15'
    df = pd.read_csv(out_dir+"/UNSW-NB15.csv")
    file.write("\n==========================================================\n")
    file.write(f" active: {active} min   idle: {idle} min \n")
    file.write(str(df.Attack.value_counts()))
    file.write("\n*******************************************\n")
    file.write(str(df.Attack.value_counts() / df.shape[0] * 100))
    file.write("\n*******************************************\n")
    file.write(str(df['bidirectional_duration_ms'].describe()))
    file.write("\n*******************************************\n")
    df = df[~df.Attack.str.contains('direction_flip')]
    df = df.sort_values(by=['bidirectional_last_seen_ms']).reset_index(drop=True)
    df_new = df[cols]
    df_new['application_name'] = df_new['application_name'].apply(lambda x: x.split(".")[0])
    df_new['content_type'] = df_new['content_type'].fillna("unkown/unkown")
    df_new['content_sub_type'] = df_new['content_type'].apply(lambda x: x.split("/")[1])
    df_new['content_type'] = df_new['content_type'].apply(lambda x: x.split("/")[0])
    #df_new['src_port'] = df_new['src_port'].apply(lambda x: port_feature(x))
    #df_new['dst_port'] = df_new['dst_port'].apply(lambda x: port_feature(x))
    df_new = df_new.fillna(0)
    categ_cols = ["application_name", "application_category_name", "content_sub_type", "content_type" ]
    df_new, lbl_encoders = encode(df_new,categ_cols)    
    train, test = split_data(df_new)
    y_train = train['Attack']
    X_train = train.drop('Attack', axis=1)
    y_test = test['Attack']
    X_test = test.drop('Attack', axis=1)
    le = LabelEncoder()
    y_train = le.fit_transform(y_train)
    y_test = le.transform(y_test)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    clf = ExtraTreesClassifier(n_estimators=100, random_state=42)#, class_weight='balanced'
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    report = classification_report(y_test, pred,  target_names=le.classes_, digits=4)
    file.write(report)
    save_predictions(y_test, pred, timeout, save_path='results/ET/')
    f1 = f1_score(y_true=y_test, y_pred=pred, average='macro')
    if f1 > best_f1: 
        best_timeout = timeout
        best_f1 = f1
        best_report= report
        best_pred=pred
        best_y=y_test
    file.write("==========================================================\n\n")
    file.flush()
print("--------------- DONE ---------------------")
#save_predictions(best_pred, best_y, save_path='results/ET/')

Processing timeout :  (0.5, 2)
Processing timeout :  (1, 2)
Processing timeout :  (2, 2)
Processing timeout :  (0.5, 3)
Processing timeout :  (1, 3)
Processing timeout :  (2, 3)
Processing timeout :  (3, 3)
Processing timeout :  (0.5, 4)
Processing timeout :  (1, 4)
Processing timeout :  (2, 4)
Processing timeout :  (3, 4)
Processing timeout :  (4, 4)
Processing timeout :  (0.5, 5)
Processing timeout :  (1, 5)
Processing timeout :  (2, 5)
Processing timeout :  (3, 5)
Processing timeout :  (4, 5)
Processing timeout :  (5, 5)
Processing timeout :  (0.5, 30)
Processing timeout :  (1, 30)
Processing timeout :  (2, 30)
Processing timeout :  (3, 30)
Processing timeout :  (4, 30)
Processing timeout :  (5, 30)
Processing timeout :  (10, 30)
Processing timeout :  (0.5, 60)
Processing timeout :  (1, 60)
Processing timeout :  (2, 60)
Processing timeout :  (3, 60)
Processing timeout :  (4, 60)
Processing timeout :  (5, 60)
Processing timeout :  (10, 60)
--------------- DONE ---------------------


TypeError: save_predictions() missing 1 required positional argument: 'timeout'

In [28]:
file.write("=========================BEST PERFORMANCE===================\n")
file.write(f'best timeouts {best_timeout}\n')
file.write(f'best F1-macro {best_f1}\n')
file.write(best_report)
file.close()

In [29]:
print("best timeouts: ", best_timeout)
print("best F1-macro: ", best_f1)
print("best report: ", best_report)

best timeouts:  (3, 4)
best F1-macro:  0.7394503950533898
best report:                  precision    recall  f1-score   support

      Analysis     0.4000    0.0196    0.0374       102
     Backdoors     0.8700    0.7500    0.8056       116
        Benign     0.9937    0.9990    0.9963    662142
           DoS     0.7796    0.5476    0.6433      1156
      Exploits     0.8586    0.8754    0.8669      7531
       Fuzzers     0.8733    0.4322    0.5782      6138
       Generic     0.9346    0.8708    0.9016      1215
Reconnaissance     0.9183    0.9030    0.9106      3835
     Shellcode     0.8343    0.8591    0.8465       504
         Worms     0.8696    0.7547    0.8081        53

      accuracy                         0.9907    682792
     macro avg     0.8332    0.7011    0.7395    682792
  weighted avg     0.9900    0.9907    0.9896    682792



In [30]:
y_pred = best_pred
y_test = best_y

In [31]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, auc, roc_curve


print('\nAccuracy: {:.4f}\n'.format(accuracy_score(y_test, y_pred)))

print('Micro Precision: {:.4f}'.format(precision_score(y_test, y_pred, average='micro')))
print('Micro Recall: {:.4f}'.format(recall_score(y_test, y_pred, average='micro')))
print('Micro F1-score: {:.4f}\n'.format(f1_score(y_test, y_pred, average='micro')))

print('Macro Precision: {:.4f}'.format(precision_score(y_test, y_pred, average='macro')))
print('Macro Recall: {:.4f}'.format(recall_score(y_test, y_pred, average='macro')))
print('Macro F1-score: {:.4f}\n'.format(f1_score(y_test, y_pred, average='macro')))

print('Weighted Precision: {:.4f}'.format(precision_score(y_test, y_pred, average='weighted')))
print('Weighted Recall: {:.4f}'.format(recall_score(y_test, y_pred, average='weighted')))
print('Weighted F1-score: {:.4f}'.format(f1_score(y_test, y_pred, average='weighted')))

#fpr, tpr, thresholds = roc_curve(y_test, y_pred, pos_label=1)
#print('AUC: {:.4f}'.format(auc(fpr, tpr)))



Accuracy: 0.9907

Micro Precision: 0.9907
Micro Recall: 0.9907
Micro F1-score: 0.9907

Macro Precision: 0.8332
Macro Recall: 0.7011
Macro F1-score: 0.7395

Weighted Precision: 0.9900
Weighted Recall: 0.9907
Weighted F1-score: 0.9896


# Load predictions - Best and Worst case

In [9]:
best_f1 = 0
best_report = None
best_timeout = None
best_pred = None
best_y = None

worst_f1 = 1
worst_report = None
worst_timeout = None
worst_pred = None
worst_y = None


classes =  ['Analysis', 'Backdoors', 'Benign', 'DoS', 'Exploits', 'Fuzzers', 'Generic', 'Reconnaissance', 'Shellcode', 'Worms']
for timeout in timeouts:
    print("Loading timeout : ", timeout)
    idle, active = timeout
    pred, y_test = load_predictions(timeout, save_path= "../results/ET")
    report = classification_report(y_test, pred,  target_names=classes, digits=4)
    f1 = f1_score(y_true=y_test, y_pred=pred, average='macro')
    if f1 > best_f1: 
        best_timeout = timeout
        best_f1 = f1
        best_report= report
        best_pred=pred
        best_y=y_test
        
    if f1 <= worst_f1: 
        worst_timeout = timeout
        worst_f1 = f1
        worst_report= report
        worst_pred=pred
        worst_y=y_test
    file.write("==========================================================\n\n")
    file.flush()
print("------------------- DONE -------------------")

Loading timeout :  (0.5, 2)
Loading timeout :  (1, 2)
Loading timeout :  (2, 2)
Loading timeout :  (0.5, 3)
Loading timeout :  (1, 3)
Loading timeout :  (2, 3)
Loading timeout :  (3, 3)
Loading timeout :  (0.5, 4)
Loading timeout :  (1, 4)
Loading timeout :  (2, 4)
Loading timeout :  (3, 4)
Loading timeout :  (4, 4)
Loading timeout :  (0.5, 5)
Loading timeout :  (1, 5)
Loading timeout :  (2, 5)
Loading timeout :  (3, 5)
Loading timeout :  (4, 5)
Loading timeout :  (5, 5)
Loading timeout :  (0.5, 30)
Loading timeout :  (1, 30)
Loading timeout :  (2, 30)
Loading timeout :  (3, 30)
Loading timeout :  (4, 30)
Loading timeout :  (5, 30)
Loading timeout :  (10, 30)
Loading timeout :  (0.5, 60)
Loading timeout :  (1, 60)
Loading timeout :  (2, 60)
Loading timeout :  (3, 60)
Loading timeout :  (4, 60)
Loading timeout :  (5, 60)
Loading timeout :  (10, 60)
------------------- DONE -------------------


In [10]:
file.write("=========================BEST PERFORMANCE===================\n")
file.write(f'best timeouts {best_timeout}\n')
file.write(f'best F1-macro {best_f1}\n')
file.write(f'best resport {best_report}\n')

file.write("=========================WORST PERFORMANCE===================\n")
file.write(f'worst timeouts {worst_timeout}\n')
file.write(f'worst F1-macro {worst_f1}\n')
file.write(f'xorst resport {worst_report}\n')

file.close()

In [11]:
print("best timeouts: ", best_timeout)
print("best F1-macro: ", best_f1)
print("best report: ", best_report)

print("worst timeouts: ", worst_timeout)
print("worst F1-macro: ", worst_f1)
print("worst report: ", worst_report)

best timeouts:  (3, 4)
best F1-macro:  0.7394503950533898
best report:                  precision    recall  f1-score   support

      Analysis     0.0196    0.4000    0.0374         5
     Backdoors     0.7500    0.8700    0.8056       100
        Benign     0.9990    0.9937    0.9963    665690
           DoS     0.5476    0.7796    0.6433       812
      Exploits     0.8754    0.8586    0.8669      7679
       Fuzzers     0.4322    0.8733    0.5782      3038
       Generic     0.8708    0.9346    0.9016      1132
Reconnaissance     0.9030    0.9183    0.9106      3771
     Shellcode     0.8591    0.8343    0.8465       519
         Worms     0.7547    0.8696    0.8081        46

      accuracy                         0.9907    682792
     macro avg     0.7011    0.8332    0.7395    682792
  weighted avg     0.9936    0.9907    0.9918    682792

worst timeouts:  (0.5, 60)
worst F1-macro:  0.7297928004281716
worst report:                  precision    recall  f1-score   support

      

In [12]:
y_pred =  best_y    # Switched because they were switched when saved
y_test =  best_pred

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, auc, roc_curve


best_accuracy = accuracy_score(y_test, y_pred)
best_precision = precision_score(y_test, y_pred, average='macro')
best_recall = recall_score(y_test, y_pred, average='macro')

print('\nAccuracy: {:.4f}\n'.format(best_accuracy))

print('Micro Precision: {:.4f}'.format(precision_score(y_test, y_pred, average='micro')))
print('Micro Recall: {:.4f}'.format(recall_score(y_test, y_pred, average='micro')))
print('Micro F1-score: {:.4f}\n'.format(f1_score(y_test, y_pred, average='micro')))

print('Macro Precision: {:.4f}'.format(best_precision))
print('Macro Recall: {:.4f}'.format(best_recall))
print('Macro F1-score: {:.4f}\n'.format(best_f1))

print('Weighted Precision: {:.4f}'.format(precision_score(y_test, y_pred, average='weighted')))
print('Weighted Recall: {:.4f}'.format(recall_score(y_test, y_pred, average='weighted')))
print('Weighted F1-score: {:.4f}'.format(f1_score(y_test, y_pred, average='weighted')))


Accuracy: 0.9907

Micro Precision: 0.9907
Micro Recall: 0.9907
Micro F1-score: 0.9907

Macro Precision: 0.8332
Macro Recall: 0.7011
Macro F1-score: 0.7395

Weighted Precision: 0.9900
Weighted Recall: 0.9907
Weighted F1-score: 0.9896


In [13]:
y_pred = worst_y
y_test =  worst_pred

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, auc, roc_curve


worst_accuracy = accuracy_score(y_test, y_pred)
worst_precision = precision_score(y_test, y_pred, average='macro')
worst_recall = recall_score(y_test, y_pred, average='macro')

print('\nAccuracy: {:.4f}\n'.format(worst_accuracy))

print('Micro Precision: {:.4f}'.format(precision_score(y_test, y_pred, average='micro')))
print('Micro Recall: {:.4f}'.format(recall_score(y_test, y_pred, average='micro')))
print('Micro F1-score: {:.4f}\n'.format(f1_score(y_test, y_pred, average='micro')))

print('Macro Precision: {:.4f}'.format(worst_precision))
print('Macro Recall: {:.4f}'.format(worst_recall))
print('Macro F1-score: {:.4f}\n'.format(worst_f1))

print('Weighted Precision: {:.4f}'.format(precision_score(y_test, y_pred, average='weighted')))
print('Weighted Recall: {:.4f}'.format(recall_score(y_test, y_pred, average='weighted')))
print('Weighted F1-score: {:.4f}'.format(f1_score(y_test, y_pred, average='weighted')))


Accuracy: 0.9905

Micro Precision: 0.9905
Micro Recall: 0.9905
Micro F1-score: 0.9905

Macro Precision: 0.8107
Macro Recall: 0.6929
Macro F1-score: 0.7298

Weighted Precision: 0.9897
Weighted Recall: 0.9905
Weighted F1-score: 0.9893


In [15]:
file_best_worst = open('../results/ET/unsw-nb15_best&worst.txt', "w")

file_best_worst.write(f'best timeouts:  {best_timeout}\n')
file_best_worst.write(f'best F1-macro:  {best_f1}\n')
file_best_worst.write(f'best precision:  {best_precision}\n')
file_best_worst.write(f'best recall:  {best_recall}\n')
file_best_worst.write(f'best accuracy:  {best_accuracy}\n')
file_best_worst.write(f'best report:  {best_report}\n')

file_best_worst.write(f'worst timeouts:  {worst_timeout}\n')
file_best_worst.write(f'worst F1-macro:  {worst_f1}\n')
file_best_worst.write(f'worstt precision:  {worst_precision}\n')
file_best_worst.write(f'worst recall:  {worst_recall}\n')
file_best_worst.write(f'worst accuracy:  {worst_accuracy}\n')
file_best_worst.write(f'worst report:  {worst_report}\n')
file_best_worst.flush()
file_best_worst.close

<function TextIOWrapper.close()>