In [2]:
import ast
import os

import numpy as np
import pandas as pd
from IPython.display import display

In [3]:
# Load the semicolon-delimited results table into a DataFrame.
df = pd.read_csv('cic-ids2017-32768.txt', delimiter=';')

In [4]:
# Best source accuracy found in the table. Series.max() is vectorised and
# ignores NaN entries, so a missing value cannot distort the reference point.
max_acc = df['Source Accuracy'].max()
# Tolerated relative drops from the best source accuracy (0 = none, 1 = any).
user_thres = [0, 0.01, 0.03, 0.1, 0.3, 1]
# Absolute source-accuracy floor that corresponds to each tolerated drop.
src_thres = [max_acc * (1 - drop) for drop in user_thres]
src_thres, user_thres

([0.9784502521779,
  0.9686657496561211,
  0.949096744612563,
  0.88060522696011,
  0.68491517652453,
  0.0],
 [0, 0.01, 0.03, 0.1, 0.3, 1])

In [5]:
# 'Features' holds each feature list as a string literal; parse it safely
# with ast.literal_eval and store how many entries the list has.
df['Number of Features'] = df['Features'].map(ast.literal_eval).map(len)

In [8]:
# For every (source-accuracy floor, tolerated drop) pair, pick the feature
# subset with the lowest threat accuracy among those that still meet the
# floor, then write the per-threshold summary to '<filename>.csv'.

# `__session__` is not defined anywhere in this notebook; it has to be supplied
# by the environment. NOTE(review): presumably the notebook path - confirm.
# Fall back to a fixed stem so the cell also runs where the name is absent.
filename = os.path.basename(globals().get('__session__') or 'threshold_results')
results_list = []

for src_t, user_t in zip(src_thres, user_thres):
    # Keep only the rows whose source accuracy meets the current floor.
    filtered_df = df[df['Source Accuracy'] >= src_t]
    if filtered_df.empty:
        raise ValueError(
            f"No rows with Source Accuracy >= {src_t} (threshold {user_t})"
        )

    # The row with the lowest threat accuracy wins. A stable sort keeps tied
    # rows in their original order, so the selection is deterministic.
    selected_row = filtered_df.sort_values(
        by='Threat Accuracy', kind='mergesort'
    ).iloc[0]

    results_list.append([
        user_t,
        selected_row['Source Accuracy'],
        selected_row['Threat Accuracy'],
        selected_row['Number of Features'],
    ])

# The last column header records the largest feature count in the table.
max_features = df['Number of Features'].max()
columns = [
    'Threshold',
    'Source Accuracy',
    'Threat Accuracy',
    f"# Features (Max {max_features})",
]
result_df = pd.DataFrame(results_list, columns=columns)
result_df.to_csv(filename + '.csv', index=False)

In [48]:
# Show every row whose source accuracy meets the second threshold in src_thres.
df.loc[df['Source Accuracy'].ge(src_thres[1])]

Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
3234,"[3, 4, 7, 9, 14]",0.977533,0.510775,5
3266,"[3, 4, 7, 8, 14]",0.976158,0.509399,5
3298,"[3, 4, 7, 8, 9, 14]",0.976158,0.515818,6
3490,"[3, 4, 6, 7, 9, 14]",0.976158,0.506648,6
3714,"[3, 4, 5, 7, 14]",0.976158,0.511233,5
4002,"[3, 4, 5, 6, 7, 9, 14]",0.976158,0.503439,7
4034,"[3, 4, 5, 6, 7, 8, 14]",0.976158,0.517194,7
6273,"[2, 3, 7, 14]",0.977075,0.513067,4
7297,"[2, 3, 4, 7, 14]",0.977075,0.514901,5
7329,"[2, 3, 4, 7, 9, 14]",0.976616,0.516277,6


In [45]:
# For each source-accuracy floor, display the rows that meet it with the
# fewest features, ordered by ascending threat accuracy.
for src_t in src_thres:
    print("Source Accuracy Threshold:", src_t)
    # Filter once and reuse the subset for both the minimum and the display.
    eligible = df[df['Source Accuracy'] >= src_t]
    # Series.min() yields NaN on an empty subset instead of raising, so an
    # unreachable floor shows an empty table rather than aborting the loop.
    min_feats = eligible['Number of Features'].min()
    display(
        eligible[eligible['Number of Features'] == min_feats]
        .sort_values(by='Threat Accuracy')
    )

Source Accuracy Threshold: 0.9784502521779


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
14465,"[1, 2, 3, 7, 14]",0.97845,0.509399,5


Source Accuracy Threshold: 0.9759502521779001


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
6273,"[2, 3, 7, 14]",0.977075,0.513067,4


Source Accuracy Threshold: 0.9734502521779


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
4226,"[2, 7, 14]",0.974782,0.476845,3
2178,"[3, 7, 14]",0.973865,0.495186,3


Source Accuracy Threshold: 0.9684502521779


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
385,"[6, 7]",0.970197,0.458505,2
161,"[7, 9]",0.970197,0.471343,2


Source Accuracy Threshold: 0.9634502521779


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
4129,"[2, 9]",0.964695,0.44475,2
4225,"[2, 7]",0.967905,0.452086,2
385,"[6, 7]",0.970197,0.458505,2
641,"[5, 7]",0.966071,0.459422,2
193,"[7, 8]",0.963778,0.462173,2
8224,"[1, 9]",0.963778,0.462173,2
130,"[7, 14]",0.967446,0.471343,2
161,"[7, 9]",0.970197,0.471343,2
545,"[5, 9]",0.965612,0.47226,2
289,"[6, 9]",0.964695,0.472719,2


Source Accuracy Threshold: 0.9584502521779


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
129,[7],0.96011,0.442916,1
