In [1]:
import ast
import os

import numpy as np
import pandas as pd
from IPython.display import display

In [2]:
# Load the semicolon-separated results table that every later cell reads as `df`.
df = pd.read_csv(
    'hypermapper-iot_nprint_3pkts-aligned-nprint.txt',
    sep=';',
)

In [3]:
# Best source accuracy found in the table. Series.max() skips NaN, whereas the
# builtin max() over a Series is order-dependent when NaN is present and can
# return NaN, which would turn every threshold below into NaN as well.
# float() keeps the displayed values as plain Python floats.
max_acc = float(df['Source Accuracy'].max())

# Tolerated relative drop from the best source accuracy (0 = none, 1 = any).
user_thres = [0, 0.01, 0.03, 0.1, 0.3, 1]

# Absolute source-accuracy floor corresponding to each tolerated drop.
src_thres = [max_acc * (1 - drop) for drop in user_thres]
src_thres, user_thres

([0.9331941544885176,
  0.9238622129436325,
  0.9051983298538621,
  0.8398747390396659,
  0.6532359081419623,
  0.0],
 [0, 0.01, 0.03, 0.1, 0.3, 1])

In [4]:
# 'Features' holds a list literal stored as text (e.g. "[1, 3, 5]"); parse each
# one safely with ast.literal_eval and record how many entries it contains.
df['Number of Features'] = df['Features'].map(
    lambda features_repr: len(ast.literal_eval(features_repr))
)

In [5]:
# Show every feature set whose source accuracy reaches the second threshold
# (src_thres[1]).
df.loc[df['Source Accuracy'].ge(src_thres[1])]

Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
5392,"[1, 3, 5, 10, 11, 12, 13]",0.924843,0.584551,7
5400,"[1, 3, 5, 9, 11, 12, 13]",0.924843,0.588727,7
5404,"[1, 3, 5, 9, 10, 12, 13]",0.926931,0.590814,7
5436,"[1, 3, 5, 8, 9, 10, 12, 13]",0.926931,0.576200,8
5452,"[1, 3, 5, 7, 10, 12, 13]",0.929019,0.599165,7
...,...,...,...,...
16363,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 13]",0.924843,0.586639,12
16371,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13]",0.926931,0.584551,12
16375,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13]",0.929019,0.597077,13
16379,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13]",0.926931,0.601253,13


In [6]:
# Name the output CSV after the running notebook. `__session__` is not
# guaranteed to be defined in every environment, so fall back to a fixed stem
# instead of failing with a KeyError when it is missing.
session_path = globals().get('__session__')
filename = os.path.basename(session_path) if session_path else 'threshold_results'
results_list = []

for src_t, user_t in zip(src_thres, user_thres):
    # Keep only the feature sets whose source accuracy meets this threshold.
    filtered_df = df[df['Source Accuracy'] >= src_t]

    # Report the feature set with the lowest threat accuracy among them.
    # Selecting the minimum directly is equivalent to first narrowing to rows
    # within 1% of the minimum and then taking the smallest one; keep='first'
    # makes ties resolve deterministically to the earliest row in `df`.
    selected_row = filtered_df.nsmallest(1, 'Threat Accuracy', keep='first').iloc[0]

    # One summary record per user-facing threshold.
    results_list.append([
        user_t,
        selected_row['Source Accuracy'],
        selected_row['Threat Accuracy'],
        selected_row['Number of Features']
    ])

# The last column header also states the largest feature count in the table.
max_features = df['Number of Features'].max()
columns = ['Threshold', 'Source Accuracy', 'Threat Accuracy', f"# Features (Max {max_features})"]
result_df = pd.DataFrame(results_list, columns=columns)

# Written next to the notebook as "<filename>.csv", without the index column.
result_df.to_csv(filename + '.csv', index=False)

In [7]:
# For each source-accuracy floor, list the feature sets that meet it using the
# fewest features, ordered from lowest to highest threat accuracy.
for src_t in src_thres:
    print("Source Accuracy Threshold:", src_t)
    qualifying = df.loc[df['Source Accuracy'].ge(src_t)]
    min_feats = min(qualifying['Number of Features'])
    smallest_sets = qualifying.loc[qualifying['Number of Features'].eq(min_feats)]
    display(smallest_sets.sort_values(by='Threat Accuracy'))

Source Accuracy Threshold: 0.9331941544885176


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
12768,"[0, 1, 5, 6, 7, 9, 10, 11, 12, 13]",0.933194,0.597077,10


Source Accuracy Threshold: 0.9238622129436325


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
5892,"[1, 3, 4, 5, 12, 13]",0.926931,0.592902,6
13571,"[0, 1, 3, 5, 12, 13]",0.926931,0.599165,6


Source Accuracy Threshold: 0.9051983298538621


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
8468,"[0, 5, 9, 12, 13]",0.908142,0.557411,5
1796,"[3, 4, 5, 12, 13]",0.908142,0.572025,5
3332,"[2, 3, 5, 12, 13]",0.906054,0.578288,5
8964,"[0, 4, 5, 12, 13]",0.906054,0.578288,5
10500,"[0, 2, 5, 12, 13]",0.906054,0.578288,5
12548,"[0, 1, 5, 12, 13]",0.912317,0.580376,5
12308,"[0, 1, 9, 12, 13]",0.906054,0.588727,5
5380,"[1, 3, 5, 12, 13]",0.916493,0.590814,5


Source Accuracy Threshold: 0.8398747390396659


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
8,"[11, 12, 13]",0.858038,0.532359,3
68,"[7, 12, 13]",0.858038,0.532359,3
12,"[10, 12, 13]",0.849687,0.534447,3
20,"[9, 12, 13]",0.858038,0.536534,3
36,"[8, 12, 13]",0.85595,0.538622,3
132,"[6, 12, 13]",0.85595,0.538622,3
8196,"[0, 12, 13]",0.847599,0.549061,3
260,"[5, 12, 13]",0.858038,0.551148,3
1028,"[3, 12, 13]",0.841336,0.557411,3


Source Accuracy Threshold: 0.6532359081419623


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
7,"[11, 12]",0.668058,0.365344,2
11,"[10, 12]",0.668058,0.365344,2
67,"[7, 12]",0.668058,0.365344,2
19,"[9, 12]",0.653445,0.367432,2
35,"[8, 12]",0.653445,0.367432,2
131,"[6, 12]",0.653445,0.367432,2
18,"[9, 13]",0.736952,0.413361,2
34,"[8, 13]",0.736952,0.413361,2
130,"[6, 13]",0.736952,0.413361,2
258,"[5, 13]",0.713987,0.413361,2


Source Accuracy Threshold: 0.0


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
1,[],1e-05,1.0,0
