In [1]:
import pandas as pd
import numpy as np
import ast
from IPython.display import display

In [2]:
# Load the semicolon-delimited stats file into the working DataFrame.
stats_path = 'hypermapper-iot_nprint_3pkts-stats.txt'
df = pd.read_csv(stats_path, sep=';')

In [3]:
# Best source accuracy over all rows. Series.max() skips NaN, whereas the
# builtin max() gives order-dependent results when NaN values are present.
max_acc = df['Source Accuracy'].max()

# Each entry of user_thres is a relative drop from max_acc (0 = none, 1 = all);
# src_thres holds the matching absolute floors, max_acc * (1 - drop).
user_thres = [0, 0.01, 0.03, 0.1, 0.3, 1]
src_thres = [max_acc * (1 - drop) for drop in user_thres]
src_thres, user_thres

([0.9561586638830898,
  0.9465970772442589,
  0.9274739039665971,
  0.8605427974947808,
  0.6693110647181628,
  0.0],
 [0, 0.01, 0.03, 0.1, 0.3, 1])

In [4]:
# Parse each 'Features' string as a Python literal and record how many items it holds.
feature_counts = df['Features'].apply(lambda feats: len(ast.literal_eval(feats)))
df['Number of Features'] = feature_counts

In [5]:
# Rows whose source accuracy reaches the floor stored at index 1 of src_thres.
meets_thres_1 = df['Source Accuracy'] >= src_thres[1]
df.loc[meets_thres_1]

Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
96,"[7, 9, 10, 11, 12, 13]",0.947808,0.594990,6
222,"[6, 7, 9, 10, 11, 13]",0.947808,0.601253,6
311,"[5, 8, 9, 11, 12]",0.947808,0.551148,5
349,"[5, 7, 9, 10, 11]",0.947808,0.601253,5
350,"[5, 7, 9, 10, 11, 13]",0.947808,0.611691,6
...,...,...,...,...
16379,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13]",0.951983,0.636743,13
16380,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",0.954071,0.626305,12
16381,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13]",0.956159,0.638831,13
16382,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]",0.954071,0.653445,13


In [6]:
import os

# `__session__` is not a standard Python global: it is only present when the
# hosting front end injects it into the kernel namespace. Fall back to a fixed
# stem so this cell still runs elsewhere instead of raising KeyError.
session_path = globals().get('__session__') or 'threshold_results'
filename = os.path.basename(session_path)

# One summary row per threshold: among the rows whose source accuracy meets the
# floor, report the one with the lowest threat accuracy.
results_list = []

for src_t, user_t in zip(src_thres, user_thres):
    # Rows meeting the source-accuracy floor that also have a usable threat accuracy.
    filtered_df = df[df['Source Accuracy'] >= src_t].dropna(subset=['Threat Accuracy'])
    if filtered_df.empty:
        raise ValueError(
            f"No rows with Source Accuracy >= {src_t} and a non-null Threat Accuracy"
        )

    # A stable sort makes ties on the minimum resolve to the first row in file
    # order, so repeated runs always pick the same row.
    # NOTE(review): an earlier draft restricted the candidates to a 1% band above
    # the minimum (apparently to then prefer fewer features); since that selection
    # was disabled, the band had no effect on the chosen row and is omitted here.
    selected_row = filtered_df.sort_values(by='Threat Accuracy', kind='mergesort').iloc[0]

    results_list.append([
        user_t,
        selected_row['Source Accuracy'],
        selected_row['Threat Accuracy'],
        selected_row['Number of Features']
    ])

# The last column header embeds the largest feature count seen in df for reference.
max_features = df['Number of Features'].max()
columns = ['Threshold', 'Source Accuracy', 'Threat Accuracy', f"# Features (Max {max_features})"]
result_df = pd.DataFrame(results_list, columns=columns)

# Written to the current working directory as "<filename>.csv", without the index column.
result_df.to_csv(filename + '.csv', index=False)

In [7]:
# For every source-accuracy floor, show the qualifying rows that use the fewest
# features, ordered by ascending threat accuracy.
for src_t in src_thres:
    print("Source Accuracy Threshold:", src_t)
    meets_thres = df['Source Accuracy'] >= src_t
    min_feats = min(df.loc[meets_thres, 'Number of Features'])
    fewest_feature_rows = df[meets_thres & (df['Number of Features'] == min_feats)]
    display(fewest_feature_rows.sort_values(by='Threat Accuracy'))

Source Accuracy Threshold: 0.9561586638830898


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
4118,"[1, 9, 11, 13]",0.956159,0.563674,4
4372,"[1, 5, 9, 11]",0.956159,0.565762,4
2353,"[2, 5, 8, 9]",0.956159,0.56785,4
5125,"[1, 3, 11, 13]",0.956159,0.574113,4
5392,"[1, 3, 5, 9]",0.956159,0.5762,4
3078,"[2, 3, 11, 13]",0.956159,0.588727,4


Source Accuracy Threshold: 0.9465970772442589


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
1065,"[3, 8, 10]",0.947808,0.528184,3
4114,"[1, 9, 13]",0.949896,0.536534,3
518,"[4, 11, 13]",0.947808,0.54071,3
4368,"[1, 5, 9]",0.951983,0.542797,3
2565,"[2, 4, 11]",0.947808,0.546973,3
785,"[4, 5, 9]",0.947808,0.549061,3
2069,"[2, 9, 11]",0.947808,0.553236,3
2066,"[2, 9, 13]",0.947808,0.569937,3
2321,"[2, 5, 9]",0.951983,0.569937,3
2054,"[2, 11, 13]",0.949896,0.572025,3


Source Accuracy Threshold: 0.9274739039665971


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
1057,"[3, 8]",0.935282,0.517745,2
4113,"[1, 9]",0.931106,0.521921,2
1041,"[3, 9]",0.929019,0.528184,2
273,"[5, 9]",0.929019,0.530271,2
517,"[4, 11]",0.93737,0.530271,2
1033,"[3, 10]",0.939457,0.530271,2
521,"[4, 10]",0.933194,0.532359,2
2081,"[2, 8]",0.939457,0.532359,2
4101,"[1, 11]",0.929019,0.532359,2
1029,"[3, 11]",0.929019,0.546973,2


Source Accuracy Threshold: 0.8605427974947808


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
5,[11],0.908142,0.488518,1
2049,[2],0.891441,0.511482,1
513,[4],0.895616,0.517745,1
1025,[3],0.885177,0.528184,1


Source Accuracy Threshold: 0.6693110647181628


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
2,[13],0.73904,0.340292,1
257,[5],0.76618,0.390397,1
17,[9],0.670146,0.442589,1
4097,[1],0.826722,0.473904,1
9,[10],0.826722,0.478079,1
5,[11],0.908142,0.488518,1
2049,[2],0.891441,0.511482,1
513,[4],0.895616,0.517745,1
1025,[3],0.885177,0.528184,1


Source Accuracy Threshold: 0.0


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
1,[],1e-05,1.0,0
