In [1]:
import pandas as pd
import numpy as np
import ast
from IPython.display import display

In [2]:
# Semicolon-separated results file, resolved relative to the working directory.
RESULTS_PATH = 'hypermapper-utility-score-serv_rec_full_nprint_10pkts.txt'
df = pd.read_csv(RESULTS_PATH, sep=';')

In [3]:
# Highest value in the 'Source Accuracy' column.
# Series.max() skips NaN; the builtin max() returns an order-dependent result
# when a NaN is present because every comparison against NaN is False.
max_acc = df['Source Accuracy'].max()

# Tolerated relative drop from max_acc (0 = no drop allowed, 1 = any accuracy accepted).
user_thres = [0, 0.01, 0.03, 0.1, 0.3, 1]

# Absolute source-accuracy floor corresponding to each tolerated drop.
src_thres = [max_acc * (1 - drop) for drop in user_thres]
src_thres, user_thres

([0.85651575142021,
  0.8479505939060079,
  0.8308202788776037,
  0.770864176278189,
  0.599561025994147,
  0.0],
 [0, 0.01, 0.03, 0.1, 0.3, 1])

In [4]:
def _feature_count(encoded):
    """Parse a Python-literal string and return the length of the parsed value."""
    return len(ast.literal_eval(encoded))


# 'Features' holds each list as text (e.g. "[1, 3, 4]"); store its length per row.
df['Number of Features'] = df['Features'].apply(_feature_count)

In [5]:
# src_thres[1] is the floor derived from user_thres[1], i.e. a 1% tolerated drop.
within_one_percent = df['Source Accuracy'] >= src_thres[1]
df[within_one_percent]

Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
384,"[1, 3, 4, 5, 6, 7, 8, 9]",0.849286,0.781718,8
480,"[1, 2, 3, 5, 6, 7, 8, 9]",0.849544,0.781029,8
512,"[1, 2, 3, 4, 5, 6, 7, 8, 9]",0.851954,0.783612,9
640,"[0, 3, 4, 5, 6, 7, 8, 9]",0.849458,0.778361,8
736,"[0, 2, 3, 5, 6, 7, 8, 9]",0.849113,0.785419,8
768,"[0, 2, 3, 4, 5, 6, 7, 8, 9]",0.850577,0.7843,9
895,"[0, 1, 3, 4, 5, 6, 7, 8, 9]",0.854794,0.784128,9
991,"[0, 1, 2, 3, 5, 6, 7, 8, 9]",0.855913,0.789034,9
1023,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]",0.856516,0.788346,10


In [6]:
import os

# Name the output file after the notebook session when the kernel exposes one.
# `__session__` is not guaranteed to be present in the namespace, so fall back
# to a fixed stem instead of failing with KeyError.
session_path = globals().get('__session__')
filename = os.path.basename(session_path) if session_path else 'threshold_results'

# Column header records the largest feature count seen in df for reference.
feature_col = f"# Features (Max {df['Number of Features'].max()})"
columns = ['Threshold', 'Source Accuracy', 'Threat Accuracy', feature_col]
results_list = []

for src_t, user_t in zip(src_thres, user_thres):
    # Rows whose source accuracy meets this threshold's floor.
    filtered_df = df[df['Source Accuracy'] >= src_t]

    # Pick the row with the lowest threat accuracy. A stable sort keeps the
    # original row order among exact ties, so the pick is reproducible.
    # (The earlier "within 1% of the minimum" pre-filter always retained the
    # minimum row for non-negative accuracies, so it could not influence the
    # selection and has been removed.)
    selected_row = filtered_df.sort_values(by='Threat Accuracy', kind='stable').iloc[0]

    # One summary record per user-facing threshold.
    results_list.append([
        user_t,
        selected_row['Source Accuracy'],
        selected_row['Threat Accuracy'],
        selected_row['Number of Features'],
    ])

result_df = pd.DataFrame(results_list, columns=columns)
# NOTE(review): basename keeps any extension of the session path, so the file
# may end up as "<notebook>.ipynb.csv" - confirm that this is intended.
result_df.to_csv(filename + '.csv', index=False)

In [7]:
# For each source-accuracy floor, show the qualifying rows that use the fewest
# features, ordered by ascending threat accuracy.
for src_t in src_thres:
    print("Source Accuracy Threshold:", src_t)
    # Build the threshold mask once and reuse it for both lookups below.
    meets_threshold = df['Source Accuracy'] >= src_t
    # Series.min() rather than builtin min(): NaN-safe, and it does not raise
    # when no row meets the threshold (an empty frame is displayed instead).
    min_feats = df.loc[meets_threshold, 'Number of Features'].min()
    display(
        df[meets_threshold & (df['Number of Features'] == min_feats)]
        .sort_values(by='Threat Accuracy')
    )

Source Accuracy Threshold: 0.85651575142021


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
1023,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]",0.856516,0.788346,10


Source Accuracy Threshold: 0.8479505939060079


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
640,"[0, 3, 4, 5, 6, 7, 8, 9]",0.849458,0.778361,8
480,"[1, 2, 3, 5, 6, 7, 8, 9]",0.849544,0.781029,8
384,"[1, 3, 4, 5, 6, 7, 8, 9]",0.849286,0.781718,8
736,"[0, 2, 3, 5, 6, 7, 8, 9]",0.849113,0.785419,8


Source Accuracy Threshold: 0.8308202788776037


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
604,"[0, 3, 5, 6, 8, 9]",0.830866,0.765622,6


Source Accuracy Threshold: 0.770864176278189


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
118,"[3, 4, 5, 7, 9]",0.771647,0.714839,5
214,"[2, 3, 5, 7, 9]",0.771217,0.716388,5
328,"[1, 3, 7, 8, 9]",0.778878,0.721208,5
56,"[4, 5, 7, 8, 9]",0.77225,0.721553,5
104,"[3, 4, 7, 8, 9]",0.774316,0.722155,5
152,"[2, 5, 7, 8, 9]",0.771045,0.723446,5
110,"[3, 4, 6, 7, 9]",0.780255,0.724049,5
206,"[2, 3, 6, 7, 9]",0.783267,0.724307,5
334,"[1, 3, 6, 7, 9]",0.777587,0.72534,5
200,"[2, 3, 7, 8, 9]",0.776123,0.725426,5


Source Accuracy Threshold: 0.599561025994147


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
38,"[4, 7, 9]",0.603288,0.573851,3
134,"[2, 7, 9]",0.604493,0.578929,3
262,"[1, 7, 9]",0.605096,0.582544,3
50,"[4, 5, 9]",0.613703,0.588397,3
518,"[0, 7, 9]",0.601739,0.588569,3
98,"[3, 4, 9]",0.615252,0.589344,3
194,"[2, 3, 9]",0.612584,0.590807,3
146,"[2, 5, 9]",0.614133,0.593303,3
322,"[1, 3, 9]",0.61026,0.595455,3
578,"[0, 3, 9]",0.608625,0.596488,3


Source Accuracy Threshold: 0.0


Unnamed: 0,Features,Source Accuracy,Threat Accuracy,Number of Features
1,[],1e-05,1.0,0
