In [67]:
import plotly.express as px
import numpy as np
import pandas as pd
import pickle
from sklearn.metrics import precision_recall_curve, roc_curve, roc_auc_score, auc
import pickle
import turicreate as tc
import re
import glob
from utils import load_net
import plotly.graph_objects as go

In [2]:
# Ground-truth table, indexed by its first CSV column. read_and_conv looks rows
# up by APK identifier and reads the 'malware_label' column.
labels = pd.read_csv(
    '../res/newclusterres/apks_labels_hashed.csv',
    index_col=0,
)

In [3]:
def read_and_conv(fname, labels):
    """Load pickled evaluation results and tabulate them, one row per APK.

    Parameters
    ----------
    fname : str
        Path to a pickle holding an iterable of ``(record, value)`` pairs,
        where ``record`` provides the ``'apk'`` and ``'nn'`` entries.
    labels : pandas.DataFrame
        Table indexed by APK identifier with a ``'malware_label'`` column.

    Returns
    -------
    pandas.DataFrame
        Indexed by APK identifier with columns ``'nearest'`` (the record's
        ``'nn'``), ``'prob'`` (the paired value) and ``'true_val'`` (the
        logical negation of the APK's ``'malware_label'``). If an APK occurs
        more than once, the last occurrence wins.
    """
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(fname, 'rb') as handle:
        pairs = pickle.load(handle)

    rows = {
        rec['apk']: [
            rec['nn'],
            score,
            not labels.loc[rec['apk'], 'malware_label'],
        ]
        for rec, score in pairs
    }
    return pd.DataFrame.from_dict(
        rows,
        orient='index',
        columns=['nearest', 'prob', 'true_val'],
    )

def process_filelist(fnames, prefix):
    """Compute a precision-recall curve and its AUC for each result file.

    Parameters
    ----------
    fnames : iterable of str
        Paths to pickled evaluation results (see ``read_and_conv``). Each path
        must contain at least one decimal number such as ``0.65``; the first
        one found is used to label the entry.
    prefix : str
        Prepended to the parsed number to build the result key.

    Returns
    -------
    list
        ``[dfs, res]`` where ``dfs`` is the list of per-file DataFrames in
        input order and ``res`` maps ``f"{prefix}-{g}"`` to
        ``[precision, recall, pr_auc]``.

    Raises
    ------
    IndexError
        If a file name contains no decimal number.

    Notes
    -----
    Reads the notebook-level ``labels`` DataFrame instead of taking it as a
    parameter, so ``labels`` must be defined before this function is called.
    """
    dfs = []
    res = {}
    for fname in fnames:
        df = read_and_conv(fname=fname, labels=labels)
        # Collect the per-file frame itself, not the accumulator list.
        dfs.append(df)

        # The thresholds returned alongside the curve are not needed here.
        p1, r1, _ = precision_recall_curve(df['true_val'], df['prob'])
        auc1 = auc(r1, p1)

        # Raw string so the backslashes reach the regex engine unchanged.
        g = float(re.findall(r"\d+\.\d+", fname)[0])
        res[f"{prefix}-{g}"] = [p1, r1, auc1]

    return [dfs, res]

In [6]:
# Denominator used by the plotting cell to turn net sizes into a compression
# ratio. NOTE(review): presumably the size of the full, uncompressed data set;
# confirm where this number comes from.
f_size = 189_452

In [7]:
# Sweep every (partition size, gamma) combination of the second run. For each
# partition size, collect in gamma order:
#   fnames[p_size] - paths of the evaluation-result pickles
#   aucs[p_size]   - PR-AUC computed from each evaluation result
#   sizes[p_size]  - len() of the merged net loaded for that combination
gammas = [0.3, 0.35, 0.4, 0.45, 0.5, 0.6, 0.65, 0.75, 0.85, 0.9, 0.95]
fnames, aucs, sizes = {}, {}, {}

for p_size in range(2, 10, 2):
    eval_paths, pr_aucs, net_sizes = [], [], []

    for gamma in gammas:
        eval_path = f"../res/newclusterres/2ndrun/streamed-{gamma}-{p_size}/{gamma}-{p_size}-evalresults.pickle"
        net_path = f"../res/newclusterres/2ndrun/streamed-{gamma}-{p_size}/merged-{gamma}-0-tc-nets-voting.pickle"

        # Only the second element returned by load_net is used.
        _, net = load_net(net_path)
        net_sizes.append(len(net))
        eval_paths.append(eval_path)

        eval_df = read_and_conv(eval_path, labels)
        precision, recall, _thresholds = precision_recall_curve(
            eval_df['true_val'], eval_df['prob']
        )
        pr_aucs.append(auc(recall, precision))

    fnames[p_size] = eval_paths
    aucs[p_size] = pr_aucs
    sizes[p_size] = net_sizes
    

In [66]:

# One trace per partition size: x is the net size divided by f_size, y is the
# PR-AUC, and the gamma values are attached as hover text.
fig = go.Figure()

for k, net_sizes in sizes.items():
    compression = [size / f_size for size in net_sizes]
    fig = fig.add_trace(
        go.Scatter(
            x=compression,
            y=aucs[k],
            name=f"{k*10} %",
            text=gammas,
            hovertemplate='AuC: %{y:.3f}<br>Comp: %{x}<br>Epsilon: %{text}',
        )
    )

# Titles and hover mode are applied in a single layout update.
fig.update_layout(
    title="Compression vs. PRAuC",
    xaxis_title="Compression",
    yaxis_title="PRAuC",
    legend_title="Size of the origin partition",
    hovermode="x",
)

fig.show()