# Analyze DeepProbLog (DPL) Results

In [33]:
import json
from collections import defaultdict
from pathlib import Path

import numpy as np
import pandas as pd

In [23]:
def load_windowed_data(
    base_dir: Path,
    window_size: "int | str",
    variant: str,
    splits=("train", "test"),
):
    """
    Load preprocessed windowed data.

    For every requested split, reads ``X_<split>.npy`` and
    ``y_<split>_multi_class.npy`` from ``base_dir / f"w{window_size}" / variant``.

    Parameters
    ----------
    base_dir : Path or str
        Directory that contains the ``w<window_size>`` sub-directories.
    window_size : int or str
        Window size used to name the ``w<window_size>`` sub-directory.
    variant : str
        Name of the dataset variant sub-directory.
    splits : iterable of str, optional
        Split names to load. Defaults to ``("train", "test")``.

    Returns
    -------
    data : dict
        Maps each split name to the array stored in ``X_<split>.npy``.
    labels : dict
        Maps each split name to the array stored in
        ``y_<split>_multi_class.npy``.

    Raises
    ------
    FileNotFoundError
        If the dataset directory or one of the expected ``.npy`` files
        does not exist.
    """
    dataset_path = Path(base_dir) / f"w{window_size}" / variant
    if not dataset_path.is_dir():
        # Fail early with the resolved path instead of a per-file error.
        raise FileNotFoundError(f"Windowed dataset directory not found: {dataset_path}")

    # Materialize once so a one-shot iterable can feed both dicts below.
    splits = tuple(splits)

    def _load(file_name):
        # SECURITY: allow_pickle=True lets numpy unpickle object arrays, which
        # can execute arbitrary code. Only load files from a trusted source.
        return np.load(dataset_path / file_name, allow_pickle=True)

    data = {split: _load(f"X_{split}.npy") for split in splits}
    labels = {split: _load(f"y_{split}_multi_class.npy") for split in splits}

    return data, labels

In [24]:
# Configuration: which dataset and scenario to analyze
dataset = "darpa2000"
scenario = "s1_inside"

# Root of the windowed arrays for that dataset/scenario
processed_dir = Path("../data/processed") / dataset / scenario / "windowed"

In [25]:
# Data selection: passed to load_windowed_data and used in the errors file name
dataset_variant = "resampled"
window_size = 10

# Experiment selection: used to locate the results directory / errors file
function_name = "multi_step"
train_mode = "scratch"

In [26]:
# Load the windows and multi-class labels for the selected window size / variant.
data, labels = load_windowed_data(processed_dir, window_size, dataset_variant)

In [27]:
# Test-split windows and their labels
X_test, y_test = data["test"], labels["test"]

# Inspect a single test window (position in the test split)
sample_idx = 9498
window, label = X_test[sample_idx], y_test[sample_idx]

print(f"Window shape: {window.shape}")
print(f"Label: {label}")

Window shape: (10, 112)
Label: 0


## Load Misclassified Flows

In [43]:
# Directory holding the results of the selected DeepProbLog experiment
errors_dir = (
    f"../experiments/{dataset}/{scenario}"
    f"/deepproblog/{function_name}/results"
)

# JSON file listing the misclassified test samples for this run
errors_file = (
    f"{function_name}_{train_mode}_{dataset_variant}"
    f"_full_w{window_size}_errors.json"
)
errors_path = "/".join((errors_dir, errors_file))

In [44]:
# Parse the JSON list of misclassified samples.
errors = json.loads(Path(errors_path).read_text())

print(f"Loaded {len(errors)} misclassified samples")

Loaded 120 misclassified samples


In [46]:
# For every misclassified sample, look up its window and ground-truth label
# in the test split and print the label next to the sample index.
for error in errors:
    idx = error["index"]
    window, label = X_test[idx], y_test[idx]

    print(f"Index: {idx}")
    print(f"Original label: {label}")

Index: 7047
Original label: 0
Index: 8324
Original label: 0
Index: 8325
Original label: 0
Index: 8734
Original label: 0
Index: 8858
Original label: 0
Index: 8920
Original label: 0
Index: 8923
Original label: 0
Index: 9242
Original label: 0
Index: 9307
Original label: 0
Index: 9359
Original label: 0
Index: 9360
Original label: 0
Index: 9366
Original label: 0
Index: 9482
Original label: 0
Index: 9483
Original label: 0
Index: 9484
Original label: 0
Index: 9485
Original label: 0
Index: 9488
Original label: 0
Index: 9489
Original label: 0
Index: 9490
Original label: 0
Index: 9497
Original label: 0
Index: 9498
Original label: 0
Index: 9499
Original label: 0
Index: 9500
Original label: 0
Index: 9501
Original label: 0
Index: 9504
Original label: 0
Index: 9566
Original label: 0
Index: 9567
Original label: 0
Index: 9622
Original label: 0
Index: 9630
Original label: 0
Index: 9635
Original label: 0
Index: 9636
Original label: 0
Index: 9637
Original label: 0
Index: 9638
Original label: 0
Index: 963

In [47]:
# False positive: a benign sample predicted as any non-benign class.
false_positives = list(
    filter(
        lambda rec: rec["actual"] == "benign" and rec["predicted"] != "benign",
        errors,
    )
)

print("False positives:", len(false_positives))

False positives: 102


In [48]:
# Group the indices of the misclassified samples by the class that was predicted.
# (defaultdict is imported in the notebook's top import cell.)
by_predicted = defaultdict(list)

for e in errors:
    by_predicted[e["predicted"]].append(e["index"])

# One line per predicted class: class name and number of errors.
for cls, indices in by_predicted.items():
    n_errors = len(indices)
    print(cls, n_errors)

phase1 3
phase2 38
phase3 23
phase4 31
benign 18
phase5 7


In [50]:
# Tabulate the error records (one row per misclassified sample).
df = pd.DataFrame(errors)
print(df.head(5))

# Number of errors for each (actual, predicted) class pair
pair_counts = df.groupby(["actual", "predicted"]).size()
print(pair_counts)

   index  actual predicted                                 confidence  \
0   7047  benign    phase1  tensor(0.9129, grad_fn=<SelectBackward0>)   
1   8324  benign    phase1  tensor(0.5843, grad_fn=<SelectBackward0>)   
2   8325  benign    phase1  tensor(0.9438, grad_fn=<SelectBackward0>)   
3   8734  benign    phase2  tensor(0.9001, grad_fn=<SelectBackward0>)   
4   8858  benign    phase2  tensor(0.5564, grad_fn=<SelectBackward0>)   

                                          test_query  
0  (1.0::multi_step(X,0,0,0,0,X_5), {X: tensor(te...  
1  (1.0::multi_step(X,0,0,0,0,X_5), {X: tensor(te...  
2  (1.0::multi_step(X,0,0,0,0,X_5), {X: tensor(te...  
3  (1.0::multi_step(X,1,0,0,0,X_5), {X: tensor(te...  
4  (1.0::multi_step(X,1,0,0,0,X_5), {X: tensor(te...  
actual  predicted
benign  phase1        3
        phase2       38
        phase3       23
        phase4       31
        phase5        7
phase4  benign        1
phase5  benign       17
dtype: int64


## Load Original Data

In [30]:
# Load the labeled flows, order them chronologically, and add `t_rel`:
# the offset of each flow's start_time from the earliest start_time.
flows_csv = f"../data/interim/{dataset}/{scenario}/flows_labeled/all_flows_labeled.csv"

df = (
    pd.read_csv(flows_csv)
    .sort_values("start_time")
    .reset_index(drop=True)
    .assign(t_rel=lambda flows: flows["start_time"] - flows["start_time"].min())
)

In [31]:
# Keep only rows whose phase is greater than 0, then take the earliest and
# latest relative time observed for each phase.
phased_flows = df.loc[df["phase"] > 0]
phase_bounds = phased_flows.groupby("phase")["t_rel"].agg(["min", "max"])

# phase -> first / last relative time, as plain dicts
phase_start, phase_end = (
    phase_bounds[column].to_dict() for column in ("min", "max")
)

In [32]:
# Display the per-phase min/max of t_rel.
phase_bounds

Unnamed: 0_level_0,min,max
phase,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1799.665435,1824.341042
2,2790.877254,3333.974143
3,4294.134775,4402.837414
4,5305.272464,5357.444592
5,7478.96066,8383.254878
