In [1]:
# Set the working directory to the parent directory
import sys
sys.path.append('..')
sys.dont_write_bytecode = True

# Import relevant custom libraries
from src.eda import data_info
from src.evaluation import ValidationEvaluation

# Import relevant libraries
import pandas as pd
import warnings
from IPython.display import display
import matplotlib.pyplot as plt
import math
import os
import glob
import numpy as np

# --- Notebook-wide settings ---
warnings.filterwarnings("ignore")  # silence all warnings for the rest of the session
pd.set_option('display.max_columns', None)  # never truncate columns when a frame is displayed

# --- Training splits ---
X_train = pd.read_feather("../data/processed/X_train.feather")
X_train_validate = pd.read_feather("../data/processed/X_train_validate.feather")

# --- Column groups, derived from the variable table that data_info builds for X_train ---
var_info = data_info(X_train)
all_cols = X_train.columns
real_cols = var_info.loc[var_info["var_type"] == "numerical", "var_name"].tolist()
binary_cols = var_info.loc[var_info["var_type"] == "binary", "var_name"].tolist()

# --- Validation split ---
X_validate = pd.read_feather("../data/processed/X_validate.feather")
y_validate = pd.read_feather("../data/processed/y_validate.feather")

# --- Validation evaluator (constructed with dp_sgd=True) ---
valeval = ValidationEvaluation(X_validate, y_validate, real_cols, binary_cols, all_cols, dp_sgd=True)

# --- Tuning log to parse ---
log_path = "../logs/dpsgd_tune_log.txt"

# Pull the latest successful Bayesian-tuning versions out of the log and print them
latest_successful_versions = valeval.extract_latest_successful_bayesian_versions(log_path)
print(latest_successful_versions)

# Evaluate every extracted version; `eval_results` is reused by later cells
eval_results = valeval.evaluate_model_performance(latest_successful_versions)

{'202505161637': ('AUC', 1.0, 1e-05, datetime.datetime(2025, 5, 16, 18, 54, 18, 622859)), '202505070419': ('Precision', 1.0, 1e-05, datetime.datetime(2025, 5, 15, 20, 15, 45, 649085)), '202505170232': ('F1-Score', 1.0, 1e-05, datetime.datetime(2025, 5, 17, 5, 26, 3, 436569)), '202505170910': ('Recall', 1.0, 1e-05, datetime.datetime(2025, 5, 17, 15, 36, 10, 805811)), '202505161903': ('AUC', 3.0, 1e-05, datetime.datetime(2025, 5, 17, 1, 21, 4, 994784)), '202505170526': ('F1-Score', 3.0, 1e-05, datetime.datetime(2025, 5, 17, 7, 32, 36, 458159)), '202505150921': ('Precision', 3.0, 1e-05, datetime.datetime(2025, 5, 15, 20, 44, 23, 491416)), '202505150606': ('Recall', 3.0, 1e-05, datetime.datetime(2025, 5, 16, 0, 37, 31, 384408)), '202505150349': ('AUC', 5.0, 1e-05, datetime.datetime(2025, 5, 16, 1, 29, 3, 290500)), '202505170732': ('F1-Score', 5.0, 1e-05, datetime.datetime(2025, 5, 17, 9, 10, 19, 355577)), '202505151015': ('Precision', 5.0, 1e-05, datetime.datetime(2025, 5, 15, 21, 3, 33, 6

In [4]:
from src.dp_utils_poisson import DPSGDSanitizer

# Build a sanitizer sized to the training set and print the noise value it
# derives from its privacy settings.
# NOTE(review): the positional arguments after len(X_train) are unnamed here
# (presumably batch size, target epsilon, epochs/steps and delta) — confirm
# against the DPSGDSanitizer signature.
san = DPSGDSanitizer(len(X_train), 64, 3, 500, 1e-5)
noise_from_eps = san.compute_noise_from_eps()
print(noise_from_eps)

2.468514516045322


In [3]:
import tensorflow as tf
import pickle
import os

# Work from the project root so the relative paths below resolve on any machine
# (previously a hard-coded, user-specific absolute path).
# The notebook runs one level below the root (earlier cells read "../data/..."),
# so keep the current directory if it already contains `src/`, otherwise use its
# parent. This is idempotent: re-running the cell after the chdir is a no-op.
_cwd = os.getcwd()
PROJECT_ROOT = _cwd if os.path.isdir(os.path.join(_cwd, "src")) else os.path.dirname(_cwd)
os.chdir(PROJECT_ROOT)
from src.models import AnomalyDetector

X_test = pd.read_feather("data/processed/X_test.feather")

# Make sure the output folder exists before any file is written into it
PREDICTIONS_DIR = "experiments/predictions/baseline"
os.makedirs(PREDICTIONS_DIR, exist_ok=True)

# NOTE(review): `eval_results`, `real_cols`, `binary_cols` and `all_cols` come from
# an earlier cell. The recorded output of this cell lists version ids that differ
# from the DP-SGD versions printed by that cell, so it was presumably run against
# a baseline evaluation — confirm before re-running.
for version in eval_results.index.tolist():
    print(version)

    # Load the saved model and its tuned hyperparameters.
    # pickle.load executes arbitrary code on load; only use with trusted local files.
    model = tf.keras.models.load_model(f"models/baseline/{version}")
    with open(f"hyperparams/baseline/{version}.pkl", "rb") as f:
        params = pickle.load(f)

    detector = AnomalyDetector(
        model=model,
        real_cols=real_cols,
        binary_cols=binary_cols,
        all_cols=all_cols,
        lam=params["lam"],
        gamma=params["gamma"],
    )

    # Anomaly scores plus the reconstructed inputs for the test set
    scores, x_hat = detector._compute_anomaly_scores(X_test, test_set=True)

    # Save reconstructed data
    pd.DataFrame(x_hat, columns=all_cols).to_feather(f"{PREDICTIONS_DIR}/{version}_recons.feather")

    # Turn scores into anomaly predictions using the stored threshold
    y_pred = detector._detect(scores, params['threshold'])

    # Save predictions
    pd.DataFrame(y_pred, columns=["anomaly"]).to_feather(f"{PREDICTIONS_DIR}/{version}_pred.feather")

202505080154
202505080250
202505112013
202505080012


In [1]:
import pickle

# Load the stored hyperparameters of DP-SGD version 202506070611 into `params`.
# pickle.load executes arbitrary code on load; only use with trusted local files.
hyperparams_file = "../hyperparams/dpsgd/202506070611.pkl"
with open(hyperparams_file, mode="rb") as f:
    params = pickle.load(f)

In [2]:
# Manually override two entries of the loaded hyperparameters; the following
# cell writes `params` back to the same pickle file.
# NOTE(review): both values are hand-entered and their provenance is not shown
# in this notebook — confirm where they come from.
params['threshold'] = 0.06004473716020584
params['q'] = 0.7

In [3]:
# Persist the edited hyperparameters, overwriting the pickle they were loaded from
hyperparams_file = "../hyperparams/dpsgd/202506070611.pkl"
with open(hyperparams_file, mode="wb") as f:
    pickle.dump(params, f)

In [4]:
import pickle
import tensorflow as tf
import pandas as pd
import sys
sys.path.append('..')
sys.dont_write_bytecode = True

# Import relevant custom libraries
from src.eda import data_info
from src.evaluation import AnomalyDetector

import os

# Work from the project root so the relative paths below resolve on any machine
# (previously a hard-coded, user-specific absolute path).
# The notebook runs one level below the root (`sys.path.append('..')` is what
# makes `src` importable), so keep the current directory if it already contains
# `src/`, otherwise use its parent. Idempotent across re-runs of this cell.
_cwd = os.getcwd()
PROJECT_ROOT = _cwd if os.path.isdir(os.path.join(_cwd, "src")) else os.path.dirname(_cwd)
os.chdir(PROJECT_ROOT)

# Version id of the DP-SGD run evaluated in this session
VERSION = "202506070611"

# Test split
X_test = pd.read_feather("data/processed/X_test.feather")
y_test = pd.read_feather("data/processed/y_test.feather")

# Column groups, derived from the variable table that data_info builds for X_test.
# NOTE(review): the validation cell derives these groups from X_train instead —
# confirm that both frames yield the same column typing.
var_info = data_info(X_test)
all_cols = X_test.columns
real_cols = var_info[var_info["var_type"]=="numerical"]["var_name"].tolist()
binary_cols = var_info[var_info["var_type"]=="binary"]["var_name"].tolist()

# Load the tuned hyperparameters and the saved model for VERSION.
# pickle.load executes arbitrary code on load; only use with trusted local files.
with open(f"hyperparams/dpsgd/{VERSION}.pkl", "rb") as f:
    params = pickle.load(f)
model = tf.keras.models.load_model(f"models/dpsgd/{VERSION}")



In [5]:
# Wrap the loaded model in an AnomalyDetector, passing the column groups, the
# tuned `lam` / `gamma` hyperparameters and the privacy settings of this run.
detector = AnomalyDetector(
    model=model,
    real_cols=real_cols,
    binary_cols=binary_cols,
    all_cols=all_cols,
    lam=params["lam"],
    gamma=params["gamma"],
    target_epsilon=3,
    delta=1e-5,
)

In [6]:
# Score the test set, flag anomalies using the stored threshold, then compare
# the predictions with the true labels.
scores = detector._compute_anomaly_scores(X_test)
threshold = params["threshold"]
y_pred = detector._detect(scores, threshold)
perf = detector._evaluate(y_pred, y_test, scores)

In [7]:
# Display the evaluation metrics returned by detector._evaluate
perf

{'accuracy': 0.7446102819237148,
 'precision': 0.41311379006174653,
 'recall': 0.6017130620985011,
 'f1_score': 0.4898884239888424,
 'auc': 0.7593880854671883}