In [1]:
# Set the working directory to the parent directory
import sys
sys.path.append('..')
sys.dont_write_bytecode = True

# Import relevant custom libraries
from src.eda import data_info
from src.evaluation import ValidationEvaluation

# Import relevant libraries
import pandas as pd
import warnings
from IPython.display import display
import matplotlib.pyplot as plt
import math
import os
import glob
import numpy as np

# Notebook-wide settings: silence all warnings and never truncate the column
# list when a frame is displayed.
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)

# Load the training split and the train-side validation split
X_train = pd.read_feather("../data/processed/X_train.feather")
X_train_validate = pd.read_feather("../data/processed/X_train_validate.feather")

# Split the column names by the variable type reported by data_info
var_info = data_info(X_train)
all_cols = X_train.columns
real_cols = var_info.loc[var_info["var_type"] == "numerical", "var_name"].tolist()
binary_cols = var_info.loc[var_info["var_type"] == "binary", "var_name"].tolist()

# Load the validation features and labels
X_validate = pd.read_feather("../data/processed/X_validate.feather")
y_validate = pd.read_feather("../data/processed/y_validate.feather")

# Build the evaluator on the validation split with the DP-SGD flag switched on
valeval = ValidationEvaluation(
    X_validate,
    y_validate,
    real_cols,
    binary_cols,
    all_cols,
    dp_sgd=True,
)

# Tuning log that the evaluator parses
log_path = "../logs/dpsgd_tune_log.txt"

# Pull the latest successful Bayesian versions out of the log and show them
latest_successful_versions = valeval.extract_latest_successful_bayesian_versions(log_path)
print(latest_successful_versions)

# Evaluate the selected versions
eval_results = valeval.evaluate_model_performance(latest_successful_versions)

{'202505161637': ('AUC', 1.0, 1e-05, datetime.datetime(2025, 5, 16, 18, 54, 18, 622859)), '202505070419': ('Precision', 1.0, 1e-05, datetime.datetime(2025, 5, 15, 20, 15, 45, 649085)), '202505170232': ('F1-Score', 1.0, 1e-05, datetime.datetime(2025, 5, 17, 5, 26, 3, 436569)), '202505170910': ('Recall', 1.0, 1e-05, datetime.datetime(2025, 5, 17, 15, 36, 10, 805811)), '202505161903': ('AUC', 3.0, 1e-05, datetime.datetime(2025, 5, 17, 1, 21, 4, 994784)), '202505170526': ('F1-Score', 3.0, 1e-05, datetime.datetime(2025, 5, 17, 7, 32, 36, 458159)), '202505150921': ('Precision', 3.0, 1e-05, datetime.datetime(2025, 5, 15, 20, 44, 23, 491416)), '202505150606': ('Recall', 3.0, 1e-05, datetime.datetime(2025, 5, 16, 0, 37, 31, 384408)), '202505150349': ('AUC', 5.0, 1e-05, datetime.datetime(2025, 5, 16, 1, 29, 3, 290500)), '202505170732': ('F1-Score', 5.0, 1e-05, datetime.datetime(2025, 5, 17, 9, 10, 19, 355577)), '202505151015': ('Precision', 5.0, 1e-05, datetime.datetime(2025, 5, 15, 21, 3, 33, 6

In [4]:
from src.dp_utils_poisson import DPSGDSanitizer

# Build the sanitizer from the number of training rows plus four positional
# settings.
# NOTE(review): the meaning of 64, 3, 500 and 1e-5 is not visible in this
# notebook (presumably batch size, target epsilon, epochs and delta) — confirm
# against DPSGDSanitizer's signature and consider passing them by keyword.
san = DPSGDSanitizer(len(X_train), 64, 3, 500, 1e-5)
# Print whatever compute_noise_from_eps() returns for that configuration.
print(san.compute_noise_from_eps())

2.468514516045322


In [3]:
import tensorflow as tf
import pickle
import os
os.chdir("/Users/trinhha/Documents/VU AMSTERDAM/STUDY/Thesis/Code/")
from src.models import AnomalyDetector

# Test features that every selected version is scored on
X_test = pd.read_feather("data/processed/X_test.feather")

for version in eval_results.index.tolist():
    print(version)

    # Restore the trained network and its tuned hyperparameters.
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    model = tf.keras.models.load_model(f"models/baseline/{version}")
    with open(f"hyperparams/baseline/{version}.pkl", "rb") as f:
        params = pickle.load(f)

    detector = AnomalyDetector(
        model=model,
        real_cols=real_cols,
        binary_cols=binary_cols,
        all_cols=all_cols,
        lam=params["lam"],
        gamma=params["gamma"],
    )

    # With test_set=True the detector returns the scores and the reconstruction
    scores, x_hat = detector._compute_anomaly_scores(X_test, test_set=True)

    # Persist the reconstruction under the original column names
    recons_frame = pd.DataFrame(x_hat, columns=all_cols)
    recons_frame.to_feather(f"experiments/predictions/baseline/{version}_recons.feather")

    # Apply the stored threshold to the scores
    y_pred = detector._detect(scores, params['threshold'])

    # Persist the predictions as a single "anomaly" column
    pred_frame = pd.DataFrame(y_pred, columns=["anomaly"])
    pred_frame.to_feather(f"experiments/predictions/baseline/{version}_pred.feather")

202505080154
202505080250
202505112013
202505080012


In [1]:
import pickle

# Load the stored hyperparameters of DP-SGD version 202506070611.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open("../hyperparams/dpsgd/202506070611.pkl", "rb") as f:
    params = pickle.load(f)

In [2]:
# Overwrite the stored 'threshold' and 'q' entries in one step.
# NOTE(review): both values are literals; where they come from is not recorded
# in this cell.
params.update({'threshold': 0.06004473716020584, 'q': 0.7})

In [3]:
# Write the edited parameters back to the same pickle file; "wb" replaces the
# previous file contents.
with open("../hyperparams/dpsgd/202506070611.pkl", "wb") as f:
    pickle.dump(params, f)

In [4]:
import pickle
import tensorflow as tf
import pandas as pd
import sys
sys.path.append('..')
sys.dont_write_bytecode = True

# Import relevant custom libraries
from src.eda import data_info
from src.evaluation import AnomalyDetector

import os
os.chdir("/Users/trinhha/Documents/VU AMSTERDAM/STUDY/Thesis/Code/")

# Load the test features and labels
X_test = pd.read_feather("data/processed/X_test.feather")
y_test = pd.read_feather("data/processed/y_test.feather")

# Split the column names by the variable type reported by data_info
var_info = data_info(X_test)
all_cols = X_test.columns
real_cols = var_info[var_info["var_type"]=="numerical"]["var_name"].tolist()
binary_cols = var_info[var_info["var_type"]=="binary"]["var_name"].tolist()

# Restore the hyperparameters and the trained network of version 202506070611.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open("hyperparams/dpsgd/202506070611.pkl", "rb") as f:
    params = pickle.load(f)
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
model = tf.keras.models.load_model("models/dpsgd/202506070611")



In [5]:
# Wrap the loaded network in a detector, forwarding the column groups, the
# stored `lam`/`gamma` values and the privacy settings (epsilon 3, delta 1e-5).
detector = AnomalyDetector(
    model=model,
    real_cols=real_cols,
    binary_cols=binary_cols,
    all_cols=all_cols,
    lam=params["lam"],
    gamma=params["gamma"],
    target_epsilon=3,
    delta=1e-5,
)

In [6]:
# Score the test rows, apply the stored threshold to obtain predictions, and
# evaluate those predictions against y_test.
# NOTE(review): all three calls use underscore-prefixed (private-by-convention)
# methods of AnomalyDetector.
scores = detector._compute_anomaly_scores(X_test)
y_pred = detector._detect(scores, params["threshold"])
perf = detector._evaluate(y_pred, y_test, scores)

In [7]:
# Display the value returned by detector._evaluate.
perf

{'accuracy': 0.7446102819237148,
 'precision': 0.41311379006174653,
 'recall': 0.6017130620985011,
 'f1_score': 0.4898884239888424,
 'auc': 0.7593880854671883}

In [1]:
import os
os.chdir("/Users/trinhha/Documents/VU AMSTERDAM/STUDY/Thesis/Code/")
import re
from datetime import datetime
import pandas as pd
import tensorflow as tf
import pickle
from src.models import AutoencoderTrainer, AnomalyDetector
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from math import pi
from src.dp_utils import *
from src.eda import data_info
from tqdm import tqdm
import random

class StatisticalEval():
    """Retrain the selected baseline and DP-SGD models under one fixed seed.

    Constructing an instance reads the train, train-validation and test splits
    from ``data/processed`` (relative to the current working directory) and
    derives the numerical/binary column lists from ``data_info``.
    """

    def __init__(self):
        """Load the data splits and the column metadata used for training."""

        # Load training data
        self.X_train = pd.read_feather("data/processed/X_train.feather")

        # Load train-validation data
        self.X_train_val = pd.read_feather("data/processed/X_train_validate.feather")

        # Load test data
        self.X_test = pd.read_feather("data/processed/X_test.feather")
        self.y_test = pd.read_feather("data/processed/y_test.feather")

        # Extract variable types from metadata
        self.var_info = data_info(self.X_test)
        self.all_cols = self.X_test.columns
        self.real_cols = self.var_info[self.var_info["var_type"] == "numerical"]["var_name"].tolist()
        self.binary_cols = self.var_info[self.var_info["var_type"] == "binary"]["var_name"].tolist()

        # Metrics: column key -> label
        self.metric_labels = {
            "precision": "Precision",
            "recall": "Recall",
            "f1_score": "F1-Score",
            "auc": "AUC"
        }

    def _single_eval(self, model_type, version, epsilon, delta, seed=None):
        """Train one model from its stored hyperparameters and return it.

        Args:
            model_type: Sub-directory of ``hyperparams/`` to read from. DP-SGD
                training is enabled only when this equals ``"dpsgd"``.
            version: Name (without extension) of the hyperparameter pickle;
                also forwarded to the trainer as ``version``.
            epsilon: Forwarded to the trainer as ``target_epsilon``.
            delta: Forwarded to the trainer as ``delta``.
            seed: Seed applied to TensorFlow, NumPy and ``random`` before the
                trainer is built.

        Returns:
            The object returned by ``AutoencoderTrainer.train``.
        """
        # NOTE: pickle.load can execute arbitrary code; only open trusted files.
        with open(f"hyperparams/{model_type}/{version}.pkl", "rb") as f:
            params = pickle.load(f)

        # Seed every random source before any training object is created
        tf.random.set_seed(seed)
        np.random.seed(seed)
        random.seed(seed)

        # 'threshold' and 'q' are stored with the hyperparameters but are not
        # forwarded to the trainer.
        trainer_params = {
            key: value
            for key, value in params.items()
            if key not in ('threshold', 'q')
        }

        # Train model
        trainer = AutoencoderTrainer(
            input_dim=self.X_train.shape[1],
            real_cols=self.real_cols,
            binary_cols=self.binary_cols,
            all_cols=self.all_cols,
            activation='relu',
            patience_limit=10,
            verbose=False,
            dp_sgd=model_type == "dpsgd",
            post_hoc=False,
            target_epsilon=epsilon,
            delta=delta,
            version=version,
            save_tracking=True,
            raise_convergence_error=True,
            **trainer_params
        )
        model = trainer.train(self.X_train, self.X_train_val)

        return model

    def final_eval(self, metric_used, seed):
        """Retrain and save every model that was tuned by ``metric_used``.

        Versions are taken from the two validation summaries under
        ``experiments/perf_summary``; each trained model is saved to
        ``models/<model_type>/<version>_final``.

        Args:
            metric_used: Value matched against the ``tuned_by`` column.
            seed: Seed forwarded to every training run.
        """
        # Select the versions tuned by the requested metric
        # Baseline
        baseline = pd.read_csv("experiments/perf_summary/baseline_val_results.csv")
        baseline_model = baseline.query(f'tuned_by == "{metric_used}"')
        # DP-SGD
        dpsgd = pd.read_csv("experiments/perf_summary/dpsgd_val_results.csv")
        dpsgd_models = dpsgd.query(f'tuned_by == "{metric_used}"')

        # Baseline versions are trained with dp_sgd disabled; epsilon and
        # delta are passed as 0.
        for version in baseline_model["version"].tolist():
            model = self._single_eval("baseline", version, 0, 0, seed)
            model.save(f"models/baseline/{version}_final")

        # Iterate column-wise rather than with iterrows() so each value keeps
        # the dtype of its own column instead of being coerced per row.
        dpsgd_runs = zip(
            dpsgd_models["version"].tolist(),
            dpsgd_models["epsilon"].tolist(),
            dpsgd_models["delta"].tolist(),
        )
        for version, epsilon, delta in dpsgd_runs:
            model = self._single_eval("dpsgd", version, epsilon, delta, seed)
            model.save(f"models/dpsgd/{version}_final")

In [2]:
# Define labels for the metrics to be plotted
metric_labels = {
    "precision": "Precision",
    "recall": "Recall",
    "f1_score": "F1-Score",
    "auc": "AUC",
    "fidelity":  "Fidelity"
}
metric_used = "AUC"

# Get the best baseline model: versions tuned by `metric_used`, looked up in
# the baseline test metrics
baseline = pd.read_csv("experiments/perf_summary/baseline_val_results.csv")
baseline_model = baseline.query(f'tuned_by == "{metric_used}"')["version"].astype(str).tolist()
baseline_test_perf = pd.read_csv("results/metrics/baseline.csv")
baseline_test_perf["version"] = baseline_test_perf["version"].astype(str)
baseline_best = baseline_test_perf[baseline_test_perf["version"].isin(baseline_model)]
display(baseline_best)

# Same lookup for the DP-SGD models, ordered by increasing eps
dpsgd = pd.read_csv("experiments/perf_summary/dpsgd_val_results.csv")
dpsgd_models = dpsgd.query(f'tuned_by == "{metric_used}"')["version"].astype(str).tolist()
dpsgd_test_perf = pd.read_csv("results/metrics/dpsgd.csv")
dpsgd_test_perf["version"] = dpsgd_test_perf["version"].astype(str)
dpsgd_best = dpsgd_test_perf[dpsgd_test_perf["version"].isin(dpsgd_models)].sort_values(by="eps", ascending=True)
display(dpsgd_best)

# Map each display label to its "<model_type>/<version>" path stem.
# NOTE(review): every baseline version maps to the same "Baseline" key, so
# only the last one survives if several match.
model_list = {"Baseline": f"baseline/{m}" for m in baseline_best["version"].tolist()}
model_list.update({
    r"$\varepsilon=$" + f"{row['eps']:.0f}": f"dpsgd/{row['version']}"
    for _, row in dpsgd_best.sort_values("eps", ascending=False).iterrows()
})

# Read each per-seed results file once and reuse it below
perf_raw = {
    label: pd.read_csv(f"results/stats_eval/{stem}.csv")
    for label, stem in model_list.items()
}
# Shortest run count across models; all frames are truncated to this length
min_len = min([len(frame) for frame in perf_raw.values()])

# Get the seed values
seeds = pd.read_csv("results/stats_eval/seeds.txt", header=None).rename(columns={0:"seed"})
# NOTE(review): the median position is derived from the seed file length while
# each perf frame is truncated to min_len rows — confirm the two match.
median_ind = len(seeds) // 2

# Gather the per-seed metrics of every model (scaled to percentages)
perf_frames = []
for key, value in model_list.items():
    print("Model:", key)
    perf = perf_raw[key][:min_len]*100
    perf.insert(0, "Model", key)
    # Assign the seed column itself (aligned on the row index)
    perf["seed"] = seeds["seed"]
    display(perf.sort_values(by="AUC")[median_ind:median_ind+1])

    perf_frames.append(perf)

# Concatenate once instead of growing a frame inside the loop
perf_stats = pd.concat(perf_frames, ignore_index=True)

# Average the metrics over models per seed and pick the seed at the median
# AUC position
mean_by_seed = perf_stats.drop(columns=["Model"]).groupby("seed").mean().sort_values(by="AUC")
median_seed = mean_by_seed[median_ind:median_ind+1].index[0]
print(median_seed)

Unnamed: 0,accuracy,precision,recall,f1_score,auc,version,timestamp
24,0.780134,0.472909,0.687794,0.560461,0.81813,202505080012,2025-06-10 00:25:54


Unnamed: 0,accuracy,precision,recall,f1_score,auc,version,eps,delta,timestamp
108,0.742254,0.411054,0.611563,0.491651,0.7555,202506071334,1.0,1e-05,2025-06-10 03:54:53
107,0.753775,0.425643,0.595717,0.49652,0.764574,202506070611,3.0,1e-05,2025-06-11 01:29:12
106,0.754473,0.430076,0.62955,0.511038,0.788205,202506070329,5.0,1e-05,2025-06-11 04:26:30


Model: Baseline


Unnamed: 0,Model,Precision,Recall,F1-Score,AUC,seed
93,Baseline,47.861507,70.449679,56.999307,83.510193,9652


Model: $\varepsilon=$5


Unnamed: 0,Model,Precision,Recall,F1-Score,AUC,Fidelity,seed
68,$\varepsilon=$5,44.690137,65.781585,53.222453,79.886016,86.628262,29724


Model: $\varepsilon=$3


Unnamed: 0,Model,Precision,Recall,F1-Score,AUC,Fidelity,seed
65,$\varepsilon=$3,44.267228,62.997859,51.997172,78.722772,85.196823,220153


Model: $\varepsilon=$1


Unnamed: 0,Model,Precision,Recall,F1-Score,AUC,Fidelity,seed
39,$\varepsilon=$1,41.169625,60.599572,49.029799,75.478644,81.338919,910578


943413


In [None]:
# Initialize the evaluation class
# NOTE(review): binding the instance to `eval` shadows Python's built-in
# eval() for the rest of the kernel session — consider another name.
eval = StatisticalEval()
# Run the final evaluation: retrain and save the AUC-tuned models using the
# median seed computed in the previous cell.
eval.final_eval(metric_used="AUC", seed=int(median_seed))

INFO:tensorflow:Assets written to: models/baseline/202505080012_final/assets


INFO:tensorflow:Assets written to: models/baseline/202505080012_final/assets






INFO:tensorflow:Assets written to: models/dpsgd/202506071334_final/assets


INFO:tensorflow:Assets written to: models/dpsgd/202506071334_final/assets






INFO:tensorflow:Assets written to: models/dpsgd/202506070611_final/assets


INFO:tensorflow:Assets written to: models/dpsgd/202506070611_final/assets






INFO:tensorflow:Assets written to: models/dpsgd/202506070329_final/assets


INFO:tensorflow:Assets written to: models/dpsgd/202506070329_final/assets


In [7]:
# Test features and the column groups derived from them
X_test = pd.read_feather("data/processed/X_test.feather")
var_info = data_info(X_test)
all_cols = X_test.columns
real_cols = var_info.loc[var_info["var_type"] == "numerical", "var_name"].tolist()
binary_cols = var_info.loc[var_info["var_type"] == "binary", "var_name"].tolist()

for key, value in model_list.items():
    # `value` is the "<model_type>/<version>" stem shared by all paths below.
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    model = tf.keras.models.load_model(f"models/{value}_final")
    with open(f"hyperparams/{value}.pkl", "rb") as f:
        params = pickle.load(f)

    detector = AnomalyDetector(
        model=model,
        real_cols=real_cols,
        binary_cols=binary_cols,
        all_cols=all_cols,
        lam=params['lam'],
        gamma=params['gamma'],
    )

    # With test_set=True the detector returns the scores and the reconstruction
    scores, x_hat = detector._compute_anomaly_scores(X_test, test_set=True)

    # Persist the reconstruction under the original column names
    recons_frame = pd.DataFrame(x_hat, columns=all_cols)
    recons_frame.to_feather(f"experiments/predictions/{value}_recons_final.feather")

    # Apply the stored threshold to the scores
    y_pred = detector._detect(scores, params['threshold'])

    # Persist the predictions as a single "anomaly" column
    pred_frame = pd.DataFrame(y_pred, columns=["anomaly"])
    pred_frame.to_feather(f"experiments/predictions/{value}_pred_final.feather")



In [10]:
# Top the existing seed list up to 100 entries. Each round reseeds the global
# RNG with the round number, draws as many candidates as are still missing and
# keeps only those not already present.
exist_seeds = seeds["seed"].tolist()
seed_no = 1
while len(exist_seeds) < 100:
    random.seed(seed_no)
    for candidate in random.sample(range(1000000), 100 - len(exist_seeds)):
        if candidate not in exist_seeds:
            exist_seeds.append(candidate)
    seed_no += 1

In [17]:
# Write the completed seed list to a new file, one seed per line
with open("results/stats_eval/seeds_new.txt", "w") as f:
    for seed in exist_seeds:
        print(seed, file=f)

In [1]:
# Set the working directory to the parent directory
import sys
sys.path.append('..')
sys.dont_write_bytecode = True

# Import relevant custom libraries
from src.eda import data_info
import pandas as pd
from scipy.stats import kstest, chi2_contingency
import numpy as np

X_train = pd.read_feather("../data/processed/X_train.feather")

# Split the column names by the variable type reported by data_info
var_info = data_info(X_train)
all_cols = X_train.columns
real_cols = var_info.loc[var_info["var_type"] == "numerical", "var_name"].tolist()
binary_cols = var_info.loc[var_info["var_type"] == "binary", "var_name"].tolist()

# From here on X_train is a NumPy array rather than a DataFrame
X_train = np.asarray(X_train)
n_train = X_train.shape[0]

# Draw 100 distinct row positions with a fixed seed to form the background set
rng = np.random.default_rng(seed=42)
indices = rng.choice(n_train, size=100, replace=False)
background_set = X_train[indices]

# Labelled views of the full array and of the background sample
X_train_df = pd.DataFrame(X_train, columns=all_cols)
background_df = pd.DataFrame(background_set, columns=all_cols)

# KS test of the numerical columns: full training data vs. background sample
print(kstest(X_train_df[real_cols].values, background_df[real_cols].values).pvalue)

for col in binary_cols:
    # Stack both samples and tag every row with the sample it came from
    combined = pd.concat([
        X_train_df[[col]].assign(source="train"),
        background_df[[col]].assign(source="background"),
    ])

    # Contingency table: rows = feature values, columns = sample of origin
    contingency = pd.crosstab(combined[col], combined["source"])
    print(chi2_contingency(contingency).pvalue)

[0.39492957 0.00615894 0.16581311 0.96926511 0.44813935 0.48677199
 0.48960901 0.35419113 0.11657152 1.        ]
0.019045308166375837
1.0
1.0
1.0
0.25509972991949575
0.6008583120527011
0.25087243708358564
0.14017722911368236
0.01893047858690207
0.396331183758482
0.6180148051713701
0.7359944553442103
0.9401798515298243
0.44679731561139246
0.31732097333816706
0.37010788155890195
1.0
0.3910714358606976
0.8688014690419097
1.0
0.779346193941038
0.6947531111543801
0.37775887419840637
0.10133457882877947
0.10133457882877996
0.8961003327752
0.5560332123381857
0.6811683679086121
0.29682167945916
0.5560332123381857
0.1341412717816369
0.35149654197131797
0.35149654197131797
0.7215371211262981
1.0
1.0
1.0
1.0
1.0
0.04342190463959022
0.005134102552679468
0.7962589079166523
0.7201436891331803
0.6361025834878413
1.0
0.7928800313974271
1.0
0.4096998682673726
0.42926822299889134
0.4361518907993689
1.0
0.4420006833276727
0.8477522185532955
0.3031442949478514


In [2]:
# For every binary column, report the mean (share of 1s) in the training data
# and in the background sample, plus their absolute difference.
for col in binary_cols:
    p_train, p_bg = X_train_df[col].mean(), background_df[col].mean()
    diff = abs(p_train - p_bg)

    print(
        f"{col}:",
        f"  Proportion of 1s — Train: {p_train:.4f}, Background: {p_bg:.4f}",
        f"  Absolute difference: {diff:.4f}\n",
        sep="\n",
    )

job_admin.:
  Proportion of 1s — Train: 0.2463, Background: 0.1400
  Absolute difference: 0.1063

job_blue-collar:
  Proportion of 1s — Train: 0.2389, Background: 0.2400
  Absolute difference: 0.0011

job_entrepreneur:
  Proportion of 1s — Train: 0.0385, Background: 0.0400
  Absolute difference: 0.0015

job_housemaid:
  Proportion of 1s — Train: 0.0273, Background: 0.0300
  Absolute difference: 0.0027

job_management:
  Proportion of 1s — Train: 0.0665, Background: 0.1000
  Absolute difference: 0.0335

job_retired:
  Proportion of 1s — Train: 0.0353, Background: 0.0500
  Absolute difference: 0.0147

job_self-employed:
  Proportion of 1s — Train: 0.0340, Background: 0.0600
  Absolute difference: 0.0260

job_services:
  Proportion of 1s — Train: 0.1004, Background: 0.1500
  Absolute difference: 0.0496

job_student:
  Proportion of 1s — Train: 0.0156, Background: 0.0500
  Absolute difference: 0.0344

job_technician:
  Proportion of 1s — Train: 0.1668, Background: 0.1300
  Absolute differe

In [4]:
import numpy as np
# Scratch check of np.tile: with reps (3, 1) the 2x2 block is repeated three
# times along the rows and once along the columns.
np.tile([[1, 2], [3, 4]], (3, 1))

array([[1, 2],
       [3, 4],
       [1, 2],
       [3, 4],
       [1, 2],
       [3, 4]])

In [40]:
import pandas as pd
from src.explainability import *

# Load the stored "fixed_final_3000" explainability table of baseline version
# 202505080012.
# NOTE(review): what the "3000" suffix refers to is not visible here — confirm.
n_3000 = pd.read_csv("results/explainability/baseline/202505080012_fixed_final_3000.csv")

In [54]:
# Load the "fixed_final" explainability table of the same baseline version
# from CSV. The feather variant is kept below for reference but disabled.
#n = pd.read_feather("results/explainability/baseline/202505080012_fixed_final.feather")
n = pd.read_csv("results/explainability/baseline/202505080012_fixed_final.csv")

In [56]:
# Only the first `num_rows` rows of each table are compared.
num_rows = 400
# Mean of shap_gap(..., gap_type="cosine") between the two tables.
# NOTE(review): shap_gap comes in through `from src.explainability import *`;
# its return shape is not visible in this notebook.
shap_gap(n[:num_rows].values, n_3000[:num_rows].values, gap_type="cosine").mean()

3.42914309230502e-05

In [53]:
from scipy.stats import spearmanr
# One-sided (alternative="greater") Spearman rank correlation between the
# per-column means of the first `num_rows` rows of each table.
spearmanr(n[:num_rows].mean(axis=0), n_3000[:num_rows].mean(axis=0), alternative="greater")

SignificanceResult(statistic=0.9956589951034289, pvalue=5.926088135500418e-66)

In [3]:
import pandas as pd

# Read the validation summary of the DP-SGD models and keep the versions that
# were tuned by AUC (as strings).
dpsgd = pd.read_csv("../experiments/perf_summary/dpsgd_val_results.csv")
# Plain string literal: the query has no placeholders, so no f-prefix is needed.
dpsgd_models = dpsgd.query('tuned_by == "AUC"')["version"].astype(str).tolist()

In [9]:
# Collect the "median_shapgap_euclidean" column of each selected DP-SGD
# version's evaluation CSV into one frame, one column per version.
# NOTE(review): the loop variable `model` holds a version string; if a fitted
# model is also kept under the name `model` in this session it is overwritten
# here — confirm that is intended.
metric_summary = pd.DataFrame()
for model in dpsgd_models:
    sub_df = pd.read_csv(f"../results/stats_eval/dpsgd/{model}_eval.csv")
    metric_summary[model] = sub_df["median_shapgap_euclidean"]

In [10]:
# Display the collected per-version column side by side.
metric_summary

Unnamed: 0,202506071334,202506070611,202506070329
0,0.287,0.287,0.239
1,0.259,0.267,0.229
2,0.26,0.268,0.228
3,0.258,0.263,0.228
4,0.265,0.271,0.23
5,0.262,0.267,0.228
6,0.25,0.257,0.225
7,0.284,0.286,0.238
8,0.288,0.286,0.237
9,0.267,0.274,0.232


In [15]:
# Load the stored "fixed_final" explainability frames of the baseline version
# and of two DP-SGD versions.
# NOTE(review): the names eps1/eps3 presumably denote the epsilon=1 and
# epsilon=3 models — confirm against the DP-SGD results table.
baseline = pd.read_feather("../results/explainability/baseline/202505080012_fixed_final.feather")
eps1 = pd.read_feather("../results/explainability/dpsgd/202506071334_fixed_final.feather")
eps3 = pd.read_feather("../results/explainability/dpsgd/202506070611_fixed_final.feather")
# Show every column when a frame is displayed.
pd.set_option('display.max_columns', None)

In [16]:
# Display the baseline explainability frame (pandas truncates the row listing).
baseline

Unnamed: 0,age,duration,campaign,previous,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,job_technician,job_unemployed,job_unknown,marital_divorced,marital_married,marital_single,education_basic.4y,education_basic.6y,education_basic.9y,education_high.school,education_illiterate,education_professional.course,education_university.degree,education_unknown,default_no,default_unknown,default_yes,housing_no,housing_unknown,housing_yes,loan_no,loan_unknown,loan_yes,contact_cellular,contact_telephone,month_apr,month_aug,month_dec,month_jul,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,day_of_week_fri,day_of_week_mon,day_of_week_thu,day_of_week_tue,day_of_week_wed,pdays_intermediate,pdays_never,pdays_recent,poutcome_failure,poutcome_nonexistent,poutcome_success
0,0.000000,0.0,0.000000,0.0,0.000000,0.0,-0.078507,-0.127705,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,1.592608,0.0,0.0,0.000000,0.0,0.0,0.0,0.176371,0.000000,0.0,0.000000,0.000000,0.000000,0.0,1.222965,0.000000,0.0,0.0,0.812106,0.0,0.409972,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.679209,0.000000,0.0,0.0,1.037838,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,1.378682,0.0,0.0
1,0.002379,0.0,0.000000,0.0,0.028117,0.0,0.000000,0.039704,0.0,0.000000,0.901061,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,1.610582,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.840668,0.0,0.000000,0.0,0.305055,0.0,0.0,0.000000,0.183385,0.000000,0.0,0.0,0.0,1.087014,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.995947,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
2,0.000000,0.0,0.000000,0.0,0.026811,0.0,0.000000,0.035471,0.0,0.000000,0.902230,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,1.230928,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.841265,0.0,0.000000,0.0,0.306599,0.0,0.0,0.000000,0.002045,0.554406,0.0,0.0,0.0,0.000000,1.274636,0.0,0.000000,0.000000,0.0,0.0,1.078217,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
3,0.000000,0.0,0.035418,0.0,0.020686,0.0,0.000000,0.024689,0.0,1.017763,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.178559,0.000000,0.0,0.000000,0.000000,0.949255,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.307476,0.0,0.0,0.000000,0.185829,0.000000,0.0,0.0,0.0,1.094865,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,1.002279,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
4,0.000000,0.0,-0.023739,0.0,0.000000,0.0,-0.014458,0.009800,0.0,0.000000,0.000000,1.677744,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.791945,0.0,0.0,0.834259,0.0,0.404197,0.0,0.000000,0.0,0.0,0.000000,0.186393,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,1.242544,0.0,0.0,0.000000,0.000000,1.014256,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11452,0.000000,0.0,0.007643,0.0,0.027356,0.0,0.000000,0.034704,0.0,1.012632,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,1.235711,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.845194,0.0,0.000000,0.0,0.308397,0.0,0.0,0.000000,0.000000,0.556072,0.0,0.0,0.0,0.000000,1.278817,0.0,0.000000,0.000000,0.0,0.0,1.082625,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
11453,0.000000,0.0,0.013550,0.0,0.000000,0.0,-0.043572,-0.068936,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,1.154680,0.0,0.0,0.0,0.000000,0.851795,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.784442,0.0,0.0,0.000000,0.0,0.404661,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.560303,0.0,0.0,0.0,0.000000,0.000000,0.0,0.697961,0.000000,0.0,0.0,1.039536,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
11454,0.000000,0.0,0.000000,0.0,0.027440,0.0,0.040012,0.042621,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,1.202119,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,1.291983,0.000000,0.0,0.0,0.000000,0.0,0.410218,0.0,0.000000,0.0,0.0,1.126811,0.000000,0.562235,0.0,0.0,0.0,0.000000,0.000000,0.0,0.720294,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,1.015323,0.0,0.0,0.0,0.000000,0.0,0.0
11455,0.000000,0.0,0.000000,0.0,-0.055980,0.0,0.000000,-0.074295,0.0,0.974827,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.178518,0.000000,0.0,0.000000,0.000000,0.904195,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.293819,0.0,0.0,0.000000,0.178901,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.688524,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.998764,0.000000,0.0,0.0,0.0,1.386031,0.0,0.0


In [17]:
# Display the explainability frame of DP-SGD version 202506071334.
eps1

Unnamed: 0,age,duration,campaign,previous,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,job_technician,job_unemployed,job_unknown,marital_divorced,marital_married,marital_single,education_basic.4y,education_basic.6y,education_basic.9y,education_high.school,education_illiterate,education_professional.course,education_university.degree,education_unknown,default_no,default_unknown,default_yes,housing_no,housing_unknown,housing_yes,loan_no,loan_unknown,loan_yes,contact_cellular,contact_telephone,month_apr,month_aug,month_dec,month_jul,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,day_of_week_fri,day_of_week_mon,day_of_week_thu,day_of_week_tue,day_of_week_wed,pdays_intermediate,pdays_never,pdays_recent,poutcome_failure,poutcome_nonexistent,poutcome_success
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.711610,0.0,0.0,0.000000,0.0,0.0,0.0,0.222478,0.000000,0.0,0.0000,0.000000,0.000000,0.0,0.620638,0.000000,0.0,0.000000,0.494379,0.0,0.386208,0.0,0.000000,0.140386,0.0,0.000000,0.226203,0.000000,0.0,0.0,0.0,0.00000,0.000000,0.0,0.536811,0.000000,0.0,0.0,0.619226,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.674078,0.000000,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.561918,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.225154,0.000000,0.0,0.7299,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.504887,0.0,0.000000,0.0,0.318975,0.143208,0.0,0.000000,0.231997,0.000000,0.0,0.0,0.0,0.59306,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.567404,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.067730,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.562461,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.225408,0.000000,0.0,0.0000,0.649327,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.505971,0.0,0.000000,0.0,0.319928,0.143469,0.0,0.000000,0.000000,0.477410,0.0,0.0,0.0,0.00000,0.650725,0.0,0.000000,0.000000,0.0,0.0,0.622400,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.067397,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.628877,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.224826,0.000000,0.0,0.0000,0.000000,0.578477,0.0,0.000000,0.000000,0.0,0.197005,0.000000,0.0,0.000000,0.0,0.318307,0.143189,0.0,0.000000,0.232151,0.000000,0.0,0.0,0.0,0.59141,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.566638,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.066836,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.703523,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.224969,0.000000,0.0,0.0000,0.000000,0.000000,0.0,0.000000,0.548772,0.0,0.000000,0.503736,0.0,0.390053,0.0,0.000000,0.143273,0.0,0.000000,0.230971,0.000000,0.0,0.0,0.0,0.00000,0.000000,0.0,0.000000,0.603046,0.0,0.0,0.000000,0.000000,0.603538,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.067453,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11452,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.630203,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.225100,0.000000,0.0,0.0000,0.649382,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.506007,0.0,0.000000,0.0,0.319763,0.142907,0.0,0.000000,0.000000,0.477404,0.0,0.0,0.0,0.00000,0.650740,0.0,0.000000,0.000000,0.0,0.0,0.622050,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.067388,0.0
11453,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.638137,0.0,0.0,0.0,0.000000,0.558822,0.0,0.0000,0.000000,0.000000,0.0,0.000000,0.545624,0.0,0.196049,0.000000,0.0,0.387606,0.0,0.000000,0.142850,0.0,0.000000,0.000000,0.470276,0.0,0.0,0.0,0.00000,0.000000,0.0,0.540414,0.000000,0.0,0.0,0.619096,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.065864,0.0
11454,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.642826,0.0,0.0,0.0,0.225064,0.000000,0.0,0.0000,0.000000,0.000000,0.0,0.623893,0.000000,0.0,0.197382,0.000000,0.0,0.390921,0.0,0.000000,0.000000,0.0,0.593922,0.000000,0.476399,0.0,0.0,0.0,0.00000,0.000000,0.0,0.544615,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.555547,0.0,0.0,0.0,0.000000,0.067355,0.0
11455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.626039,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.223317,0.000000,0.0,0.0000,0.000000,0.570168,0.0,0.000000,0.000000,0.0,0.194934,0.000000,0.0,0.000000,0.0,0.313620,0.142818,0.0,0.000000,0.227789,0.000000,0.0,0.0,0.0,0.00000,0.000000,0.0,0.538533,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.576819,0.000000,0.0,0.0,0.0,0.674002,0.000000,0.0


In [19]:
# Display the explainability frame of DP-SGD version 202506070611.
eps3

Unnamed: 0,age,duration,campaign,previous,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,job_technician,job_unemployed,job_unknown,marital_divorced,marital_married,marital_single,education_basic.4y,education_basic.6y,education_basic.9y,education_high.school,education_illiterate,education_professional.course,education_university.degree,education_unknown,default_no,default_unknown,default_yes,housing_no,housing_unknown,housing_yes,loan_no,loan_unknown,loan_yes,contact_cellular,contact_telephone,month_apr,month_aug,month_dec,month_jul,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,day_of_week_fri,day_of_week_mon,day_of_week_thu,day_of_week_tue,day_of_week_wed,pdays_intermediate,pdays_never,pdays_recent,poutcome_failure,poutcome_nonexistent,poutcome_success
0,0.0,0.0,0.000000,0.120824,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0,1.156914,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,1.057601,0.000000,0.0,0.000000,0.641697,0.0,0.432853,0.0,0.0,0.124998,0.0,0.00000,0.265756,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.618545,0.000000,0.0,0.0,0.640696,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.827829,0.000000,0.0
1,0.0,0.0,0.000000,0.000000,0.024525,0.0,0.000000,0.000000,0.0,0.000000,0.910284,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.918491,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.611249,0.0,0.000000,0.0,0.0,0.110149,0.0,0.00000,0.265945,0.000000,0.0,0.0,0.0,0.501213,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.932015,0.000000,0.000000,0.000000,0.0,0.023675,0.0,0.000000,0.041993,0.0
2,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.913048,0.000000,0.0,0.018998,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.823325,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.660006,0.0,0.000000,0.0,0.0,0.112796,0.0,0.00000,-0.099338,0.577396,0.0,0.0,0.0,0.000000,1.207901,0.0,0.000000,0.000000,0.0,0.0,0.634427,0.004569,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.0
3,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.674428,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.018806,0.000000,0.000000,0.461789,0.0,0.000000,0.000000,0.0,0.138276,0.000000,0.0,0.034425,0.0,0.0,0.109213,0.0,0.00000,0.219364,0.000000,0.0,0.0,0.0,0.482719,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.906340,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.050111,0.0
4,0.0,0.0,0.036107,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,1.062084,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.719153,0.0,0.000000,0.593418,0.0,0.440152,0.0,0.0,0.104474,0.0,0.00000,0.274251,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.462354,0.0,0.0,0.000000,0.000000,0.795986,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.029229,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11452,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.777259,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.004788,0.000000,0.000000,0.765948,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.630577,0.0,0.000000,0.0,0.0,0.126811,0.0,0.00000,-0.081385,0.581465,0.0,0.0,0.0,0.000000,1.213756,0.0,0.000000,0.000000,0.0,0.0,0.631232,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.019237,0.0
11453,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.097952,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.691772,0.0,0.0,0.0,0.0,0.584409,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.643255,0.0,0.137659,0.000000,0.0,0.357789,0.0,0.0,0.116094,0.0,0.00000,0.000000,0.588640,0.0,0.0,0.0,0.000000,0.000000,0.0,0.564761,0.000000,0.0,0.0,0.754691,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.0
11454,0.0,0.0,0.000000,0.000000,0.013200,0.0,0.000000,0.017059,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.554595,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,1.148152,0.000000,0.0,0.000000,0.000000,0.0,0.459329,0.0,0.0,0.000000,0.0,0.63728,0.000000,0.532257,0.0,0.0,0.0,0.000000,0.000000,0.0,0.625902,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.899076,0.0,0.000000,0.0,0.000000,0.021980,0.0
11455,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.554944,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.585567,0.0,0.000000,0.000000,0.0,0.167846,0.000000,0.0,0.064081,0.0,0.0,0.134341,0.0,0.00000,0.244019,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.552296,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.717377,0.000000,0.0,0.000000,0.0,0.888890,0.177971,0.0
