### The `wrapper` notebook: reports the mean and standard deviation of the F1 score
#### Calculated over 5 runs with different random seeds

In [1]:
import copy
import os
import time
from datetime import timedelta

import nbformat
import numpy as np
from nbconvert.preprocessors import ExecutePreprocessor

In [2]:
### TO HIDE THE LOGGING FROM SPARK ###
# Renders an HTML/JavaScript snippet as this cell's output. The script selects
# every node whose data-mime-type is "application/vnd.jupyter.stderr" and sets
# its CSS display to 'none' or 'block' depending on the `code_show_err` flag,
# then flips the flag. The flag starts as false, so the first call (registered
# on DOMContentLoaded) hides the stderr outputs; each click on the "here" link
# calls the same function and toggles them back on/off.
# NOTE(review): DOMContentLoaded fires on page load, so presumably the initial
# hiding takes effect when the saved/exported notebook is opened — confirm.

from IPython.display import HTML

HTML('''<script>
var code_show_err = false;
var code_toggle_err = function() {
    var stderrNodes = document.querySelectorAll('[data-mime-type="application/vnd.jupyter.stderr"]')
    var stderr = Array.from(stderrNodes)
    if (code_show_err){
        stderr.forEach(ele => ele.style.display = 'block');
    } else {
        stderr.forEach(ele => ele.style.display = 'none');
    }
    code_show_err = !code_show_err
}
document.addEventListener('DOMContentLoaded', code_toggle_err);
</script>
To toggle on/off output_stderr, click <a onclick="javascript:code_toggle_err()">here</a>.''')

In [3]:
# Load the notebook that the seed loop executes once per seed.
filename = "2_model.ipynb"
# Notebook files are UTF-8 encoded JSON; pass the encoding explicitly so the
# read does not depend on the platform's default text encoding.
with open(filename, encoding="utf-8") as ff:
    # NO_CONVERT keeps the notebook in whatever nbformat version it was saved in.
    nb_in = nbformat.read(ff, nbformat.NO_CONVERT)

In [4]:
def _find_seed_report(cells, seed):
    """Return the text of the first cell output that starts with ``SEED=<seed> ``.

    Args:
        cells: The ``cells`` list of an executed notebook.
        seed: The seed whose report line is being looked for.

    Returns:
        The full output text, or an empty string when no output has that prefix.
    """
    marker = f"SEED={seed} "
    for cell in cells:
        # Cells without an "outputs" key and outputs without a "text" field
        # are simply skipped.
        for output in cell.get("outputs", []):
            text = output.get("text", "")
            if text.startswith(marker):
                return text
    return ""


# Execute the model notebook once per seed and collect one F1 score per run.
f1_scores = []
for seed in [10, 20, 30, 40, 50]:
    start = time.time()
    executor = ExecutePreprocessor(timeout=24*60*60, kernel_name="python3")
    # The seed is handed to the executed notebook through the environment.
    # NOTE(review): assumes 2_model.ipynb reads EXSTRAQT_SEED — not visible here.
    os.environ["EXSTRAQT_SEED"] = str(seed)
    # preprocess() executes the notebook object it receives in place, so run on
    # a deep copy: every seed then starts from the pristine `nb_in`.
    nb_out, _ = executor.preprocess(copy.deepcopy(nb_in))

    report = _find_seed_report(nb_out["cells"], seed)
    if not report:
        raise RuntimeError(
            f"No output starting with 'SEED={seed} ' was found after executing {filename!r}"
        )
    report_lines = report.split("\n")
    if len(report_lines) < 2:
        raise RuntimeError(
            f"Output for seed {seed} has no second line holding the F1 score: {report!r}"
        )

    print(report_lines[0])
    delta = round(time.time() - start)
    print(f"Elasped time: {timedelta(seconds=delta)}")
    print()
    # The second line of the report is parsed as the F1 score of this run.
    f1_scores.append(float(report_lines[1]))

Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/10/27 16:04:24 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting Spark log level to "ERROR".
                                                                                

SEED=10 f1=67.32 recall=75.28
Elasped time: 3:01:21



Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/10/27 19:05:50 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting Spark log level to "ERROR".
                                                                                

SEED=20 f1=67.33 recall=75.74
Elasped time: 2:59:19



Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/10/27 22:05:15 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting Spark log level to "ERROR".
                                                                                

SEED=30 f1=66.86 recall=75.03
Elasped time: 3:00:11



Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/10/28 01:05:27 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting Spark log level to "ERROR".
                                                                                

SEED=40 f1=66.98 recall=75.28
Elasped time: 2:57:15



Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/10/28 04:02:42 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting Spark log level to "ERROR".
                                                                                

SEED=50 f1=66.67 recall=75.03
Elasped time: 2:57:51



In [8]:
# Summarise the per-seed F1 scores as "mean ± standard deviation", both rounded
# to two decimals. np.std is called with its defaults here.
scores = np.asarray(f1_scores)
f1_mean = round(scores.mean(), 2)
f1_std = round(scores.std(), 2)

print(f"{f1_mean} ± {f1_std}")

67.03 ± 0.26


In [9]:
# Baseline F1 score and its standard deviation that this notebook's result is
# compared against.
# NOTE(review): the variables are named multi_gnn_* but the printed label says
# "FraudGT" — confirm which baseline these numbers belong to.
multi_gnn_best, multi_gnn_std = 66.58, 1.60
print(f"FraudGT best: {multi_gnn_best} ± {multi_gnn_std}")

FraudGT best: 66.58 ± 1.6


In [10]:
# Relative improvement of the mean F1 over the baseline, as a percentage
# rounded to two decimals.
relative_gain = (f1_mean - multi_gnn_best) / multi_gnn_best
uplift = round(relative_gain * 100, 2)
print(f"Uplift of {uplift}%")

Uplift of 0.68%
