# In [16]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr
from matplotlib.ticker import ScalarFormatter, LogLocator, NullFormatter

# ===== Load data =====
# Map the alternative column spellings onto the two canonical names used below.
column_aliases = {
    **dict.fromkeys(("Loss_Ratio", "loss_ratio"), "Loss Ratio"),
    **dict.fromkeys(("TPR_0.1FPR", "tpr", "TPR@0.1%FPR"), "TPR"),
}
df = pd.read_csv("loss_ratio.csv").rename(columns=column_aliases)

# Treat +/-inf as missing, then discard rows lacking either plotted quantity.
df = df.replace([np.inf, -np.inf], np.nan)
df = df.dropna(subset=["Loss Ratio", "TPR"])

# Force float dtype and order rows by loss ratio; mergesort is stable, so
# rows with equal loss ratio keep their existing relative order.
df = df.astype({"Loss Ratio": float, "TPR": float})
df = df.sort_values("Loss Ratio", kind="mergesort")

# Floor x at 1e-12 so log10 never sees zero or a negative value.
x = np.clip(df["Loss Ratio"].to_numpy(), 1e-12, None)
y = df["TPR"].to_numpy()
logx = np.log10(x)

# ===== Correlations & regression =====
# Pearson is measured against log10(x); Spearman is rank-based and is
# computed on the raw x values.
pearson_r, pearson_p = pearsonr(logx, y)
spearman_r, spearman_p = spearmanr(x, y)

# Degree-1 least-squares fit y ~ a*log10(x) + b (polyfit returns the slope first).
a, b = np.polyfit(logx, y, deg=1)
y_hat = b + a * logx

# Coefficient of determination: 1 - SS_residual / SS_total.
ss_res = np.sum((y - y_hat) ** 2)
ss_tot = np.sum((y - np.mean(y)) ** 2)
r2 = 1 - ss_res / ss_tot

# ===== ICDE/IEEE style =====
# Fonts: serif text with a fallback chain, STIX glyphs for math.
font_rc = {
    "font.family": "serif",
    "font.serif": ["Times New Roman", "Times", "DejaVu Serif"],
    "mathtext.fontset": "stix",
}

# Type sizes: body, axis labels and titles at 7.5; legend and tick labels at 6.5.
size_rc = {
    "font.size": 7.5,
    "axes.labelsize": 7.5,
    "axes.titlesize": 7.5,
    "legend.fontsize": 6.5,
    "xtick.labelsize": 6.5,
    "ytick.labelsize": 6.5,
}

# Resolution, axes frame weight, and Type 42 font embedding for PDF/PS output.
output_rc = {
    "figure.dpi": 300,
    "savefig.dpi": 600,
    "axes.linewidth": 0.6,
    "pdf.fonttype": 42,
    "ps.fonttype": 42,
}

plt.rcParams.update({**font_rc, **size_rc, **output_rc})

# ===== Smaller single-column figure =====
# 2.6 in x 1.7 in canvas (about 66 mm wide).
fig, ax = plt.subplots(figsize=(2.6, 1.7))

# ===== Scatter and regression =====
# Observations drawn as hollow, black-outlined circles.
marker_style = dict(s=12, marker="o", facecolor="white", edgecolor="black", linewidth=0.55)
ax.scatter(x, y, **marker_style)

# Evaluate the fitted line on 256 points evenly spaced in log10(x), then map
# the abscissa back to x so it lines up with the scatter.
fit_logx = np.linspace(logx.min(), logx.max(), num=256)
fit_y = a * fit_logx + b
ax.plot(10**fit_logx, fit_y, color="black", linewidth=0.9)

# ===== Axes formatting =====
# The log scale is applied first: the custom locators below must be installed
# after the scale is set.
ax.set(
    xscale="log",
    xlabel="Loss Ratio (Test / Train), log scale",
    ylabel="TPR @ 0.1% FPR [%]",
)

# Dotted major grid, kept underneath the plotted data.
ax.set_axisbelow(True)
ax.grid(True, which="major", linestyle=":", linewidth=0.45)

# x axis: major ticks at 1, 2 and 5 per decade; unlabeled minor ticks.
x_axis = ax.xaxis
x_axis.set_major_locator(LogLocator(base=10, subs=(1.0, 2.0, 5.0)))
x_axis.set_minor_locator(LogLocator(base=10, subs=np.arange(1, 10)*0.1))
x_axis.set_minor_formatter(NullFormatter())

# y axis: plain (non-scientific) tick labels.
ax.yaxis.set_major_formatter(ScalarFormatter(useMathText=True))
ax.ticklabel_format(axis="y", style="plain")

# ===== Text boxes =====
# Both annotations share the same rounded white box with a thin grey outline.
box_style = dict(facecolor="white", edgecolor="0.6", linewidth=0.45, boxstyle="round,pad=0.2")

# Top-left: correlation metrics
stats_label = (
    fr"Pearson $r$ (log-x): {pearson_r:.2f} (p={pearson_p:.1e})" "\n"
    fr"Spearman $\rho$: {spearman_r:.2f} (p={spearman_p:.1e})"
)
ax.text(
    0.02, 0.98, stats_label,
    transform=ax.transAxes, ha="left", va="top", fontsize=6.5,
    bbox=box_style,
)

# Bottom-right: regression fit
# The intercept is formatted with an explicit sign ("+.2f") so a negative b
# renders as "...x-0.12" instead of "...x+-0.12".
fit_label = fr"$y={a:.2f}\log_{{10}}x{b:+.2f}$, $R^2={r2:.2f}$"
ax.text(
    0.98, 0.02, fit_label,
    transform=ax.transAxes, ha="right", va="bottom", fontsize=6.5,
    bbox=box_style,
)

# ===== Layout and save =====
fig.tight_layout(pad=0.25)
os.makedirs("figures", exist_ok=True)

# The PDF is saved without an explicit dpi; the PNG overrides it with 700.
# Both are cropped tightly around the drawn content.
save_targets = (
    ("figures/lossratio_tpr_small.pdf", {}),
    ("figures/lossratio_tpr_small.png", {"dpi": 700}),
)
for target_path, extra_kwargs in save_targets:
    fig.savefig(target_path, bbox_inches="tight", **extra_kwargs)

# Release the figure now that both files are written.
plt.close(fig)
