In [1]:
import matplotlib as mpl
from matplotlib import pyplot as plt

import numpy as np

import sklearn

from sklearn.neighbors import KernelDensity
from sklearn.neural_network import MLPClassifier

import tensorflow as tf

In [2]:
import colorcet

conf_cmap = "cet_CET_L20"
diff_cmap = "cet_CET_L20"

In [3]:
def generate_toy_line(seed):
    rng = np.random.default_rng(seed)

    X1 = np.concatenate([np.linspace(0, 4, 5000), np.linspace(6, 10, 5000)])
    X2 = 1 + X1 * 0.5 + rng.normal(loc=0.0, scale=0.1, size=10000)
    X1 += rng.normal(loc=0.0, scale=0.1, size=10000)
    Y = X1*np.sqrt(1.25)
    
    return np.stack([X1, X2], axis=1), Y

def generate_toy_line_test():
    Xl1T, Xl2T = np.mgrid[-2:12:0.01, -1:8:0.01]
    Xl1T = Xl1T.flatten()
    Xl2T = Xl2T.flatten()

    return np.stack([Xl1T, Xl2T], axis=1)

def optimal_toy_line_reconstruction(Xl):
    return tf.constant([[0.0, 1.0]]) + tf.constant([[1.0, 0.5]]) * (
        tf.linalg.matmul((Xl - tf.constant([0.0, 1.0])), tf.constant([[1.0], [0.5]])) /
        tf.linalg.matmul(tf.constant([[1.0, 0.5]]), tf.constant([[1.0], [0.5]]))
    )

Xl, Yl = generate_toy_line(42)
XlT = generate_toy_line_test()

In [4]:
def generate_toy_circle(seed):
    rng = np.random.default_rng(seed)
    
    Y = np.sin(np.linspace(0, np.pi * 8.0, 10000))
    
    X1 = np.sin(np.linspace(0, np.pi * 2.0, 10000)) * (5 + Y)
    X2 = np.cos(np.linspace(0, np.pi * 2.0, 10000)) * (5 + Y)
    
    X1 += rng.normal(loc=0.0, scale=0.1, size=10000)
    X2 += rng.normal(loc=0.0, scale=0.1, size=10000)
    
    return np.stack([X1, X2], axis=1), Y

def generate_toy_circle_test():
    Xl1T, Xl2T = np.mgrid[-7.5:7.5:0.01, -7.5:7.5:0.01]
    Xl1T = Xl1T.flatten()
    Xl2T = Xl2T.flatten()

    return np.stack([Xl1T, Xl2T], axis=1)

def optimal_toy_circle_reconstruction(Xc):
    angle = tf.math.atan2(Xc[:,0], Xc[:,1])
    
    Y = tf.math.sin(angle * 4.0)
    
    X1 = tf.math.sin(angle) * (5.0 + Y)
    X2 = tf.math.cos(angle) * (5.0 + Y)
    
    return tf.stack([X1, X2], axis=1)

Xc, Yc = generate_toy_circle(42)
XcT = generate_toy_circle_test()

In [5]:
def generate_toy_haystack(seed):
    rng = np.random.default_rng(seed)
    
    n = 10
    a = 2
    
    # https://stats.stackexchange.com/a/124554
    A = np.matrix([rng.normal(size=n) + rng.normal(size=1)*a for i in range(n)])
    A = A*np.transpose(A)
    D_half = np.diag(np.diag(A)**(-0.5))
    C = D_half*A*D_half
    covs = C
            
    Cn = rng.choice([-2, -1, 0, 0, 1, 2], size=10)
    
    Xn = rng.multivariate_normal(np.zeros(shape=10), covs, size=10000)
    
    Xn[:,3] = -0.42
    
    Yn = np.dot(Xn, Cn)
    
    return Xn, Yn

def generate_toy_haystack_test(seed):
    rng = np.random.default_rng(seed)
    
    n = 10
    a = 2
    
    # https://stats.stackexchange.com/a/124554
    A = np.matrix([rng.normal(size=n) + rng.normal(size=1)*a for i in range(n)])
    A = A*np.transpose(A)
    D_half = np.diag(np.diag(A)**(-0.5))
    C = D_half*A*D_half
    covs = C
            
    Cn = rng.choice([-2, -1, 0, 0, 1, 2], size=10)
    
    rng = np.random.default_rng(rng.integers(0, 2**30))
    
    XnT = rng.multivariate_normal(np.zeros(shape=10), covs, size=10000)
    
    XnT[:,3] = (rng.random(size=10000) - 0.5) - 0.42
    
    return XnT

def optimal_toy_haystack_reconstruction(Xh):
    return (
        Xh * tf.constant([[1,1,1,0,1,1,1,1,1,1]], dtype=tf.float32) -
        tf.constant([[0,0,0,0.42,0,0,0,0,0,0]], dtype=tf.float32)
    )

Xh, Yh = generate_toy_haystack(42)
XhT = generate_toy_haystack_test(42)

In [6]:
def plot_line_toy_confidence(Xl, XlO, XlT, ClT, short, title):
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))

    ax.imshow(
        ClT.reshape(np.mgrid[-2:12:0.01, -1:8:0.01][0].shape).T, cmap=conf_cmap, vmin=0.0, vmax=1.2,
        extent=[-2,12,-1,8], origin="lower", interpolation="bicubic", rasterized=True,
    )

    ax.set_xlim(-2, 12)
    ax.set_ylim(-1, 8)
    
    ax.scatter(XlO[::25,0], XlO[::25,1], c='white', s=2, rasterized=True)
    ax.scatter(Xl[::500,0], Xl[::500,1], c='white', marker='x', lw=3, s=48)
    ax.scatter(Xl[::500,0], Xl[::500,1], c='black', marker='x')
    
    ax.text(
        0.5, 0.95, title, ha="center", va="top",
        size=20, c="white", bbox=dict(facecolor='black', alpha=0.25, edgecolor='white'),
        transform=ax.transAxes,
    )

    ax.axis('off')

    plt.savefig(f"ood-line-{short}.pdf", dpi=100, transparent=True, bbox_inches='tight')
    # plt.show()
    plt.close(fig)

def plot_line_toy_difference(Xl, XlO, XlOW, XlT, ClT, short, title):
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    
    vlim = np.amax(np.abs(ClT))

    ax.imshow(
        ClT.reshape(np.mgrid[-2:12:0.01, -1:8:0.01][0].shape).T, cmap=diff_cmap, vmin=-vlim,
        vmax=vlim, extent=[-2,12,-1,8], origin="lower", interpolation="bicubic", rasterized=True,
    )

    ax.set_xlim(-2, 12)
    ax.set_ylim(-1, 8)
    
    ax.scatter(XlO[::5,0], XlO[::5,1], c='white', alpha=1.0-XlOW[::5], s=2, rasterized=True)
    
    ax.text(
        0.5, 0.95, title, ha="center", va="top",
        size=20, c="black", bbox=dict(facecolor='white', alpha=0.5, edgecolor='black'),
        transform=ax.transAxes,
    )

    ax.axis('off')

    plt.savefig(f"ood-line-{short}.pdf", dpi=100, transparent=True, bbox_inches='tight')
    # plt.show()
    plt.close(fig)

In [7]:
def plot_circle_toy_confidence(Xl, XlO, XlT, ClT, short):
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))

    ax.imshow(
        ClT.reshape(np.mgrid[-7.5:7.5:0.01, -7.5:7.5:0.01][0].shape).T, cmap=conf_cmap, vmin=0.0, vmax=1.2,
        extent=[-7.5,7.5,-7.5,7.5], origin="lower", interpolation="bicubic", rasterized=True,
    )

    ax.set_xlim(-7.5, 7.5)
    ax.set_ylim(-7.5, 7.5)
    
    ax.scatter(XlO[::25,0], XlO[::25,1], c='white', s=4, rasterized=True)
    ax.scatter(Xl[::250,0], Xl[::250,1], c='white', marker='x', lw=3, s=48)
    ax.scatter(Xl[::250,0], Xl[::250,1], c='black', marker='x')

    ax.axis('off')

    plt.savefig(f"ood-circle-{short}.pdf", dpi=100, transparent=True, bbox_inches='tight')
    # plt.show()
    plt.close(fig)

def plot_circle_toy_difference(Xl, XlO, XlOW, XlT, ClT, short):
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    
    vlim = np.amax(np.abs(ClT))

    ax.imshow(
        ClT.reshape(np.mgrid[-7.5:7.5:0.01, -7.5:7.5:0.01][0].shape).T, cmap=diff_cmap, vmin=-vlim,
        vmax=vlim, extent=[-7.5,7.5,-7.5,7.5], origin="lower", interpolation="bicubic", rasterized=True,
    )

    ax.set_xlim(-7.5, 7.5)
    ax.set_ylim(-7.5, 7.5)
    
    ax.scatter(XlO[::5,0], XlO[::5,1], c='white', alpha=1.0-XlOW[::5], s=2, rasterized=True)

    ax.axis('off')

    plt.savefig(f"ood-circle-{short}.pdf", dpi=100, transparent=True, bbox_inches='tight')
    # plt.show()
    plt.close(fig)

In [8]:
def plot_haystack_toy_confidence(XlT, ClT, short):
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.spines['left'].set_visible(False)
    
    ax.set_xticks([-0.42-0.4, -0.42, -0.42+0.4])
    ax.set_xticklabels([r"$-0.4 \sigma$", "const", "$+0.4 \sigma$"])
    ax.get_yaxis().set_visible(False)
    
    ax.axvline(-0.42, c='white', lw=5, zorder=-1)
    
    ax.scatter(XlT[:, 3], ClT, c='white', s=6, rasterized=True)
    ax.scatter(XlT[:, 3], ClT, c='black', s=1, rasterized=True)
    
    ax.imshow(
        [[x/100, x/100] for x in range(100, -1, -1)], cmap=conf_cmap, interpolation='bicubic',
        vmin=0.0, vmax=1.2, extent=[-0.93, 0.09, -0.01, 1.01], zorder=-2,
    )
    
    class TruncatedColormap(mpl.colors.Colormap):
        def __init__(self, cmap):
            self.cmap = cmap
            self.N = cmap.N
        
        def __call__(self, X, alpha=None, bytes=False):
            return self.cmap.__call__(X/1.2, alpha=alpha, bytes=bytes)
    
    cb = fig.colorbar(mpl.cm.ScalarMappable(
        norm=None, cmap=TruncatedColormap(mpl.cm.get_cmap(conf_cmap)),
    ), ax=ax, ticks=[0.1, 0.9])
    cb.set_label("confidence level $c$", labelpad=-13)
    cb.outline.set_visible(False)
    
    plt.savefig(f"ood-haystack-{short}.pdf", dpi=100, transparent=True, bbox_inches='tight')
    # plt.show()
    plt.close(fig)

def plot_haystack_toy_difference(XlT, ClT, short):
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.spines['left'].set_visible(False)
    
    ax.set_xticks([-0.42-0.4, -0.42, -0.42+0.4])
    ax.set_xticklabels([r"$-0.4 \sigma$", "const", "$+0.4 \sigma$"])
    ax.get_yaxis().set_visible(False)
    
    vlim = np.amax(np.abs(ClT))
    
    ax.axvline(-0.42, c='white', lw=5, zorder=-1)
    
    ax.scatter(XlT[:, 3], (ClT / vlim) * 0.5 + 0.5, c='white', s=6, rasterized=True)
    ax.scatter(XlT[:, 3], (ClT / vlim) * 0.5 + 0.5, c='black', s=1, rasterized=True)
    
    ax.imshow(
        [[x/100, x/100] for x in range(100, -1, -1)], cmap=diff_cmap, interpolation='bicubic',
        extent=[-0.93, 0.09, -0.01, 1.01], zorder=-2,
    )
    
    cb = fig.colorbar(mpl.cm.ScalarMappable(
        norm=None, cmap=diff_cmap,
    ), ax=ax, ticks=[0.1, 0.9])
    cb.ax.set_yticklabels(["$<$ ", "$>$ "])
    cb.ax.tick_params(pad=3)
    cb.set_label("change in confidence level $c$", labelpad=-13)
    cb.outline.set_visible(False)
    
    plt.savefig(f"ood-haystack-{short}.pdf", dpi=100, transparent=True, bbox_inches='tight')
    # plt.show()
    plt.close(fig)

In [9]:
Xl_ood_u = np.random.default_rng(24).uniform(np.amin(Xl, axis=0) - 1, np.amax(Xl, axis=0) + 1, size=(10000, 2))

NN_l_u = MLPClassifier(activation="relu", random_state=42).fit(
    np.concatenate([Xl, Xl_ood_u]), np.concatenate([np.ones(shape=len(Xl)), np.zeros(shape=len(Xl_ood_u))])
)

plot_line_toy_confidence(
    Xl, Xl_ood_u, XlT, NN_l_u.predict_proba(XlT)[:,1], "uniform", "OOD $\sim$ Uniform"
)

In [10]:
kde_l = KernelDensity(bandwidth=0.05, kernel="gaussian").fit(Xl)
XlS = kde_l.sample(10000, random_state=42)

xl_id = tf.constant(XlS, dtype=tf.float32)

with tf.GradientTape() as tape:
    tape.watch(xl_id)

    xl_pred_id = optimal_toy_line_reconstruction(xl_id)
    xl_mse_id = (xl_pred_id - xl_id) ** 2

adv_grad_l = tape.gradient(xl_mse_id, xl_id)

Xl_ood_g = xl_id + tf.math.sign(adv_grad_l) * 1.0

NN_l_g = MLPClassifier(activation="relu", random_state=42).fit(
    np.concatenate([Xl, Xl_ood_g]), np.concatenate([np.ones(shape=len(Xl)), np.zeros(shape=len(Xl_ood_g))])
)

plot_line_toy_confidence(
    Xl, Xl_ood_g, XlT, NN_l_g.predict_proba(XlT)[:,1], "fgsm", r"OOD $\sim$ FGSM$(\epsilon = 1)$"
)

2023-03-24 22:21:57.524819: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-24 22:21:57.527168: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [11]:
t = 0.15

for _ in range(10):
    Xl_ood_gt = xl_id + tf.math.sign(adv_grad_l) * t

    NN_l_gt = MLPClassifier(activation="relu", random_state=42).fit(
        np.concatenate([Xl, Xl_ood_gt]), np.concatenate([np.ones(shape=len(Xl)), np.zeros(shape=len(Xl_ood_gt))])
    )
    
    mean_id_conf = np.mean(NN_l_gt.predict_proba(Xl)[:,1])
    
    print(t, mean_id_conf)
    
    if mean_id_conf < 0.95:
        t *= 1.05
    elif mean_id_conf >= 0.955:
        t *= 0.99
    else:
        break
        
print(f"=> {t:.03}")

plot_line_toy_confidence(
    Xl, Xl_ood_gt, XlT, NN_l_gt.predict_proba(XlT)[:,1], "poking-fgsm", f"OOD $\sim$ FGSM$(\epsilon = t_{{poke}})$",
)

0.15 0.9324014771268805
0.1575 0.9409286372330858
0.16537500000000002 0.9484458707448783
0.17364375000000004 0.9551706335674071
0.17190731250000005 0.9533282886932147
=> 0.172


In [12]:
Xl_ood_gu = xl_id + tf.math.sign(adv_grad_l) * tf.math.abs(
    tf.random.uniform(minval=0.0, maxval=1.0, shape=(len(xl_id), 1), seed=242)
)

NN_l_gu = MLPClassifier(activation="relu", random_state=42).fit(
    np.concatenate([Xl, Xl_ood_gu]), np.concatenate([np.ones(shape=len(Xl)), np.zeros(shape=len(Xl_ood_gu))])
)

plot_line_toy_confidence(
    Xl, Xl_ood_gu, XlT, NN_l_gu.predict_proba(XlT)[:,1], "uniform-fgsm", "OOD $\sim$ FGSM$(\epsilon \sim$ U$(0, 1))$"
)

In [13]:
ElT = np.array(optimal_toy_line_reconstruction(Xl) - Xl)
ElOgu = np.array(optimal_toy_line_reconstruction(Xl_ood_gu) - Xl_ood_gu)

kde_lT = KernelDensity(bandwidth=0.1, kernel="linear").fit(ElT)
kde_lOgu = KernelDensity(bandwidth=0.1, kernel="linear").fit(ElOgu)
kde_lOguD = np.exp(kde_lOgu.score_samples(ElOgu))

kde_lTOgu = np.exp(kde_lT.score_samples(ElOgu))

ilgu = np.argmax(kde_lTOgu)
flgu = kde_lOguD[ilgu] / kde_lTOgu[ilgu]

sample_weight_lgu = np.concatenate([
    np.ones(shape=len(Xl)),
    1.0 - ((kde_lTOgu * flgu / np.amax(kde_lOguD)) / np.maximum(
        kde_lTOgu * flgu / np.amax(kde_lOguD), kde_lOguD / np.amax(kde_lOguD)
    )),
])

include_sample_lgu = np.random.default_rng(424).random(size=len(sample_weight_lgu)) < sample_weight_lgu

NN_l_guw = MLPClassifier(activation="relu", random_state=42).fit(
    np.concatenate([Xl, Xl_ood_gu])[include_sample_lgu],
    np.concatenate([np.ones(shape=len(Xl)), np.zeros(shape=len(Xl_ood_gu))])[include_sample_lgu],
)

plot_line_toy_confidence(
    Xl, Xl_ood_gu[include_sample_lgu[len(Xl):]], XlT, NN_l_guw.predict_proba(XlT)[:,1],
    "uniform-fgsm-weighted", "OOD-Weighting for\nOOD $\sim$ FGSM$(\epsilon \sim$ U$(0, 1))$",
)

plot_line_toy_difference(
    Xl, Xl_ood_gu, sample_weight_lgu[len(Xl):], XlT,
    NN_l_guw.predict_proba(XlT)[:,1] - NN_l_gu.predict_proba(XlT)[:,1],
    "uniform-fgsm-weighted-difference", "Impact of OOD-Weighting for\nOOD $\sim$ FGSM$(\epsilon \sim$ U$(0, 1))$",
)

In [14]:
ElT = np.array(optimal_toy_line_reconstruction(Xl) - Xl)
ElO = np.array(optimal_toy_line_reconstruction(Xl_ood_u) - Xl_ood_u)

kde_lT = KernelDensity(bandwidth=0.1, kernel="linear").fit(ElT)
kde_lO = KernelDensity(bandwidth=0.1, kernel="linear").fit(ElO)
kde_lOD = np.exp(kde_lO.score_samples(ElO))

kde_lTO = np.exp(kde_lT.score_samples(ElO))

il = np.argmax(kde_lTO)
fl = kde_lOD[il] / kde_lTO[il]

sample_weight_l = np.concatenate([
    np.ones(shape=len(Xl)),
    1.0 - ((kde_lTO * fl / np.amax(kde_lOD)) / np.maximum(
        kde_lTO * fl / np.amax(kde_lOD), kde_lOD / np.amax(kde_lOD)
    )),
])

include_sample_l = np.random.default_rng(424).random(size=len(sample_weight_l)) < sample_weight_l

NN_l_w = MLPClassifier(activation="relu", random_state=42).fit(
    np.concatenate([Xl, Xl_ood_u])[include_sample_l],
    np.concatenate([np.ones(shape=len(Xl)), np.zeros(shape=len(Xl_ood_u))])[include_sample_l],
)

plot_line_toy_confidence(
    Xl, Xl_ood_u[include_sample_l[len(Xl):]], XlT, NN_l_w.predict_proba(XlT)[:,1],
    "uniform-weighted", "OOD-Weighting for\nOOD $\sim$ Uniform",
)

plot_line_toy_difference(
    Xl, Xl_ood_u, sample_weight_l[len(Xl):], XlT,
    NN_l_w.predict_proba(XlT)[:,1] - NN_l_u.predict_proba(XlT)[:,1],
    "uniform-weighted-difference", "Impact of OOD-Weighting for\nOOD $\sim$ Uniform",
)

In [15]:
Xc_ood_u = np.random.default_rng(24).uniform(np.amin(Xc, axis=0) - 1, np.amax(Xc, axis=0) + 1, size=(10000, 2))

NN_c_u = MLPClassifier(activation="relu", max_iter=500, random_state=42).fit(
    np.concatenate([Xc, Xc_ood_u]), np.concatenate([np.ones(shape=len(Xc)), np.zeros(shape=len(Xc_ood_u))])
)

plot_circle_toy_confidence(
    Xc, Xc_ood_u, XcT, NN_c_u.predict_proba(XcT)[:,1], "uniform",
)

In [16]:
kde_c = KernelDensity(bandwidth=0.05, kernel="gaussian").fit(Xc)
XcS = kde_c.sample(10000, random_state=42)

xc_id = tf.constant(XcS, dtype=tf.float32)

with tf.GradientTape() as tape:
    tape.watch(xc_id)

    xc_pred_id = optimal_toy_circle_reconstruction(xc_id)
    xc_mse_id = (xc_pred_id - xc_id) ** 2

adv_grad_c = tape.gradient(xc_mse_id, xc_id)

Xc_ood_g = xc_id + tf.math.sign(adv_grad_c) * 1.0

NN_c_g = MLPClassifier(activation="relu", max_iter=500, random_state=42).fit(
    np.concatenate([Xc, Xc_ood_g]), np.concatenate([np.ones(shape=len(Xc)), np.zeros(shape=len(Xc_ood_g))])
)

plot_circle_toy_confidence(
    Xc, Xc_ood_g, XcT, NN_c_g.predict_proba(XcT)[:,1], "fgsm",
)

In [17]:
t = 0.46

for _ in range(10):
    Xc_ood_gt = xc_id + tf.math.sign(adv_grad_c) * t

    NN_c_gt = MLPClassifier(activation="relu", max_iter=1500, random_state=42).fit(
        np.concatenate([Xc, Xc_ood_gt]), np.concatenate([np.ones(shape=len(Xc)), np.zeros(shape=len(Xc_ood_gt))])
    )
    
    mean_id_conf = np.mean(NN_c_gt.predict_proba(Xc)[:,1])
    
    print(t, mean_id_conf)
    
    if mean_id_conf < 0.95:
        t *= 1.05
    elif mean_id_conf >= 0.955:
        t *= 0.99
    else:
        break
        
print(f"=> {t:.03}")

plot_circle_toy_confidence(
    Xc, Xc_ood_gt, XcT, NN_c_gt.predict_proba(XcT)[:,1], "poking-fgsm",
)

0.46 0.9590524053261795
0.4554 0.9612162780229968
0.450846 0.9518878958697329
=> 0.451


In [18]:
Xc_ood_gu = xc_id + tf.math.sign(adv_grad_c) * tf.math.abs(
    tf.random.uniform(minval=0.0, maxval=1.0, shape=(len(xc_id), 1), seed=242)
)

NN_c_gu = MLPClassifier(activation="relu", max_iter=1000, random_state=42).fit(
    np.concatenate([Xc, Xc_ood_gu]), np.concatenate([np.ones(shape=len(Xc)), np.zeros(shape=len(Xc_ood_gu))])
)

plot_circle_toy_confidence(
    Xc, Xc_ood_gu, XcT, NN_c_gu.predict_proba(XcT)[:,1], "uniform-fgsm",
)

In [19]:
EcT = np.array(optimal_toy_circle_reconstruction(Xc) - Xc)
EcOgu = np.array(optimal_toy_circle_reconstruction(Xc_ood_gu) - Xc_ood_gu)

kde_cT = KernelDensity(bandwidth=0.1, kernel="linear").fit(EcT)
kde_cOgu = KernelDensity(bandwidth=0.1, kernel="linear").fit(EcOgu)
kde_cOguD = np.exp(kde_cOgu.score_samples(EcOgu))

kde_cTOgu = np.exp(kde_cT.score_samples(EcOgu))

icgu = np.argmax(kde_cTOgu)
fcgu = kde_cOguD[icgu] / kde_cTOgu[icgu]

sample_weight_cgu = np.concatenate([
    np.ones(shape=len(Xc)),
    1.0 - ((kde_cTOgu * fcgu / np.amax(kde_cOguD)) / np.maximum(
        kde_cTOgu * fcgu / np.amax(kde_cOguD), kde_cOguD / np.amax(kde_cOguD)
    )),
])

include_sample_cgu = np.random.default_rng(424).random(size=len(sample_weight_cgu)) < sample_weight_cgu

NN_c_guw = MLPClassifier(activation="relu", max_iter=1000, random_state=42).fit(
    np.concatenate([Xc, Xc_ood_gu])[include_sample_cgu],
    np.concatenate([np.ones(shape=len(Xc)), np.zeros(shape=len(Xc_ood_gu))])[include_sample_cgu],
)

plot_circle_toy_confidence(
    Xc, Xc_ood_gu[include_sample_cgu[len(Xc):]], XcT, NN_c_guw.predict_proba(XcT)[:,1],
    "uniform-fgsm-weighted",
)

plot_circle_toy_difference(
    Xc, Xc_ood_gu, sample_weight_cgu[len(Xc):], XcT,
    NN_c_guw.predict_proba(XcT)[:,1] - NN_c_gu.predict_proba(XcT)[:,1],
    "uniform-fgsm-weighted-difference",
)

In [20]:
EcT = np.array(optimal_toy_circle_reconstruction(Xc) - Xc)
EcO = np.array(optimal_toy_circle_reconstruction(Xc_ood_u) - Xc_ood_u)

kde_cT = KernelDensity(bandwidth=0.1, kernel="linear").fit(EcT)
kde_cO = KernelDensity(bandwidth=0.1, kernel="linear").fit(EcO)
kde_cOD = np.exp(kde_cO.score_samples(EcO))

kde_cTO = np.exp(kde_cT.score_samples(EcO))

ic = np.argmax(kde_cTO)
fc = kde_cOD[ic] / kde_cTO[ic]

sample_weight_c = np.concatenate([
    np.ones(shape=len(Xc)),
    1.0 - ((kde_cTO * fc / np.amax(kde_cOD)) / np.maximum(
        kde_cTO * fc / np.amax(kde_cOD), kde_cOD / np.amax(kde_cOD)
    )),
])

include_sample_c = np.random.default_rng(424).random(size=len(sample_weight_c)) < sample_weight_c

NN_c_w = MLPClassifier(activation="relu", max_iter=500, random_state=42).fit(
    np.concatenate([Xc, Xc_ood_u])[include_sample_c],
    np.concatenate([np.ones(shape=len(Xc)), np.zeros(shape=len(Xc_ood_u))])[include_sample_c],
)

plot_circle_toy_confidence(
    Xc, Xc_ood_u[include_sample_c[len(Xc):]], XcT, NN_c_w.predict_proba(XcT)[:,1],
    "uniform-weighted",
)

plot_circle_toy_difference(
    Xc, Xc_ood_u, sample_weight_c[len(Xc):], XcT,
    NN_c_w.predict_proba(XcT)[:,1] - NN_c_u.predict_proba(XcT)[:,1],
    "uniform-weighted-difference",
)

In [21]:
Xh_ood_u = np.random.default_rng(24).uniform(np.amin(Xh, axis=0) - 1, np.amax(Xh, axis=0) + 1, size=(10000, 10))

NN_h_u = MLPClassifier(activation="relu", random_state=42).fit(
    np.concatenate([Xh, Xh_ood_u]), np.concatenate([np.ones(shape=len(Xh)), np.zeros(shape=len(Xh_ood_u))])
)

plot_haystack_toy_confidence(
    XhT, NN_h_u.predict_proba(XhT)[:,1], "uniform",
)

In [22]:
kde_h = KernelDensity(bandwidth=0.05, kernel="gaussian").fit(Xh)
XhS = kde_h.sample(10000, random_state=42)

xh_id = tf.constant(XhS, dtype=tf.float32)

with tf.GradientTape() as tape:
    tape.watch(xh_id)

    xh_pred_id = optimal_toy_haystack_reconstruction(xh_id)
    xh_mse_id = (xh_pred_id - xh_id) ** 2

adv_grad_h = tape.gradient(xh_mse_id, xh_id)

Xh_ood_g = xh_id + tf.math.sign(adv_grad_h) * 1.0

NN_h_g = MLPClassifier(activation="relu", random_state=42).fit(
    np.concatenate([Xh, Xh_ood_g]), np.concatenate([np.ones(shape=len(Xh)), np.zeros(shape=len(Xh_ood_g))])
)

plot_haystack_toy_confidence(
    XhT, NN_h_g.predict_proba(XhT)[:,1], "fgsm",
)

In [23]:
t = 0.018

for _ in range(10):
    Xh_ood_gt = xh_id + tf.math.sign(adv_grad_h) * t

    NN_h_gt = MLPClassifier(activation="relu", max_iter=500, random_state=42).fit(
        np.concatenate([Xh, Xh_ood_gt]), np.concatenate([np.ones(shape=len(Xh)), np.zeros(shape=len(Xh_ood_gt))])
    )
    
    mean_id_conf = np.mean(NN_h_gt.predict_proba(Xh)[:,1])
    
    print(t, mean_id_conf)
    
    if mean_id_conf < 0.95:
        t *= 1.05
    elif mean_id_conf >= 0.955:
        t *= 0.99
    else:
        break
        
print(f"=> {t:.03}")

plot_haystack_toy_confidence(
    XhT, NN_h_gt.predict_proba(XhT)[:,1], "poking-fgsm",
)

0.018 0.998051672185284
0.01782 0.517419055964483
0.018711 0.9966334477095538
0.018523889999999998 0.9980953197803528
0.0183386511 0.517321444909457
0.019255583655 0.9974442166289939
0.01906302781845 0.9971731933284816
0.018872397540265498 0.9976521947544364
0.018683673564862842 0.9977951474129669
0.018496836829214214 0.9972955754889088
=> 0.0183


In [24]:
Xh_ood_gu = xh_id + tf.math.sign(adv_grad_h) * tf.math.abs(
    tf.random.uniform(minval=0.0, maxval=1.0, shape=(len(xh_id), 1), seed=242)
)

NN_h_gu = MLPClassifier(activation="relu", random_state=42).fit(
    np.concatenate([Xh, Xh_ood_gu]), np.concatenate([np.ones(shape=len(Xh)), np.zeros(shape=len(Xh_ood_gu))])
)

plot_haystack_toy_confidence(
    XhT, NN_h_gu.predict_proba(XhT)[:,1], "uniform-fgsm",
)

In [25]:
EhT = np.array(optimal_toy_haystack_reconstruction(Xh) - Xh)
EhOgu = np.array(optimal_toy_haystack_reconstruction(Xh_ood_gu) - Xh_ood_gu)

kde_hT = KernelDensity(bandwidth=0.1, kernel="linear").fit(EhT)
kde_hOgu = KernelDensity(bandwidth=0.1, kernel="linear").fit(EhOgu)
kde_hOguD = np.exp(kde_hOgu.score_samples(EhOgu))

kde_hTOgu = np.exp(kde_hT.score_samples(EhOgu))

ihgu = np.argmax(kde_hTOgu)
fhgu = kde_hOguD[ihgu] / kde_hTOgu[ihgu]

sample_weight_hgu = np.concatenate([
    np.ones(shape=len(Xh)),
    1.0 - ((kde_hTOgu * fhgu / np.amax(kde_hOguD)) / np.maximum(
        kde_hTOgu * fhgu / np.amax(kde_hOguD), kde_hOguD / np.amax(kde_hOguD)
    )),
])

include_sample_hgu = np.random.default_rng(424).random(size=len(sample_weight_hgu)) < sample_weight_hgu

NN_h_guw = MLPClassifier(activation="relu", max_iter=500, random_state=42).fit(
    np.concatenate([Xh, Xh_ood_gu])[include_sample_hgu],
    np.concatenate([np.ones(shape=len(Xh)), np.zeros(shape=len(Xh_ood_gu))])[include_sample_hgu],
)

plot_haystack_toy_confidence(
    XhT, NN_h_guw.predict_proba(XhT)[:,1],
    "uniform-fgsm-weighted",
)

plot_haystack_toy_difference(
    XhT, NN_h_guw.predict_proba(XhT)[:,1] - NN_h_gu.predict_proba(XhT)[:,1],
    "uniform-fgsm-weighted-difference",
)

In [26]:
EhT = np.array(optimal_toy_haystack_reconstruction(Xh) - Xh)
EhO = np.array(optimal_toy_haystack_reconstruction(Xh_ood_u) - Xh_ood_u)

kde_hT = KernelDensity(bandwidth=0.1, kernel="linear").fit(EhT)
kde_hO = KernelDensity(bandwidth=0.1, kernel="linear").fit(EhO)
kde_hOD = np.exp(kde_hO.score_samples(EhO))

kde_hTO = np.exp(kde_hT.score_samples(EhO))

ih = np.argmax(kde_hTO)
fh = kde_hOD[ih] / kde_hTO[ih]

sample_weight_h = np.concatenate([
    np.ones(shape=len(Xh)),
    1.0 - ((kde_hTO * fh / np.amax(kde_hOD)) / np.maximum(
        kde_hTO * fh / np.amax(kde_hOD), kde_hOD / np.amax(kde_hOD)
    )),
])

include_sample_h = np.random.default_rng(424).random(size=len(sample_weight_h)) < sample_weight_h

NN_h_w = MLPClassifier(activation="relu", random_state=42).fit(
    np.concatenate([Xh, Xh_ood_u])[include_sample_h],
    np.concatenate([np.ones(shape=len(Xh)), np.zeros(shape=len(Xh_ood_u))])[include_sample_h],
)

plot_haystack_toy_confidence(
    XhT, NN_h_w.predict_proba(XhT)[:,1],
    "uniform-weighted",
)

plot_haystack_toy_difference(
    XhT, NN_h_w.predict_proba(XhT)[:,1] - NN_h_u.predict_proba(XhT)[:,1],
    "uniform-weighted-difference",
)