In [1]:
from nuq import NuqClassifier

import numpy as np
from tqdm.auto import tqdm

%load_ext autoreload
%autoreload 2

In [2]:
# Synthetic dataset: `n_points` samples on a circle of radius `circle_r`
# living in a `dim`-dimensional space.
n_points, circle_r, dim = 100, 10, 2

# Kernel bandwidth; the same value is used for every coordinate.
h = 4

# Settings forwarded to NuqClassifier in the circle experiments.
kernel = 'RBF'
method = 'all_data'
use_uniform_prior = False

In [None]:
def generate_points_on_circle(circle_r=10, circle_x=0, circle_y=0, n_points=10, uniform=True):
    """Return ``n_points`` 2-D points lying on a circle.

    Parameters
    ----------
    circle_r : float
        Radius of the circle.
    circle_x, circle_y : float
        Coordinates of the circle centre.
    n_points : int
        Number of points to generate.
    uniform : bool
        If True, the points are evenly spaced in angle (deterministic).
        If False, the angles are drawn independently from ``np.random.rand``.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_points, 2)`` holding the ``(x, y)`` coordinates.
    """
    if uniform:
        # endpoint=False: the angles 0 and 2*pi describe the same point, so
        # including both would duplicate it and break the even spacing.
        alpha = 2 * np.pi * np.linspace(start=0, stop=1, num=n_points, endpoint=False)
    else:
        alpha = 2 * np.pi * np.random.rand(n_points)

    x = circle_r * np.cos(alpha) + circle_x
    y = circle_r * np.sin(alpha) + circle_y

    return np.vstack((x, y)).T

In [None]:
# Evenly spaced training points on the circle centred at the origin.
X = generate_points_on_circle(
    circle_r=circle_r,
    n_points=n_points,
)

In [5]:
import matplotlib.pyplot as plt
%matplotlib widget

In [6]:
plt.close()
fig, ax = plt.subplots()

# Training points in the default colour, the origin highlighted in red.
ax.scatter(X[:, 0], X[:, 1])
ax.scatter(0, 0, color='red')
ax.axis('equal')
plt.show();

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [7]:
# Fixed (non-tuned) bandwidth: the same value `h` for both coordinates.
nuq = NuqClassifier(
    bandwidth=np.full(2, h),
    tune_bandwidth=False,
    method=method,
    kernel_type=kernel,
    use_uniform_prior=use_uniform_prior,
)

In [8]:
# One integer label per training point; every point starts in class 0.
y = np.zeros(len(X), dtype='int')

In [9]:
# Fit on the circle points with their current labels; the returned object is
# the cell's last expression, so its repr is displayed below.
nuq.fit(X, y)

NuqClassifier(bandwidth=array([4, 4]), method='all_data', n_neighbors=100,
              tune_bandwidth=False, use_uniform_prior=False)

In [10]:
def test_circle(nuq_trained, dim, r, i=0):
    origin = np.zeros((1, dim))
    rand_vect = np.random.randn(1, dim)
    N = nuq_trained.n_neighbors
    
    kde_pred = nuq_trained.get_kde(origin)
    kernel_output = nuq_trained.kernel(rand_vect, rand_vect)
    uncertainties_pred = nuq_trained.predict_uncertainty(np.zeros((1, 2)), )
    
    aleatoric_pred = uncertainties_pred['aleatoric']
    epistemic_pred = uncertainties_pred['epistemic']
    
    if len(np.unique(nuq_trained.bandwidth)) > 1:
        raise ValueError('Bandwidth must be isotropic')
    h = nuq_trained.bandwidth[0]
    KDE_error = np.abs(-dim * (np.log(2 * np.pi) * 0.5 + np.log(h)) - (r**2 / (2 * h**2)) - kde_pred)
    Kernel_error = np.abs(-dim / 2. * np.log(2 * np.pi) - kernel_output)
    Aleatoric_error = np.abs(np.log(nuq_trained.coeff + min(1 - i / N, i / N)) - aleatoric_pred)
    Epistemic_error = np.abs((6 - dim) * 0.25 * np.log(2.) - 0.5 * np.log(np.pi) - 0.5 * np.log(nuq_trained.n_neighbors) + 
                             0.5 * np.log(nuq_trained.coeff + i * (N - i) / N ** 2) + (r**2 / (4 * h**2)) - epistemic_pred)
    
    
    print(f'{KDE_error=}')
    print(f'{Kernel_error=}')
    print(f'{Aleatoric_error=}')
    print(f'{Epistemic_error=}')
    
    return {
        "kde_pred": kde_pred,
        "kernel_output": kernel_output,
        **uncertainties_pred
    }

In [11]:
# print("KDE error:", -dim * (np.log(2 * np.pi) * 0.5 + np.log(h)) - (circle_r**2 / (2 * h**2)) - nuq.get_kde(np.zeros((1, 2))))

In [12]:
# print("Ue error:", (6 - dim) * 0.25 * np.log(2.) - 0.5 * np.log(np.pi) - 0.5 * np.log(nuq.n_neighbors) + 0.5 * np.log(nuq.coeff) + (circle_r**2 / (4 * h**2))- uncertainties['epistemic'])

In [13]:
# rand_vect = np.random.randn(1, 2)
# print("Kernel:", -dim / 2. * np.log(2 * np.pi) - nuq.kernel(rand_vect, rand_vect))

In [14]:
# Check the single-class fit (i keeps its default of 0). Only the printed
# errors matter here, so the returned dict is discarded.
_ = test_circle(nuq_trained=nuq, dim=dim, r=circle_r)

KDE_error=array([[8.8817842e-16]])
Kernel_error=array([0.])
Aleatoric_error=0.0
Epistemic_error=8.881784197001252e-16


In [15]:
# Random order of the point indices 0..n_points-1. A fixed seed keeps that
# order identical after every kernel restart.
indices = np.random.default_rng(0).permutation(n_points)

In [16]:
y = np.zeros(X.shape[0], dtype='int')

# Column name in `dict_of_results` -> key of the dict returned by test_circle.
result_sources = {
    'epistemic': 'epistemic',
    'aleatoric': 'aleatoric',
    'total': 'total',
    'KDE': 'kde_pred',
    'kernel': 'kernel_output',
}
dict_of_results = {column: [] for column in result_sources}

for i in tqdm(range(n_points + 1)):

    # From the second iteration on, move one more point (in the order given
    # by `indices`) to class 1, so iteration i has exactly i points in class 1.
    if i > 0:
        y[indices[i - 1]] = 1

    nuq = NuqClassifier(
        kernel_type=kernel,
        use_uniform_prior=use_uniform_prior,
        method=method,
        tune_bandwidth=False,
        bandwidth=np.array([h, h])
    )
    nuq.fit(X, y)

    res = test_circle(nuq_trained=nuq, dim=dim, r=circle_r, i=i)
    for column, source in result_sources.items():
        dict_of_results[column].append(res[source].squeeze())

  0%|          | 0/101 [00:00<?, ?it/s]

KDE_error=array([[8.8817842e-16]])
Kernel_error=array([0.])
Aleatoric_error=0.0
Epistemic_error=8.881784197001252e-16
KDE_error=array([[8.8817842e-16]])
Kernel_error=array([0.])
Aleatoric_error=0.0
Epistemic_error=4.440892098500626e-16
KDE_error=array([[8.8817842e-16]])
Kernel_error=array([0.])
Aleatoric_error=4.440892098500626e-16
Epistemic_error=4.440892098500626e-16
KDE_error=array([[8.8817842e-16]])
Kernel_error=array([0.])
Aleatoric_error=8.881784197001252e-16
Epistemic_error=8.881784197001252e-16
KDE_error=array([[8.8817842e-16]])
Kernel_error=array([0.])
Aleatoric_error=4.440892098500626e-16
Epistemic_error=4.440892098500626e-16
KDE_error=array([[8.8817842e-16]])
Kernel_error=array([0.])
Aleatoric_error=4.440892098500626e-16
Epistemic_error=4.440892098500626e-16
KDE_error=array([[8.8817842e-16]])
Kernel_error=array([0.])
Aleatoric_error=4.440892098500626e-16
Epistemic_error=4.440892098500626e-16
KDE_error=array([[8.8817842e-16]])
Kernel_error=array([0.])
Aleatoric_error=0.0
Epis

In [18]:
plt.close()
# One panel per collected quantity; the x-axis is the number of points that
# had been moved to class 1 when the value was recorded.
fig, ax = plt.subplots(nrows=1, ncols=len(dict_of_results), sharex=True, figsize=(12, 4))

for axis, (key, values) in zip(ax, dict_of_results.items()):
    axis.set_title(key)
    axis.plot(np.arange(n_points + 1), values)
    axis.set_xlabel('points labelled 1')

fig.tight_layout()
plt.show();

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [19]:
# All points belong to class 0 in this sweep. Size the labels from the
# configuration, not from whichever `X` currently lives in the kernel.
y = np.zeros(n_points, dtype='int')

# Column name in `dict_of_results` -> key of the dict returned by test_circle.
result_sources = {
    'epistemic': 'epistemic',
    'aleatoric': 'aleatoric',
    'total': 'total',
    'KDE': 'kde_pred',
    'kernel': 'kernel_output',
}
dict_of_results = {column: [] for column in result_sources}

radii = [1, 5, 10, 20, 50, 100]

for r in tqdm(radii):

    # Use a sweep-local name: overwriting the global `X` would leave a
    # radius-100 circle behind and silently break a re-run of earlier cells
    # that pair `X` with `circle_r`.
    X_r = generate_points_on_circle(n_points=n_points, circle_r=r)

    nuq = NuqClassifier(
        kernel_type=kernel,
        use_uniform_prior=use_uniform_prior,
        method=method,
        tune_bandwidth=False,
        bandwidth=np.array([h, h])
    )

    nuq.fit(X_r, y)
    res = test_circle(nuq_trained=nuq, dim=dim, r=r, i=0)

    for column, source in result_sources.items():
        dict_of_results[column].append(res[source].squeeze())

  0%|          | 0/6 [00:00<?, ?it/s]

KDE_error=array([[8.8817842e-16]])
Kernel_error=array([0.])
Aleatoric_error=0.0
Epistemic_error=8.881784197001252e-16
KDE_error=array([[8.8817842e-16]])
Kernel_error=array([0.])
Aleatoric_error=0.0
Epistemic_error=8.881784197001252e-16
KDE_error=array([[8.8817842e-16]])
Kernel_error=array([0.])
Aleatoric_error=0.0
Epistemic_error=8.881784197001252e-16
KDE_error=array([[3.55271368e-15]])
Kernel_error=array([0.])
Aleatoric_error=0.0
Epistemic_error=1.887379141862766e-15
KDE_error=array([[1.42108547e-14]])
Kernel_error=array([0.])
Aleatoric_error=0.0
Epistemic_error=7.105427357601002e-15
KDE_error=array([[0.]])
Kernel_error=array([0.])
Aleatoric_error=0.0
Epistemic_error=2.842170943040401e-14


In [20]:
plt.close()
# One panel per collected quantity, plotted against the circle radius used
# for the corresponding fit.
fig, ax = plt.subplots(nrows=1, ncols=len(dict_of_results), sharex=True, figsize=(12, 4))

for axis, (key, values) in zip(ax, dict_of_results.items()):
    axis.set_title(key)
    axis.plot(radii, values)
    axis.set_xlabel('circle radius')

fig.tight_layout()
plt.show();

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Test bandwidth selection

In [21]:
import torch

In [22]:
# Covariance shared by the three clusters: unit variances, 0.99 off-diagonal.
covariance = torch.tensor([
    [1., 0.99],
    [0.99, 1.],
])

# Cluster centres differ only in their first coordinate.
mean1, mean2, mean3 = (torch.tensor([center_x, 0.]) for center_x in (-3., 0., 3.))

In [23]:
# Seed torch's global RNG so the three clusters are identical on every run.
torch.manual_seed(0)

# Number of points drawn for each of the three classes.
n_per_class = 1000

# Each cluster shares the covariance and differs only in its mean; the
# classes are labelled 1, 2 and 3.
samples1 = torch.distributions.MultivariateNormal(loc=mean1, covariance_matrix=covariance).sample((n_per_class, ))
y_1 = np.full(n_per_class, 1, dtype=int)
samples2 = torch.distributions.MultivariateNormal(loc=mean2, covariance_matrix=covariance).sample((n_per_class, ))
y_2 = np.full(n_per_class, 2, dtype=int)
samples3 = torch.distributions.MultivariateNormal(loc=mean3, covariance_matrix=covariance).sample((n_per_class, ))
y_3 = np.full(n_per_class, 3, dtype=int)

In [24]:
# Stack clusters and labels in the same order so row k of `samples` is
# described by y[k].
samples = torch.cat((samples1, samples2, samples3)).numpy()
y = np.hstack((y_1, y_2, y_3))

In [25]:
# Settings for the bandwidth-selection experiment, collected in one place and
# then unpacked into the constructor.
nuq_toy_params = dict(
    kernel_type='RBF',
    method='hnsw',
    n_neighbors=20,
    coeff=0.0001,
    tune_bandwidth=True,
    strategy='classification',
    bandwidth=np.array([1.]),
    precise_computation=True,
    use_centroids=False,
    use_uniform_prior=True,
)
nuq_toy = NuqClassifier(**nuq_toy_params)

In [26]:
import seaborn as sns
import pandas as pd

In [27]:
plt.close()

# Joint scatter of the two coordinates; seaborn creates its own figure here.
# (A plain scatter was used previously: plt.scatter(samples[:, 0], samples[:, 1]))
sns.jointplot(x=samples[:, 0], y=samples[:, 1])
# NOTE(review): plt.axis acts on pyplot's *current* axes, which after
# jointplot is presumably one of the marginal axes rather than the joint
# scatter axes -- confirm the equal scaling lands where intended.
plt.axis('equal')

plt.show();

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [28]:
# Keep each coordinate as a column vector of shape (n_samples, 1).
samples_x = samples[:, 0:1]
samples_y = samples[:, 1:2]

In [29]:
from KDEpy.bw_selection import improved_sheather_jones, silvermans_rule, scotts_rule

In [30]:
# Fit the toy classifier on the three clusters; it was constructed with
# tune_bandwidth=True. The fitted object's repr is displayed below.
nuq_toy.fit(samples, y)

mean distance = [0.         0.00284421 0.00464653 0.00706049 0.00980973 0.01168261
 0.01422034 0.01591131 0.01787769 0.01988229 0.02206825 0.02412787
 0.02588523 0.02803879 0.03009396 0.03189409 0.03368265 0.03598214
 0.03797555 0.03989822]
0.03189408779144287
Best accuracy  0.996


NuqClassifier(bandwidth=array(0.03189409, dtype=float32),
              strategy='classification')

In [36]:
# Bandwidth stored on the fitted classifier, with singleton dimensions
# removed. Alternative left by the author: silvermans_rule(samples_x).
x_band = np.squeeze(nuq_toy.bandwidth)
x_band

array(0.03189409, dtype=float32)

In [37]:
# Same stored bandwidth, reused for the second coordinate.
# Alternative left by the author: silvermans_rule(samples_y).
y_band = np.squeeze(nuq_toy.bandwidth)
y_band

array(0.03189409, dtype=float32)

In [38]:
plt.close()
fig, ax = plt.subplots()

ax.scatter(samples_x, samples_y, label='samples')
# Segments whose lengths equal the bandwidths, drawn for visual comparison
# with the spread of the data: horizontal for x, vertical for y.
ax.plot([-1, -1 + x_band], [0, 0], marker = 'o', color='black', label='x bandwidth')
ax.plot([0, 0], [-1, -1 + y_band], marker = 'o', color='red', label='y bandwidth')
ax.axis('equal')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.legend()

plt.show();

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …