In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

import data
import uncertainty_estimation as ue
from main_bayesian import getModel

In [None]:
# CUDA settings: use the first GPU when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

### Calculate uncertainties
Load the model from a checkpoint and evaluate it on the test set.

In [None]:
# Load data (only the test split is used in this notebook)
_, testset, inputs, num_classes = data.getDataset('POCUS')

# Load model
ckpt = 'PATH_TO_CHECKPOINT'  # placeholder: point this at the checkpoint file before running
# NOTE(review): `cfg` is not imported anywhere in the visible notebook —
# presumably the project's configuration module; import it in the first cell.
layer_type = cfg.layer_type
activation_type = cfg.activation_type

net = getModel('POCUS', inputs, num_classes, priors=None, layer_type=layer_type, activation_type=activation_type)
# map_location lets a checkpoint that was saved on a GPU load on a CPU-only machine.
checkpoint = torch.load(ckpt, map_location=device)
net.load_state_dict(checkpoint["model_state_dict"])
# NOTE(review): open question from the original author — should this be train or eval?
# train() keeps train-time layer behaviour active; confirm which mode the
# uncertainty estimation expects before relying on the results.
net.train()
net.to(device)


In [None]:
def evaluate_uncertainties(model):
    """Compute per-sample uncertainties for ``model`` (not implemented yet).

    The original cell contained only the ``def`` line, which is a syntax
    error; this stub keeps the cell runnable and fails loudly when called.

    The later cells of this notebook read a global named ``uncertainties``
    (it is not assigned anywhere in the visible notebook) and index each of
    its records as follows:

    * ``[0]`` epistemic uncertainty (normalized)
    * ``[1]`` aleatoric uncertainty (normalized)
    * ``[2]`` epistemic uncertainty (softmax)
    * ``[3]`` aleatoric uncertainty (softmax)
    * ``[4]`` truthy when the prediction was right
    * ``[5]`` class index of the sample

    TODO: implement and confirm that this function is meant to produce
    records in that layout.

    Raises:
        NotImplementedError: always, until the evaluation is implemented.
    """
    raise NotImplementedError('evaluate_uncertainties is not implemented yet')


### Data inspection 
Look at the data with PCA and t-SNE

In [None]:
# PCA with 3 components, colored by the class index stored at position 5 of each uncertainty record
images = testset.data.numpy()
# Flatten every sample into one row. Inferring the row length (instead of
# hard-coding 28*28) keeps one row per sample for any image size.
flat_images = images.reshape(len(images), -1)
# random_state only matters when scikit-learn picks a randomized solver; it keeps re-runs identical.
pca = PCA(n_components=3, random_state=0)
pca_result = pca.fit_transform(flat_images)
point_classes = [record[5] for record in uncertainties]
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(xs=pca_result[:, 0], ys=pca_result[:, 1], zs=pca_result[:, 2], c=point_classes)
plt.title('PCA with 3 components')
plt.show()


In [None]:
# t-SNE with 3 components (PCA first reduces the data to 30 dimensions), color by label
images = testset.data.numpy()
# Flatten every sample into one row. Inferring the row length (instead of
# hard-coding 28*28) keeps one row per sample for any image size.
flat_images = images.reshape(len(images), -1)
pca = PCA(n_components=30, random_state=0)
pca_result = pca.fit_transform(flat_images)
# t-SNE is stochastic; a fixed seed makes the figure reproducible across runs.
tsne = TSNE(n_components=3, random_state=0)
tsne_result = tsne.fit_transform(pca_result)
point_classes = [record[5] for record in uncertainties]
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(xs=tsne_result[:, 0], ys=tsne_result[:, 1], zs=tsne_result[:, 2], c=point_classes)
plt.title('t-SNE')
plt.show()


Check how many correct and wrong predictions there were.

In [None]:
# Count how many right and wrong predictions there are for each class
right = [0] * num_classes
wrong = [0] * num_classes
for record in uncertainties:
    class_index = int(record[5])
    # record[4] is truthy when the prediction was right
    if record[4]:
        right[class_index] += 1
    else:
        wrong[class_index] += 1

# Iterate over num_classes (not a hard-coded 10) so the report matches the
# length of the counters for any dataset.
for class_index in range(num_classes):
    print('Class', class_index, ' - ', 'Right:', right[class_index], ' , ', 'Wrong:', wrong[class_index])


Check the range of the uncertainties.


In [None]:
# Print the minimum and maximum of each uncertainty column; the position of a
# caption in this tuple is the index of the column it describes.
range_captions = (
    'Epistemic Uncertainty (Normalized):',
    'Aleatoric Uncertainty (Normalized):',
    'Epistemic Uncertainty (Softmax):',
    'Aleatoric Uncertainty (Softmax):',
)
for column, caption in enumerate(range_captions):
    column_values = [record[column] for record in uncertainties]
    print(caption, min(column_values), max(column_values))

### Uncertainty visualization

Normalized epistemic uncertainty

In [None]:
def sort_uncertainties(u_type, normalized, uncertainties, dataset=None):
    """Split the test samples into five groups of increasing uncertainty.

    The range between the smallest and the largest value of the selected
    uncertainty is cut into five equally wide intervals, and every sample is
    assigned to the interval its uncertainty falls into.

    Args:
        u_type: ``'epistemic'`` or ``'aleatoric'``.
        normalized: truthy selects the normalized uncertainty, falsy the
            softmax one.
        uncertainties: iterable of records, one per test sample and in the
            same order as the samples of the dataset. ``record[0..3]`` hold
            the normalized epistemic, normalized aleatoric, softmax epistemic
            and softmax aleatoric uncertainty; ``record[5]`` holds the label.
        dataset: object whose ``.data.numpy()`` returns the samples with the
            sample axis first. Defaults to the notebook-level ``testset``.

    Returns:
        A tuple ``(testsets, thresholds, labelssets, vals)``:
        ``testsets`` is a list of five lists of flattened samples,
        ``thresholds`` the six interval borders ``[t0, ..., t5]``,
        ``labelssets`` five lists of labels parallel to ``testsets`` and
        ``vals`` the five group names.

    Raises:
        ValueError: if ``u_type`` is unknown or ``uncertainties`` is empty.
    """
    # get the correct index at which the uncertainty is stored
    if u_type == 'epistemic':
        u = 0 if normalized else 2
    elif u_type == 'aleatoric':
        u = 1 if normalized else 3
    else:
        raise ValueError("u_type must be 'epistemic' or 'aleatoric', got " + repr(u_type))

    records = list(uncertainties)
    if not records:
        raise ValueError('uncertainties is empty; there is nothing to split')

    # create thresholds from the SELECTED uncertainty column, and do so before
    # the grouping loop below needs them
    values = [record[u] for record in records]
    t0 = min(values)
    t5 = max(values)
    steps = (t5 - t0) / 5

    t1 = t0 + steps
    t2 = t1 + steps
    t3 = t2 + steps
    t4 = t3 + steps

    thresholds = [t0, t1, t2, t3, t4, t5]
    vals = ["small", "small-medium", "medium", "medium-large", "large"]

    # Flatten the samples once (not once per record); the row length is
    # inferred so any image size works, not only 28x28.
    if dataset is None:
        dataset = testset
    images = dataset.data.numpy()
    flat_images = images.reshape(len(images), -1)

    testsets = [[] for _ in vals]
    labelssets = [[] for _ in vals]
    upper_bounds = thresholds[1:5]  # t1..t4; the last group has no upper bound

    for i, record in enumerate(records):
        # First interval whose upper border lies above the value; values that
        # are below none of the borders (including the maximum) go last.
        group = len(upper_bounds)
        for k, bound in enumerate(upper_bounds):
            if record[u] < bound:
                group = k
                break
        testsets[group].append(flat_images[i])
        labelssets[group].append(record[5])

    return testsets, thresholds, labelssets, vals


In [None]:
# t-SNE with 3 components per uncertainty group (PCA first reduces the dimensionality), color by label
def plot_tsne(uncertainties, u_type='epistemic', normalized=True):
    """Draw one 3-D t-SNE scatter plot per uncertainty group.

    The samples are split into five groups with ``sort_uncertainties``. Each
    group is reduced with PCA (at most 30 components), embedded into three
    dimensions with t-SNE and plotted with the points colored by label.

    Args:
        uncertainties: per-sample uncertainty records, passed through to
            ``sort_uncertainties``.
        u_type: ``'epistemic'`` or ``'aleatoric'``.
        normalized: truthy selects the normalized uncertainty, falsy the
            softmax one.

    Groups with fewer than four samples are skipped with a printed note,
    because they are too small for the PCA/t-SNE pipeline used here.
    """
    min_samples = 4
    scale_name = 'normalized' if normalized else 'softmax'

    testsets, thresholds, labelssets, vals = sort_uncertainties(u_type, normalized, uncertainties)

    for group, (set_i, labels_i) in enumerate(zip(testsets, labelssets)):
        n_samples = len(labels_i)
        if n_samples < min_samples:
            print('Skipping ' + vals[group] + ' group: only ' + str(n_samples) + ' sample(s)')
            continue

        samples = np.asarray(set_i)
        # PCA cannot extract more components than there are samples or features.
        n_components = min(30, n_samples, samples.shape[1])
        pca = PCA(n_components=n_components, random_state=0)
        pca_result = pca.fit_transform(samples)
        # perplexity must stay below the number of samples; 30 is the
        # scikit-learn default and is kept for large groups. The fixed seed
        # makes the stochastic embedding reproducible.
        tsne = TSNE(n_components=3, perplexity=min(30, n_samples - 1), random_state=0)
        tsne_result = tsne.fit_transform(pca_result)

        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(xs=tsne_result[:, 0], ys=tsne_result[:, 1], zs=tsne_result[:, 2], c=labels_i, cmap='tab10')
        # The title names the uncertainty that was actually plotted.
        plt.title('t-SNE for ' + vals[group] + ' ' + u_type + ' uncertainty (' + scale_name + ') (between '
                  + str(thresholds[group]) + ' and ' + str(thresholds[group + 1]) + ')')
        plt.show()
        plt.close(fig)  # release the figure; this loop creates several

In [None]:
# t-SNE plots per uncertainty group, grouped by the normalized epistemic uncertainty
plot_tsne(uncertainties, u_type='epistemic', normalized=True)

In [None]:
# t-SNE plots per uncertainty group, grouped by the softmax (non-normalized) epistemic uncertainty
plot_tsne(uncertainties, u_type='epistemic', normalized=False)

In [None]:
# t-SNE plots per uncertainty group, grouped by the normalized aleatoric uncertainty
plot_tsne(uncertainties, u_type='aleatoric', normalized=True)

In [None]:
# t-SNE plots per uncertainty group, grouped by the softmax (non-normalized) aleatoric uncertainty
plot_tsne(uncertainties, u_type='aleatoric', normalized=False)