In [20]:
from scipy.spatial import distance
from scipy.spatial.distance import cdist
import numpy as np
# paper: https://arxiv.org/pdf/1807.03888.pdf

In [21]:
def empirical_class_mean(X, Y, c, func, penultimate_neuron_num):
    """
    X is of shame (batch, num_of_small_segs, feats_in_one_seg).
    Y is of shape (batch, class).
    c is a class, of type integer.
    func is the output of the penultimate layer of DNNs. 
    func should be called as func(x), s.t. x is one xample in X.
    """
    
    # get 1/N_c, the number of training samples with label c
    N_c = 0
    for item in Y:
        if np.amax(item) == c:
            N_c += 1
    
    sum_func = np.array([0] * penultimate_neuron_num)
    for index in range(0, len(Y)):
        if Y[index] == c:
            sum_func = np.add(func(X[index]), sum_func) #func() results in the output of the penultimate layer of the DNN 
    
    return sum_func / N_c

In [22]:
def empirical_class_means(X, Y, C, func, penultimate_neuron_num):
    """
    Calculate the empirical class means for all classes and output a numpy array.
    """
    result = []
    for c in C:
        miu = empirical_class_mean(X, Y, c, func, penultimate_neuron_num)
        result.append(miu)
        
    return np.array(result)

In [23]:
def emprical_covariance(X, Y, C, func, penultimate_neuron_num):
    assert len(X) == len(Y)
    N = len(Y)
    
    mius = empirical_class_means(X, Y, C, func, penultimate_neuron_num)
    miu = mius[0]
    
    sum_all_classes = np.array(miu.shape)
    
    print(mius)
    
    for c in C:
        sum_single_c = np.array(miu.shape)
        for index in range(0, len(Y)):
            if Y[index] == c:
                miu = mius[c] # may cause problem during integration. Pay attention to the types of class labels
                
                difference = func(X[index]) - miu
                transpose = np.transpose(difference)
                result = difference * transpose
                sum_single_c = np.add(sum_single_c, result)
        
        sum_all_classes = np.add(sum_all_classes, sum_single_c)
        
    covariance = sum_all_classes/N
    
    return covariance

In [24]:
def mahalanobis(x, y):
    i, j, k = x.shape
    xx = x.reshape(i, j*k).T
    yy = y.reshape(i, j*k).T
    
    X = np.vstack([xx,yy])
    V = np.cov(X.T)
    VI = np.linalg.inv(V)
    return np.diag(np.sqrt(np.dot(np.dot((xx-yy),VI),(xx-yy).T)))

In [25]:
X = np.array([[1,2,5], [3,4,6]])
Y = np.array([[0],[1]])
C = [0, 1]

In [28]:
def func(x):
    return np.random.rand()

emprical_covariance(X, Y, C, func, 6)
#empirical_class_means(X, Y, C, func, 3)

[[0.39443132 0.39443132 0.39443132 0.39443132 0.39443132 0.39443132]
 [0.99908048 0.99908048 0.99908048 0.99908048 0.99908048 0.99908048]]


array([9.43719778, 9.43719778, 9.43719778, 9.43719778, 9.43719778,
       9.43719778])

Covariance of training samples (x, y)