In [2]:
import numpy as np

In [3]:
# Some random data in [0, 1000). A fixed-seed generator is used so that
# Restart & Run All reproduces exactly the same matrix.
data = np.random.RandomState(0).rand(6, 6) * 1000

In [11]:
# Example 3-state probability matrix: P[i, j] is the probability to go from
# i to j. Every row sums to 1 and the matrix is symmetric.
P = np.array([[0.2, 0.3, 0.5],
              [0.3, 0.4, 0.3],
              [0.5, 0.3, 0.2]])

### 3.1 Connectivity

In [5]:
def RBF(x, y, alpha=1):
    """ Evaluate the Gaussian (radial basis function) kernel of two vectors.

    The value is exp(-||x - y||^2 / alpha).

    Arguments:
        - x : first vector (array-like)
        - y : second vector (array-like)
        - alpha: neighborhood factor dividing the squared distance.
          *Note a bigger alpha gives a bigger kernel value for the same pair.

    return: real number; it lies in [0, 1] when alpha is positive.
    """
    difference = np.asarray(x) - np.asarray(y)
    squared_distance = np.linalg.norm(difference) ** 2
    return np.exp(-squared_distance / alpha)

In [6]:
# calculation check
# Compare the absolute difference: a one-sided `value - expected < tol`
# would also pass when the value is far too small.
assert abs(RBF([0, 0], [1, 1], 1) - np.exp(-2)) < 0.0001
assert abs(RBF([0, 0], [0, 0], 1) - 1) < 0.0001

In [7]:
def kernelize(data, k, alpha=1):
    """ Given some data convert it to a kernel matrix using some given kernel.

    Entry [i, j] of the result is k(data[i], data[j], alpha), i.e. the kernel
    evaluated on every ordered pair of rows of the data matrix.

    Arguments:
        data : Data matrix (N, C); any array-like with N rows is accepted
        k: Kernel function, called as k(x, y, alpha) and returning a real number
        alpha: neighborhood factor. *Note bigger alpha increases kernel function value.

    return: float array of shape (N, N); shape (0, 0) for empty data.
    """
    points = np.asarray(data)
    n_points = points.shape[0]
    kernel_matrix = np.empty((n_points, n_points), dtype=float)
    # Every ordered pair is evaluated: no symmetry of k is assumed.
    for row in range(n_points):
        for col in range(n_points):
            kernel_matrix[row, col] = k(points[row], points[col], alpha)
    return kernel_matrix

### 3.3 Diffusion Distance

$$
\begin{aligned}
D_t(X_i, X_j)^2 &= \sum_{u\in{}X}\mid{}p_t(X_i, u) - p_t(X_j, u)\mid{}^2 \\
                &= \sum_{k}\mid{}p_{ik}^t - p_{jk}^t\mid{}^2
\end{aligned}
$$

In [8]:
# Note: this returns the *squared* diffusion distance (no square root is taken)
def diffusion_dist(i, j, P):
    """ Computes the squared diffusion distance between data points X_i and X_j.

    The result is the sum over all columns k of (P[i, k] - P[j, k])**2, i.e.
    the squared Euclidean distance between rows i and j of P.

    Arguments:
        i: index for the first data point
        j: index for the second data point
        P: Probability matrix P[i, j] is the probability to go from i to j.
           Any 2-D array-like (ndarray or nested lists) is accepted.

    return: non-negative real number (the squared distance).
    """
    # np.asarray makes the row slicing below work for nested lists as well.
    transition = np.asarray(P)
    return np.linalg.norm(transition[i, :] - transition[j, :])**2

In [89]:
# P_0 = [.2, .3, .5]
# P_1 = [.3, .4, .3]
# P_2 = [.5, .3, .2]
# Compare within a tolerance rather than with ==: the value is a float
# computed through a square root, so exact equality is fragile.
assert abs(diffusion_dist(0, 1, P) - (.2**2 + .1**2 + .1**2)) < 1e-12
print(diffusion_dist(0,2, P) )

0.18


### 3.4 Diffusion Map

In [90]:
# Element-wise 14th power of every entry of P.
# NOTE(review): this is not the matrix power P^14; if the 14-step transition
# matrix was intended, np.linalg.matrix_power(P, 14) is needed — confirm.
np.power(P, 14)

array([[  1.63840000e-10,   4.78296900e-08,   6.10351562e-05],
       [  4.78296900e-08,   2.68435456e-06,   4.78296900e-08],
       [  6.10351562e-05,   4.78296900e-08,   1.63840000e-10]])

In [91]:
# Y is the transpose of P
Y = P.T

# Eigenvalues and eigenvectors of P. np.linalg.eig returns the eigenvectors
# as the columns of eigen_vecs and does not guarantee any ordering.
eigen_vals, eigen_vecs = np.linalg.eig(P)
Y

array([[ 0.2,  0.3,  0.5],
       [ 0.3,  0.4,  0.3],
       [ 0.5,  0.3,  0.2]])

### TODO

1. Notice `eigen_vecs`: the first column is the first eigenvector (so `eigen_vecs[0, :]` is the first element of each eigenvector).
2. `eigen_vals` and `eigen_vecs` are not sorted in increasing eigenvalue order.

In [99]:
# Row 0 of the eigenvector matrix: the first component of every eigenvector
eigen_vecs[0]

array([-0.57735027, -0.70710678,  0.40824829])

In [103]:
# Display the full eigenvector matrix; each column is one eigenvector of P
eigen_vecs

array([[ -5.77350269e-01,  -7.07106781e-01,   4.08248290e-01],
       [ -5.77350269e-01,  -2.94492254e-17,  -8.16496581e-01],
       [ -5.77350269e-01,   7.07106781e-01,   4.08248290e-01]])

In [100]:
# Scale rows 0 and 2 of the eigenvector matrix component-wise by the eigenvalues
# (presumably the diffusion-map coordinates of points 0 and 2 at t = 1 — confirm)
a = eigen_vals * eigen_vecs[0]
b = eigen_vals * eigen_vecs[2]
print(a)
print(b)

[-0.57735027  0.21213203  0.04082483]
[-0.57735027 -0.21213203  0.04082483]


In [101]:
# Note: this returns the *squared* distance (no square root is taken)
def diffusion_dist2(i, j):
    """ Computes the squared Euclidean distance between two coordinate vectors.

    Unlike diffusion_dist, the arguments are the vectors themselves and not
    indices into a probability matrix.

    Arguments:
        i: coordinate vector of the first data point (array-like)
        j: coordinate vector of the second data point (array-like, same shape as i)

    return: non-negative real number, the sum of the squared differences.
    """
    # np.asarray makes the subtraction work for lists and tuples as well.
    return np.linalg.norm(np.asarray(i) - np.asarray(j))**2

In [102]:
# Squared distance between the scaled eigenvector rows a and b; it agrees with
# the 0.18 printed for diffusion_dist(0, 2, P) up to floating-point rounding.
diffusion_dist2(a, b)

0.17999999999999991