Skip to content

Commit

Permalink
Merge pull request #2 from kristianeschenburg/multidim
Browse files Browse the repository at this point in the history
multidimensional pairwise distance computations
  • Loading branch information
kristianeschenburg committed Jun 30, 2020
2 parents aa6b285 + 152bbd9 commit 5b6b4c7
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 20 deletions.
91 changes: 88 additions & 3 deletions submet/metrics.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,29 @@
import numpy as np
import scipy.spatial.distance as ssd
from scipy.stats import spearmanr

class Metric(object):
class UnivariateMetric(object):

"""
Wrapper class to compute the similarity between two matrices.
Here, we provide only Spearman rho and Pearson R correlation
similarities.
"""

def __init__(self, metric='spearman'):

self.metric=metric

def fit(self, X, y=None):

function_map = {'spearman': spearman,
'pearson': pearson}

return function_map[self.metric](X, y)


class SubspaceMetric(object):

"""
Wrapper class to compute the distance between two equi-dimensional subspaces.
Expand Down Expand Up @@ -145,12 +168,74 @@ def projection(theta):
def spectral(theta):

"""
Compute Projection distance between two equi-dimensional subspaces.
Compute Spectral distance between two equi-dimensional subspaces.
Parameters:
- - - - -
theta: float, array
arccos(singular values) of inner product of two equi-dimensional subspaces
"""

return 2*np.sin(theta[-1]/2)
return 2*np.sin(theta[-1]/2)


def spearman(X, y=None):

"""
Compute the Spearman rho rank correlation of a matrix's features.
Parameters:
- - - - -
X: float, array
N by K matrix
y: float, array
optional
N by M matrix
Returns:
- - - -
rho: float, array
(M+K) by (M+K) correlation matrix
"""

rho = spearmanr(X, y)[0]

return rho

def pearson(X, y=None):

"""
Compute the Pearson R correlation of a matrix's features.
Parameters:
- - - - -
X: float, array
N by K matrix
y: float, array
optional
N by M matrix
Returns:
- - - -
sim: float, array
K by K correlation matrix
K by M cross-correlation matrix (if y defined)
"""

xd = X.ndim

if not y:
sim = ssd.squareform(ssd.pdist(X.T, metric='correlation'))
sim = 1-sim

if y:
yd = y.ndim
if yd == 1:
y = y[None,:]

sim = ssd.cdist(X.T, y, metric='correlation')
sim = 1-sim

return sim


70 changes: 53 additions & 17 deletions submet/subspace.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from numba import prange
import numpy as np
from submet.metrics import Metric
from submet.metrics import SubspaceMetric


class SubspaceDistance(object):
Expand All @@ -22,37 +23,72 @@ def __init__(self, metric='Grassmann'):

self.metric = metric

def fit(self, X, Y):
def fit(self, X, Y=None, axis=0):
"""
Fit subspace distance between two equi-dimensional subspaces.
Parameters:
- - - - -
X, Y: float, array
two equi-dimensional subspaces
Y is optional
If Y is not provided, output is an Sx by Sx matrix, where Sx is
the number of samples in X.
If Y is provided, output is an Sx by Sy matrix, where Sx/Sy are
the number of samples in each matrix.
"""

M = Metric(self.metric)
M = SubspaceMetric(self.metric)
n = X.shape[axis]
p = Y.shape[axis] if np.any(Y) else X.shape[axis]

# return dimensions of each subspace
if X.ndim == 1:
X = X[:, None]
d = np.zeros((n, p))

if Y.ndim == 1:
Y = Y[:, None]
for i in prange(n):
for j in prange(p):

# orthogonalize each subspace
q1,_ = np.linalg.qr(X)
q2,_ = np.linalg.qr(Y)
theta = self._pabs(X[i], Y[j] if np.any(Y) else X[j])
d[i, j] = M.fit(theta)

if not np.any(Y):
d[np.diag_indices(n)] = 0

self.distance_ = d

def _pabs(self, x, y):

"""
Compute the principle angles between two subspaces.
Parameters:
- - - - -
x,y: float, array
two subspaces of each dimensions
Returns:
- - - -
theta: float, array
principle angles between the subspaces spanned
by the columns of x and y
"""

if x.ndim == 1:
x = x[:, None]

if y.ndim == 1:
y = y[:, None]

# orthogonalize each subspace
q1,_ = np.linalg.qr(x)
q2,_ = np.linalg.qr(y)

# compute inner product matrix
S = q1.T.dot(q2)[None, :]
[u, s, v] = np.linalg.svd(S, full_matrices=False)

# compute principle angles
theta = np.arccos(s).squeeze()

# compute subspace distance
self.distance_ = M.fit(theta)
self.x_ = q1.dot(u).squeeze()
self.y_ = q2.dot(v).squeeze()

return theta
13 changes: 13 additions & 0 deletions submet/univariate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import numpy as np
from scipy.stats import spearmanr

class UnivariateSimilarity(object):

"""
"""

self __init__(self, metric='pearson'):

self.metric=metric
self.simmap = {'pearson': }

0 comments on commit 5b6b4c7

Please sign in to comment.