Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

88 lines (71 sloc) 1.948 kB
# -*- coding: utf-8 -*-
# Copyright (C) 2008-2012, Luis Pedro Coelho <luis@luispedro.org>
# vim: set ts=4 sts=4 sw=4 expandtab smartindent:
#
# License: MIT. See COPYING.MIT file in the milk distribution
from __future__ import division
import numpy as np
# Public API: the names exported by a star-import of this module.
__all__ = [
'pdist',
'plike',
]
def _as_inexact(A):
    '''Return `A` as an ndarray, promoting non-floating dtypes to float.'''
    A = np.asarray(A)
    if not np.issubdtype(A.dtype, np.inexact):
        # Integer/bool input would make the in-place float arithmetic in
        # ``pdist`` fail (or silently truncate on old NumPy versions).
        A = A.astype(float)
    return A


def pdist(X, Y=None, distance='euclidean2'):
    '''
    D = pdist(X, Y={X}, distance='euclidean2')

    Compute distance matrix::

        D[i,j] == np.sum( (X[i] - Y[j])**2 )         # distance='euclidean2'
        D[i,j] == np.sqrt(np.sum( (X[i] - Y[j])**2 )) # distance='euclidean'

    Parameters
    ----------
    X : feature matrix (array-like, one row per object)
        Integer input is promoted to floating point.
    Y : feature matrix (default: use `X`)
    distance : one of 'euclidean' or 'euclidean2' (default)
        Any value other than 'euclidean' yields squared distances.

    Returns
    -------
    D : matrix of doubles
        Floating point inputs keep their own precision. The inputs are
        never modified.
    '''
    # Use Dij = np.dot(Xi, Xi) + np.dot(Xj,Xj) - 2.*np.dot(Xi,Xj)
    X = _as_inexact(X)
    if Y is None:
        D = np.dot(X, X.T)
        # The squared norms are exactly the diagonal of the Gram matrix;
        # copy them because D is overwritten in place below.
        x2 = D.diagonal().copy()
        y2 = x2
    else:
        Y = _as_inexact(Y)
        D = np.dot(X, Y.T)
        x2 = (X * X).sum(axis=1)
        y2 = (Y * Y).sum(axis=1)
    D *= -2.
    D += x2[:, np.newaxis]
    D += y2
    # Because of numerical imprecision, we might get negative numbers
    # (which cause problems down the road, e.g., when doing the sqrt):
    np.maximum(D, 0, D)
    if distance == 'euclidean':
        np.sqrt(D, D)
    return D
def plike(X, sigma2=None):
    '''
    L = plike(X, sigma2={guess based on X})

    Compute likelihood that any two objects come from the same distribution
    under a Gaussian distribution hypothesis::

        L[i,j] = exp( -||X[i] - X[j]||^2 / sigma2 )

    Parameters
    ----------
    X : ndarray
        feature matrix
    sigma2 : float, optional
        bandwidth. By default, the median of the squared pairwise distances
        is used. If that median is zero (e.g., a single object or many
        duplicated objects), the median of the strictly positive distances
        is used instead, or 1 if all distances are zero.

    Returns
    -------
    L : ndarray
        likelihood matrix

    See Also
    --------
    pdist : function
        Compute distances between objects
    '''
    L = pdist(X)
    if sigma2 is None:
        sigma2 = np.median(L)
        if sigma2 == 0:
            # A zero bandwidth would give 0/0 == NaN for identical objects
            # (including the diagonal), so pick a usable scale instead.
            positive = L[L > 0]
            sigma2 = np.median(positive) if positive.size else 1.
    # pdist returned a fresh array, so it is safe to work in place.
    L /= -sigma2
    np.exp(L, L)
    return L
Jump to Line
Something went wrong with that request. Please try again.