mindboggle/mindboggle/guts/compute.py
#!/usr/bin/env python
"""
Compute functions.

Authors:
    - Arno Klein, 2012-2016 (arno@mindboggle.info) http://binarybottle.com

Copyright 2016, Mindboggle team (http://mindboggle.info), Apache v2.0 License

"""


def distcorr(X, Y):
    """
    Compute the distance correlation function.

    Parameters
    ----------
    X : list or array of numbers
    Y : list or array of numbers

    Returns
    -------
    dcor : float
        distance correlation

    Examples
    --------
    >>> from mindboggle.guts.compute import distcorr
    >>> import numpy as np
    >>> a = [1,2,3,4,5]
    >>> b = [1,2,9,4,4]
    >>> dcor = distcorr(a, b)
    >>> float("{0:.{1}f}".format(dcor, 5))
    0.76268

    Copyright (2014-2015) MIT
    Written by Satrajit S. Ghosh (Apache License v2.0) as part of the
    mapalign GitHub repository (https://github.com/satra/mapalign)
    """
    import numpy as np
    from scipy.spatial.distance import pdist, squareform

    X = np.atleast_1d(X)
    Y = np.atleast_1d(Y)
    if np.prod(X.shape) == len(X):
        X = X[:, None]
    if np.prod(Y.shape) == len(Y):
        Y = Y[:, None]
    X = np.atleast_2d(X)
    Y = np.atleast_2d(Y)
    n = X.shape[0]
    if Y.shape[0] != X.shape[0]:
        raise ValueError('Number of samples must match')
    a = squareform(pdist(X))
    b = squareform(pdist(Y))
    A = a - a.mean(axis=0)[None, :] - a.mean(axis=1)[:, None] + a.mean()
    B = b - b.mean(axis=0)[None, :] - b.mean(axis=1)[:, None] + b.mean()

    dcov2_xy = (A * B).sum() / float(n * n)
    dcov2_xx = (A * A).sum() / float(n * n)
    dcov2_yy = (B * B).sum() / float(n * n)
    dcor = np.sqrt(dcov2_xy) / np.sqrt(np.sqrt(dcov2_xx) * np.sqrt(dcov2_yy))

    return dcor
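
# Usage sketch (not among the module's own doctests): the distance
# correlation of any variable with itself is 1, which gives a quick
# sanity check for distcorr():
#
#     >>> from mindboggle.guts.compute import distcorr
#     >>> x = [1.0, 2.0, 3.0, 4.0, 5.0]
#     >>> round(float(distcorr(x, x)), 5)
#     1.0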


def point_distance(point, points):
    """
    Compute the Euclidean distance from one point to a second (set) of points.

    Parameters
    ----------
    point : list of three floats
        coordinates for a single point
    points : list with one or more lists of three floats
        coordinates for a second point (or multiple points)

    Returns
    -------
    min_distance : float
        Euclidean distance between two points,
        or the minimum distance between a point and a set of points
    min_index : int
        index of closest of the points (zero if only one)

    Examples
    --------
    >>> from mindboggle.guts.compute import point_distance
    >>> point = [1,2,3]
    >>> points = [[10,2.0,3], [0,1.5,2]]
    >>> point_distance(point, points)
    (1.5, 1)

    Notes
    -----
    Future plan is to use scipy.spatial.distance.cdist to compute distances
    scipy.spatial.distance.cdist is available in scipy v0.12 or later

    """
    import numpy as np

    # If points is a single point:
    if np.ndim(points) == 1:
        #return np.linalg.norm(np.array(point) - np.array(points))
        return np.sqrt((point[0] - points[0]) ** 2 +
                       (point[1] - points[1]) ** 2 +
                       (point[2] - points[2]) ** 2), 0

    # If points is a set of multiple points:
    elif np.ndim(points) == 2:
        min_distance = np.inf
        min_index = 0
        point = np.array(point)
        for index, point2 in enumerate(points):
            distance = np.sqrt((point[0] - point2[0]) ** 2 +
                               (point[1] - point2[1]) ** 2 +
                               (point[2] - point2[2]) ** 2)
            #distance = np.linalg.norm(point - np.array(point2))
            if distance < min_distance:
                min_distance = distance
                min_index = index
        return min_distance, min_index

    # Else return None:
    else:
        return None, None
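
# Usage sketch (not among the module's own doctests): the hand-coded
# Euclidean distance in point_distance() matches numpy.linalg.norm, as
# the commented-out lines above suggest:
#
#     >>> import numpy as np
#     >>> from mindboggle.guts.compute import point_distance
#     >>> d, i = point_distance([1, 2, 3], [[0, 1, 5]])
#     >>> bool(np.isclose(d, np.linalg.norm(np.subtract([1, 2, 3], [0, 1, 5]))))
#     True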


def vector_distance(vector1, vector2, normalize=False):
    """
    Compute the Euclidean distance between two equal-sized vectors,
    divided by the number of elements.

    Parameters
    ----------
    vector1 : numpy array of floats
        vector of values
    vector2 : numpy array of floats
        vector of values
    normalize : bool
        normalize each element of the vectors?

    Returns
    -------
    distance : float
        Euclidean distance between the two vectors,
        divided by the number of elements

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.guts.compute import vector_distance
    >>> vector1 = np.array([1.,2.,3.])
    >>> vector2 = np.array([0,1,5])
    >>> distance = vector_distance(vector1, vector2)
    >>> print('{0:0.5f}'.format(distance))
    0.81650

    """
    import numpy as np

    if np.size(vector1) == np.size(vector2):
        # Make sure arguments are numpy arrays:
        if not isinstance(vector1, np.ndarray):
            vector1 = np.asarray(vector1)
        if not isinstance(vector2, np.ndarray):
            vector2 = np.asarray(vector2)
        if normalize:
            vector_diff = np.zeros(len(vector1))
            for i in range(len(vector1)):
                max_v1v2 = max([vector1[i], vector2[i]])
                if max_v1v2 > 0:
                    vector_diff[i] = (vector1[i] - vector2[i]) / max_v1v2
        else:
            vector_diff = vector1 - vector2
        return np.sqrt(sum((vector_diff)**2)) / np.size(vector1)
    else:
        print("Vectors have to be of equal size to compute distance.")
        return None
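
# Usage sketch (not among the module's own doctests): vector_distance()
# returns the Euclidean norm of the difference divided by the number of
# elements, so vectors differing by (1, 1, -2) give sqrt(6)/3 ~ 0.8165:
#
#     >>> import numpy as np
#     >>> from mindboggle.guts.compute import vector_distance
#     >>> d = vector_distance(np.array([1., 2., 3.]), np.array([0., 1., 5.]))
#     >>> bool(np.isclose(d, np.sqrt(6) / 3))
#     True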


def pairwise_vector_distances(vectors, save_file=False, normalize=False):
    """
    Compare every pair of equal-sized vectors.

    Parameters
    ----------
    vectors : array of 1-D lists or arrays of integers or floats
    save_file : bool
        save file?
    normalize : bool
        normalize each element of the vectors?

    Returns
    -------
    vector_distances : numpy array of integers or floats
        distances between each pair of vectors
    outfile : string [optional]
        output filename for pairwise_vector_distances

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.guts.compute import pairwise_vector_distances
    >>> vectors = [[1,2,3],[0,3,5],[0,3.5,5],[1,1,1]]
    >>> save_file = False
    >>> normalize = False
    >>> vector_distances, outfile = pairwise_vector_distances(vectors,
    ...     save_file, normalize)
    >>> print(np.array_str(np.array(vector_distances),
    ...       precision=5, suppress_small=True))
    [[0.      0.8165  0.89753 0.74536]
     [0.      0.      0.16667 1.52753]
     [0.      0.      0.      1.60728]
     [0.      0.      0.      0.     ]]

    """
    import os
    import numpy as np

    from mindboggle.guts.compute import vector_distance

    # Make sure argument is a numpy array:
    if not isinstance(vectors, np.ndarray):
        vectors = np.array(vectors)

    # Initialize output:
    vector_distances = np.zeros((len(vectors), len(vectors)))

    # ------------------------------------------------------------------------
    # Compute distance between each pair of vectors
    # ------------------------------------------------------------------------
    # Loop through every pair of vectors:
    for ihist1 in range(len(vectors)):
        for ihist2 in range(len(vectors)):
            if ihist2 >= ihist1:
                # Store pairwise distances between vector values:
                d = vector_distance(1.0*vectors[ihist1],
                                    1.0*vectors[ihist2],
                                    normalize=normalize)
                vector_distances[ihist1, ihist2] = d

    if save_file:
        outfile = os.path.join(os.getcwd(), 'vector_distances.txt')
        np.savetxt(outfile, vector_distances,
                   fmt=len(vectors) * '%.4f ', delimiter='\t', newline='\n')
        if not os.path.exists(outfile):
            raise IOError(outfile + " not found")
    else:
        outfile = ''

    return vector_distances, outfile
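
# Usage sketch (not among the module's own doctests): the output matrix
# is upper triangular (each unordered pair is computed once), and each
# entry agrees with a direct call to vector_distance():
#
#     >>> import numpy as np
#     >>> from mindboggle.guts.compute import (pairwise_vector_distances,
#     ...                                      vector_distance)
#     >>> m, _ = pairwise_vector_distances([[1, 2, 3], [0, 3, 5]])
#     >>> d = vector_distance(np.array([1., 2., 3.]), np.array([0., 3., 5.]))
#     >>> bool(np.isclose(m[0, 1], d)) and m[1, 0] == 0.0
#     True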


def source_to_target_distances(sourceIDs, targetIDs, points,
                               segmentIDs=[], excludeIDs=[-1]):
    """
    Create a Euclidean distance matrix between source and target points.

    Compute the Euclidean distance from each source point to
    its nearest target point, optionally within each segment.

    Example::

        Compute fundus-to-feature distances, the minimum distance
        from each label boundary vertex (corresponding to a fundus
        in the DKT cortical labeling protocol) to all of the
        feature vertices in the same fold.

    Parameters
    ----------
    sourceIDs : list of N integers (N is the number of vertices)
        source IDs, where any ID not in excludeIDs is a source point
    targetIDs : list of N integers (N is the number of vertices)
        target IDs, where any ID not in excludeIDs is a target point
    points : list of N lists of three floats (N is the number of vertices)
        coordinates of all vertices
    segmentIDs : list of N integers (N is the number of vertices)
        segment IDs, where each ID not in excludeIDs is considered a
        different segment (unlike above, where the value in sourceIDs or
        targetIDs doesn't matter, so long as it's not in excludeIDs);
        source/target distances are computed within each segment
    excludeIDs : list of integers
        IDs to exclude

    Returns
    -------
    distances : numpy array
        distance value for each vertex (default -1)
    distance_matrix : numpy array [#points by maximum segment ID + 1]
        distances organized by segments (columns)

    """
    import numpy as np

    from mindboggle.guts.compute import point_distance

    if isinstance(points, list):
        points = np.asarray(points)
    npoints = len(points)

    # Extract unique segment IDs (or use all points as a single segment):
    if np.size(segmentIDs):
        segments = [x for x in np.unique(segmentIDs) if x not in excludeIDs]
    else:
        segmentIDs = np.zeros(npoints)
        segments = [0]
    nsegments = max(segments) + 1

    # Initialize outputs:
    distances = -1 * np.ones(npoints)
    distance_matrix = -1 * np.ones((npoints, nsegments))

    # For each segment:
    for segment in segments:
        segment_indices = [i for i, x in enumerate(segmentIDs)
                           if x == segment]

        # Find all source points in the segment:
        source_indices = [i for i, x in enumerate(sourceIDs)
                          if x not in excludeIDs
                          if i in segment_indices]

        # Find all target points in the segment:
        target_indices = [i for i, x in enumerate(targetIDs)
                          if x not in excludeIDs
                          if i in segment_indices]

        if source_indices and target_indices:

            # For each source point in the segment,
            # find the closest target point:
            for isource in source_indices:
                d, i = point_distance(points[isource],
                                      points[target_indices])
                distances[isource] = d
                distance_matrix[isource, segment] = d

    return distances, distance_matrix
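
# Usage sketch (this function has no doctest above): five collinear
# vertices in one segment, vertices 0 and 1 as sources, and vertex 4 as
# the only target; each source gets its distance to vertex 4 and the
# other vertices keep the default -1:
#
#     >>> from mindboggle.guts.compute import source_to_target_distances
#     >>> sourceIDs = [1, 1, -1, -1, -1]
#     >>> targetIDs = [-1, -1, -1, -1, 2]
#     >>> points = [[0, 0, 0], [1, 0, 0], [2, 0, 0], [3, 0, 0], [4, 0, 0]]
#     >>> segmentIDs = [0, 0, 0, 0, 0]
#     >>> distances, matrix = source_to_target_distances(sourceIDs,
#     ...     targetIDs, points, segmentIDs)
#     >>> distances.tolist()
#     [4.0, 3.0, -1.0, -1.0, -1.0]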


def weighted_to_repeated_values(X, W=[], precision=1):
    """
    Create a list of repeated values from weighted values.

    This is useful for computing weighted statistics (ex: weighted median).

    Adapted to allow for fractional weights from
    http://stackoverflow.com/questions/966896/
    code-golf-shortest-code-to-find-a-weighted-median

    Parameters
    ----------
    X : numpy array of floats or integers
        values
    W : numpy array of floats or integers
        weights
    precision : integer
        number of decimal places to consider weights

    Returns
    -------
    repeat_values : numpy array of floats or integers
        repeated values according to their weight

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.guts.compute import weighted_to_repeated_values
    >>> X = np.array([1,2,4,7,8])
    >>> W = np.array([.1,.1,.3,.2,.3])
    >>> precision = 1
    >>> weighted_to_repeated_values(X, W, precision)
    [1, 2, 4, 4, 4, 7, 7, 8, 8, 8]

    """
    import numpy as np

    # Make sure arguments have the correct type:
    if not isinstance(X, np.ndarray):
        X = np.array(X)
    if not isinstance(W, np.ndarray):
        W = np.array(W)
    if not isinstance(precision, int):
        precision = int(precision)

    if np.size(W):
        # If weights are decimals, multiply by 10 until they are whole numbers.
        # If after multiplying precision times they are not whole, round them:
        whole = True
        if any(np.mod(W, 1)):
            whole = False
            for i in range(precision):
                if any(np.mod(W, 1)):
                    W *= 10
                else:
                    whole = True
                    break
        if not whole:
            W = [int(np.round(x)) for x in W]

        # Cast each weight to int so that sequence repetition accepts it:
        repeat_values = sum([[x] * int(w) for x, w in zip(X, W)], [])
    else:
        repeat_values = X

    return repeat_values


def weighted_median(X, W=[], precision=1):
    """
    Compute a weighted median.

    Parameters
    ----------
    X : numpy array of floats or integers
        values
    W : numpy array of floats or integers
        weights
    precision : integer
        number of decimal places to consider weights

    Returns
    -------
    wmedian : float
        weighted median

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.guts.compute import weighted_median
    >>> X = np.array([1,2,4,7,8])
    >>> W = np.array([.1,.1,.3,.2,.3])
    >>> precision = 1
    >>> # [1, 2, 4, 4, 4, 7, 7, 8, 8, 8]
    >>> weighted_median(X, W, precision)
    5.5

    """
    import numpy as np

    from mindboggle.guts.compute import weighted_to_repeated_values

    # Make sure arguments have the correct type:
    if not isinstance(X, np.ndarray):
        X = np.array(X)
    if not isinstance(W, np.ndarray):
        W = np.array(W)
    if not isinstance(precision, int):
        precision = int(precision)

    wmedian = np.median(weighted_to_repeated_values(X, W, precision))

    return wmedian


def median_abs_dev(X, W=[], precision=1, c=1.0):
    """
    Compute the (weighted) median absolute deviation.

    mad = median(abs(x - median(x))) / c

    Parameters
    ----------
    X : numpy array of floats or integers
        values
    W : numpy array of floats or integers
        weights
    precision : integer
        number of decimal places to consider weights
    c : float
        constant used as divisor for mad computation;
        c = 0.6745 is used to convert from mad to standard deviation

    Returns
    -------
    mad : float
        (weighted) median absolute deviation

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.guts.compute import median_abs_dev
    >>> X = np.array([1,2,4,7,8])
    >>> W = np.array([.1,.1,.3,.2,.3])
    >>> precision = 1
    >>> # [1, 2, 4, 4, 4, 7, 7, 8, 8, 8]
    >>> median_abs_dev(X, W, precision)
    2.0

    """
    import numpy as np

    from mindboggle.guts.compute import weighted_to_repeated_values

    # Make sure arguments have the correct type:
    if not isinstance(X, np.ndarray):
        X = np.array(X)
    if not isinstance(W, np.ndarray):
        W = np.array(W)
    if not isinstance(precision, int):
        precision = int(precision)

    if np.size(W):
        X = weighted_to_repeated_values(X, W, precision)

    mad = np.median(np.abs(X - np.median(X))) / c

    return mad
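
# Usage sketch (not among the module's own doctests): with c=0.6745,
# median_abs_dev() approximates the standard deviation for normally
# distributed values; for the doctest data above (weighted mad = 2.0),
# this gives 2.0/0.6745 ~ 2.96516:
#
#     >>> import numpy as np
#     >>> from mindboggle.guts.compute import median_abs_dev
#     >>> mad = median_abs_dev(np.array([1,2,4,7,8]),
#     ...                      np.array([.1,.1,.3,.2,.3]), 1, c=0.6745)
#     >>> round(float(mad), 5)
#     2.96516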


def means_per_label(values, labels, include_labels=[], exclude_labels=[],
                    areas=[]):
    """
    Compute the mean value across vertices per label,
    optionally taking into account surface area per vertex (UNTESTED).

    Formula:
        average value = sum(a_i * v_i) / total_surface_area,
        where *a_i* and *v_i* are the area and value for each vertex *i*.

    Note ::
        This function is different than stats_per_label() in two ways:
            1. It only computes the (weighted) mean and sdev.
            2. It can accept 2-D arrays (such as [x,y,z] coordinates).

    Parameters
    ----------
    values : numpy array of one or more lists of integers or floats
        values to average per label
    labels : list or array of integers
        label for each value
    include_labels : list of integers
        labels to include
    exclude_labels : list of integers
        labels to be excluded
    areas : numpy array of floats
        surface areas (if provided, used to normalize means and sdevs)

    Returns
    -------
    means : list of floats
        mean(s) for each label
    sdevs : list of floats
        standard deviation(s) for each label
    label_list : list of integers
        unique label numbers
    label_areas : list of floats (if normalize_by_area)
        surface area for each labeled set of vertices

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.mio.vtks import read_scalars, read_vtk
    >>> from mindboggle.guts.compute import means_per_label
    >>> from mindboggle.mio.fetch_data import prep_tests
    >>> urls, fetch_data = prep_tests()
    >>> values_file = fetch_data(urls['left_mean_curvature'], '', '.vtk')
    >>> labels_file = fetch_data(urls['left_freesurfer_labels'], '', '.vtk')
    >>> area_file = fetch_data(urls['left_area'], '', '.vtk')
    >>> values, name = read_scalars(values_file, True, True)
    >>> labels, name = read_scalars(labels_file)
    >>> areas, name = read_scalars(area_file, True, True)
    >>> include_labels = []
    >>> exclude_labels = [-1]
    >>> # Compute mean curvature per label normalized by area:
    >>> means, sdevs, label_list, label_areas = means_per_label(values, labels,
    ...     include_labels, exclude_labels, areas)
    >>> [float("{0:.{1}f}".format(x, 5)) for x in means[0:5]]
    [-1.1793, -1.21405, -2.49318, -3.58116, -3.34987]
    >>> [float("{0:.{1}f}".format(x, 5)) for x in sdevs[0:5]]
    [2.43827, 2.33857, 2.0185, 3.25964, 2.8274]
    >>> # Compute mean curvature per label:
    >>> areas = []
    >>> means, sdevs, label_list, label_areas = means_per_label(values, labels,
    ...     include_labels, exclude_labels, areas)
    >>> [float("{0:.{1}f}".format(x, 5)) for x in means[0:5]]
    [-0.99077, -0.3005, -1.59342, -2.03939, -2.31815]
    >>> [float("{0:.{1}f}".format(x, 5)) for x in sdevs[0:5]]
    [2.3486, 2.4023, 2.3253, 3.31023, 2.91794]
    >>> # FIX: compute mean coordinates per label:
    >>> #points, indices, lines, faces, labels, scalar_names, npoints, input_vtk = read_vtk(values_file)
    >>> #means, sdevs, label_list, label_areas = means_per_label(points, labels,
    >>> #    include_labels, exclude_labels, areas)
    >>> #means[0:3]
    >>> #sdevs[0:3]

    """
    import numpy as np

    # Make sure arguments are numpy arrays:
    if not isinstance(values, np.ndarray):
        values = np.asarray(values)
    if not isinstance(areas, np.ndarray):
        areas = np.asarray(areas)

    if include_labels:
        label_list = include_labels
    else:
        label_list = np.unique(labels)
    label_list = [int(x) for x in label_list if int(x) not in exclude_labels]

    means = []
    sdevs = []
    label_areas = []
    if values.ndim > 1:
        dim = np.shape(values)[1]
    else:
        dim = 1

    for label in label_list:
        I = [i for i, x in enumerate(labels) if x == label]
        if I:
            X = values[I]
            if np.size(areas):
                W = areas[I]
                sumW = sum(W)
                label_areas.append(sumW)
                if sumW > 0:
                    if dim > 1:
                        W = np.transpose(np.tile(W, (dim, 1)))
                    means.append(np.sum(W * X, axis=0) / sumW)
                    Xdiff = X - np.mean(X, axis=0)
                    sdevs.append(np.sqrt(np.sum(W * Xdiff**2, axis=0) / sumW))
                else:
                    if dim > 1:
                        means.append(np.mean(X, axis=0))
                        sdevs.append(np.std(X, axis=0))
                    else:
                        means.append(np.mean(X))
                        sdevs.append(np.std(X))
            else:
                if dim > 1:
                    means.append(np.mean(X, axis=0))
                    sdevs.append(np.std(X, axis=0))
                else:
                    means.append(np.mean(X))
                    sdevs.append(np.std(X))
        else:
            means.append(np.zeros(dim))
            sdevs.append(np.zeros(dim))
            label_areas.append(np.zeros(dim))

    if dim > 1:
        means = [x.tolist() for x in means]
        sdevs = [x.tolist() for x in sdevs]
        label_areas = [x.tolist() for x in label_areas]

    return means, sdevs, label_list, label_areas
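
# Usage sketch with small synthetic inputs (the doctests above require
# fetched data); with unit areas, the weighted mean reduces to the
# ordinary mean per label:
#
#     >>> import numpy as np
#     >>> from mindboggle.guts.compute import means_per_label
#     >>> values = np.array([1.0, 2.0, 3.0, 10.0])
#     >>> labels = [1, 1, 1, 2]
#     >>> means, sdevs, label_list, label_areas = means_per_label(values,
#     ...     labels, [], [], np.ones(4))
#     >>> [float(x) for x in means], label_list
#     ([2.0, 10.0], [1, 2])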


def sum_per_label(values, labels, include_labels=[], exclude_labels=[]):
    """
    Compute the sum of values across vertices per label.

    Parameters
    ----------
    values : numpy array of one or more lists of integers or floats
        values to sum per label
    labels : list or array of integers
        label for each value
    include_labels : list of integers
        labels to include
    exclude_labels : list of integers
        labels to be excluded

    Returns
    -------
    sums : list of floats
        sum for each label
    label_list : list of integers
        unique label numbers

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.mio.vtks import read_scalars
    >>> from mindboggle.guts.compute import sum_per_label
    >>> from mindboggle.mio.fetch_data import prep_tests
    >>> urls, fetch_data = prep_tests()
    >>> values_file = fetch_data(urls['left_mean_curvature'], '', '.vtk')
    >>> labels_file = fetch_data(urls['left_freesurfer_labels'], '', '.vtk')
    >>> values, name = read_scalars(values_file, True, True)
    >>> labels, name = read_scalars(labels_file)
    >>> include_labels = []
    >>> exclude_labels = [-1]
    >>> # Compute the sum of values per label:
    >>> sums, label_list = sum_per_label(values, labels, include_labels,
    ...     exclude_labels)
    >>> [float("{0:.{1}f}".format(x, 5)) for x in sums[0:5]]
    [-8228.32913, -424.90109, -1865.8959, -8353.33769, -5130.06613]

    """
    import numpy as np

    # Make sure arguments are numpy arrays:
    if not isinstance(values, np.ndarray):
        values = np.asarray(values)

    if include_labels:
        label_list = include_labels
    else:
        label_list = np.unique(labels)
    label_list = [int(x) for x in label_list if int(x) not in exclude_labels]

    sums = []
    for label in label_list:
        I = [i for i, x in enumerate(labels) if x == label]
        if I:
            X = values[I]
            sums.append(np.sum(X))
        else:
            sums.append(0)

    return sums, label_list
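
# Usage sketch with small synthetic inputs: labels listed in
# include_labels but absent from the data get a sum of zero:
#
#     >>> from mindboggle.guts.compute import sum_per_label
#     >>> sums, label_list = sum_per_label([1, 2, 3, 10], [1, 1, 1, 2],
#     ...     [9, 1, 2], [])
#     >>> [float(x) for x in sums], label_list
#     ([0.0, 6.0, 10.0], [9, 1, 2])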


def stats_per_label(values, labels, include_labels=[], exclude_labels=[],
                    weights=[], precision=1):
    """
    Compute various statistical measures across vertices per label,
    optionally using weights (such as surface area per vertex).

    Example (area-weighted mean):
        average value = sum(a_i * v_i) / total_surface_area,
        where *a_i* and *v_i* are the area and value for each vertex *i*.

    Reference:
        Weighted skewness and kurtosis unbiased by sample size
        Lorenzo Rimoldini, arXiv:1304.6564 (2013)
        http://arxiv.org/abs/1304.6564

    Note ::
        This function is different than means_per_label() in two ways:
            1. It computes more than simply the (weighted) mean and sdev.
            2. It only accepts 1-D arrays of values.

    Parameters
    ----------
    values : numpy array of individual or lists of integers or floats
        values for all vertices
    labels : list or array of integers
        label for each value
    include_labels : list of integers
        labels to include
    exclude_labels : list of integers
        labels to be excluded
    weights : numpy array of floats
        weights to compute weighted statistical measures
    precision : integer
        number of decimal places to consider weights

    Returns
    -------
    medians : list of floats
        median for each label
    mads : list of floats
        median absolute deviation for each label
    means : list of floats
        mean for each label
    sdevs : list of floats
        standard deviation for each label
    skews : list of floats
        skew for each label
    kurts : list of floats
        kurtosis value for each label
    lower_quarts : list of floats
        lower quartile for each label
    upper_quarts : list of floats
        upper quartile for each label
    label_list : list of integers
        list of unique labels

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.mio.vtks import read_scalars
    >>> from mindboggle.guts.compute import stats_per_label
    >>> from mindboggle.mio.fetch_data import prep_tests
    >>> urls, fetch_data = prep_tests()
    >>> values_file = fetch_data(urls['left_mean_curvature'], '', '.vtk')
    >>> labels_file = fetch_data(urls['left_freesurfer_labels'], '', '.vtk')
    >>> area_file = fetch_data(urls['left_area'], '', '.vtk')
    >>> values, name = read_scalars(values_file, True, True)
    >>> areas, name = read_scalars(area_file, True, True)
    >>> labels, name = read_scalars(labels_file)
    >>> include_labels = []
    >>> exclude_labels = [-1]
    >>> weights = areas
    >>> precision = 1
    >>> medians, mads, means, sdevs, skews, kurts, lower_quarts, upper_quarts, label_list = stats_per_label(values,
    ...     labels, include_labels, exclude_labels, weights, precision)
    >>> [float("{0:.{1}f}".format(x, 5)) for x in medians[0:5]]
    [-1.13602, -1.22961, -2.49665, -3.80782, -3.37309]
    >>> [float("{0:.{1}f}".format(x, 5)) for x in mads[0:5]]
    [1.17026, 1.5045, 1.28234, 2.11515, 1.69333]
    >>> [float("{0:.{1}f}".format(x, 5)) for x in means[0:5]]
    [-1.1793, -1.21405, -2.49318, -3.58116, -3.34987]
    >>> [float("{0:.{1}f}".format(x, 5)) for x in kurts[0:5]]
    [2.34118, -0.3969, -0.55787, -0.73993, 0.3807]

    """
    import numpy as np
    from scipy.stats import skew, kurtosis, scoreatpercentile

    from mindboggle.guts.compute import weighted_to_repeated_values, \
        median_abs_dev

    # Make sure arguments are numpy arrays:
    if not isinstance(values, np.ndarray):
        values = np.asarray(values)
    if not isinstance(weights, np.ndarray):
        weights = np.asarray(weights)

    # Initialize all statistical lists:
    if include_labels:
        label_list = include_labels
    else:
        label_list = np.unique(labels)
    label_list = [int(x) for x in label_list if int(x) not in exclude_labels]
    medians = []
    mads = []
    means = []
    sdevs = []
    skews = []
    kurts = []
    lower_quarts = []
    upper_quarts = []

    # Extract all vertex indices for each label:
    for label in label_list:
        I = [i for i, x in enumerate(labels) if x == label]
        if I:
            # Get the vertex values:
            X = values[I]
            if len([x for x in X if x != 0]):
                # If there are as many weights as values,
                # apply the weights to the values:
                if np.size(weights) == np.size(values):
                    W = weights[I]
                    sumW = np.sum(W)
                    # If the sum of the weights is nonzero, compute all
                    # statistics of the weighted values:
                    if sumW > 0:
                        Xdiff = X - np.mean(X)
                        Xstd = np.sqrt(np.sum(W * Xdiff**2) / sumW)
                        means.append(np.sum(W * X) / sumW)
                        sdevs.append(Xstd)
                        if Xstd > 0:
                            skews.append((np.sum(W * Xdiff**3) / sumW) /
                                         Xstd**3)
                            kurts.append((np.sum(W * Xdiff**4) / sumW) /
                                         Xstd**4 - 3)
                        else:
                            skews.append(skew(X))
                            kurts.append(kurtosis(X))
                        X = weighted_to_repeated_values(X, W, precision)
                    # If the sum of the weights equals zero,
                    # simply compute the statistics:
                    else:
                        means.append(np.mean(X))
                        sdevs.append(np.std(X))
                        skews.append(skew(X))
                        kurts.append(kurtosis(X))
                # If there are no (or not enough) weights,
                # simply compute the statistics:
                else:
                    means.append(np.mean(X))
                    sdevs.append(np.std(X))
                    skews.append(skew(X))
                    kurts.append(kurtosis(X))
                # Compute median, median absolute deviation,
                # and lower and upper quartiles:
                if np.size(X):
                    medians.append(np.median(X))
                    mads.append(median_abs_dev(X))
                    lower_quarts.append(scoreatpercentile(X, 25))
                    upper_quarts.append(scoreatpercentile(X, 75))
                # If the weights are all smaller than the precision,
                # X will be empty, so set these statistics to zero:
                else:
                    medians.append(0)
                    mads.append(0)
                    lower_quarts.append(0)
                    upper_quarts.append(0)
            # If all values are equal to zero, set all statistics to zero:
            else:
                medians.append(0)
                mads.append(0)
                means.append(0)
                sdevs.append(0)
                skews.append(0)
                kurts.append(0)
                lower_quarts.append(0)
                upper_quarts.append(0)
        # If there are no vertices for the label, set all statistics to zero:
        else:
            medians.append(0)
            mads.append(0)
            means.append(0)
            sdevs.append(0)
            skews.append(0)
            kurts.append(0)
            lower_quarts.append(0)
            upper_quarts.append(0)

    return medians, mads, means, sdevs, skews, kurts, \
           lower_quarts, upper_quarts, label_list
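
# Usage sketch with small synthetic inputs: when no weights are given
# (or their size does not match the values), the unweighted statistics
# fall back to numpy's mean/std and scipy's skew/kurtosis:
#
#     >>> from mindboggle.guts.compute import stats_per_label
#     >>> out = stats_per_label([1.0, 2.0, 4.0, 7.0, 8.0], [1, 1, 1, 1, 1],
#     ...     [], [])
#     >>> medians, mads, means = out[0], out[1], out[2]
#     >>> float(medians[0]), float(means[0])
#     (4.0, 4.4)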


def count_per_label(labels, include_labels=[], exclude_labels=[]):
    """
    Compute the number of times each label occurs.

    Parameters
    ----------
    labels : numpy 1-D array of integers
        labels (e.g., one label per vertex of a mesh)
    include_labels : list of integers
        labels to include
        (if empty, use unique numbers in image volume file)
    exclude_labels : list of integers
        labels to be excluded

    Returns
    -------
    unique_labels : list of integers
        unique label numbers
    counts : list of floats
        number of times each label occurs

    Examples
    --------
    >>> from mindboggle.guts.compute import count_per_label
    >>> labels = [8,8,8,8,8,10,11,12,10,10,11,11,11,12,12,12,12,13]
    >>> include_labels = [9,10,11,12]
    >>> exclude_labels = [13]
    >>> unique_labels, counts = count_per_label(labels, include_labels,
    ...     exclude_labels)
    >>> unique_labels
    [9, 10, 11, 12]
    >>> counts
    [0, 3, 4, 5]

    >>> import nibabel as nb
    >>> import numpy as np
    >>> from mindboggle.mio.vtks import read_scalars
    >>> from mindboggle.mio.labels import DKTprotocol
    >>> from mindboggle.guts.compute import count_per_label
    >>> from mindboggle.mio.fetch_data import prep_tests
    >>> urls, fetch_data = prep_tests()
    >>> labels_file = fetch_data(urls['freesurfer_labels'], '', '.nii.gz')
    >>> img = nb.load(labels_file)
    >>> hdr = img.header
    >>> labels = np.asanyarray(img.dataobj).ravel()
    >>> dkt = DKTprotocol()
    >>> include_labels = dkt.label_numbers
    >>> exclude_labels = []
    >>> unique_labels, counts = count_per_label(labels,
    ...     include_labels, exclude_labels)
    >>> counts[0:10]
    [0, 0, 0, 972, 2414, 2193, 8329, 2941, 1998, 10906]

    """
    import numpy as np

    # Make sure labels is a numpy array:
    if isinstance(labels, list):
        labels = np.array(labels)
    elif isinstance(labels, np.ndarray):
        pass
    else:
        raise IOError("labels should be a numpy array.")

    # Unique list of labels:
    if include_labels:
        label_list = include_labels
    else:
        label_list = np.unique(labels).tolist()
    label_list = [int(x) for x in label_list if int(x) not in exclude_labels]

    # Loop through labels:
    unique_labels = []
    counts = []
    for label in label_list:
        # Find which voxels contain the label in the volume:
        indices = np.where(labels == label)[0]
        count = len(indices)
        unique_labels.append(label)
        counts.append(count)

    return unique_labels, counts


def compute_overlaps(targets, list1, list2, output_file='', save_output=True,
                     verbose=False):
    """
    Compute overlap for each target between two lists of numbers.

    Parameters
    ----------
    targets : list of integers
        targets to find in lists
    list1 : 1-D numpy array (or list) of numbers
    list2 : 1-D numpy array (or list) of numbers
    output_file : string
        (optional) output file name
    save_output : bool
        save output file?
    verbose : bool
        print statements?

    Returns
    -------
    dice_overlaps : numpy array
        Dice overlap values
    jacc_overlaps : numpy array
        Jaccard overlap values
    output_file : string
        output text file name with overlap values

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.guts.compute import compute_overlaps
    >>> from mindboggle.mio.labels import DKTprotocol
    >>> list1 = [1021, 1021, 1021, 1021, 1021, 1010, 1010, 1010, 1010, 1010]
    >>> list2 = [1003, 1021, 1021, 1021, 1021, 1021, 1003, 1003, 1003, 1003]
    >>> dkt = DKTprotocol()
    >>> targets = dkt.cerebrum_cortex_DKT31_numbers
    >>> output_file = ''
    >>> save_output = True
    >>> verbose = False
    >>> dice_overlaps, jacc_overlaps, output_file = compute_overlaps(targets,
    ...     list1, list2, output_file, save_output, verbose)
    >>> print('{0:.5f}'.format(dice_overlaps[18]))
    0.80000
    >>> print('{0:.5f}'.format(jacc_overlaps[18]))
    0.66667

    """
    import os
    import numpy as np
    import pandas as pd

    if isinstance(list1, list):
        list1 = np.array(list1)
    if isinstance(list2, list):
        list2 = np.array(list2)

    if np.size(list1) != np.size(list2):
        raise IOError("Lists are different sizes")

    # Initialize output:
    dice_overlaps = np.zeros(len(targets))
    jacc_overlaps = np.zeros(len(targets))
    if save_output and not output_file:
        output_file = os.path.join(os.getcwd(), 'ID_dice_jaccard.csv')

    # Loop through targets:
    for itarget, target in enumerate(targets):

        # Find which indices contain the target:
        list1_indices = np.where(list1 == target)[0]
        list2_indices = np.where(list2 == target)[0]
        len1 = len(list1_indices)
        len2 = len(list2_indices)

        # Compute their intersection and union:
        len_intersection = len(np.intersect1d(list2_indices, list1_indices))
        len_union = len(np.union1d(list2_indices, list1_indices))

        # If there is at least one target in each list,
        # compute Dice and Jaccard coefficients:
        if len2 * len1 > 0:
            dice = float(2.0 * len_intersection) / (len2 + len1)
            jacc = float(len_intersection) / len_union
            dice_overlaps[itarget] = dice
            jacc_overlaps[itarget] = jacc
            if verbose:
                print('target: {0}, dice: {1:.2f}, jacc: {2:.2f}'.format(
                    target, dice, jacc))

    # Save output:
    if save_output:
        #np.savetxt(output_file, overlaps, fmt='%d %.4f %.4f',
        #           delimiter='\t', newline='\n')
        df1 = pd.DataFrame({'ID': targets})
        df2 = pd.DataFrame({'Dice overlap': dice_overlaps})
        df3 = pd.DataFrame({'Jaccard overlap': jacc_overlaps})
        df = pd.concat([df1, df2, df3], axis=1)
        df.to_csv(output_file, index=False, encoding='utf-8')

    return dice_overlaps, jacc_overlaps, output_file
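
# Worked numbers for the doctest above: label 1021 occupies indices 0-4
# in list1 and indices 1-5 in list2, so their intersection has 4 indices
# and their union has 6:
#
#     >>> 2.0 * 4 / (5 + 5), round(4 / 6.0, 5)
#     (0.8, 0.66667)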


def compute_image_histogram(infile, nbins=100, threshold=0.0):
    """
    Compute histogram values from nibabel-readable image.

    Parameters
    ----------
    infile : string
        input nibabel-readable image file name
    nbins : integer
        number of bins
    threshold : float
        remove values lower than threshold

    Returns
    -------
    histogram_values : numpy array
        histogram bin values

    Examples
    --------
    >>> from mindboggle.guts.compute import compute_image_histogram
    >>> from mindboggle.mio.fetch_data import prep_tests
    >>> urls, fetch_data = prep_tests()
    >>> labels_file = fetch_data(urls['freesurfer_labels'], '', '.nii.gz')
    >>> nbins = 100
    >>> threshold = 0.5
    >>> histogram_values = compute_image_histogram(labels_file, nbins,
    ...     threshold)
    >>> histogram_values[0:5]
    array([102865, 119610,      0,      0,      0])

    """
    import numpy as np
    import nibabel as nb
    #from pylab import plot #, hist

    # ------------------------------------------------------------------------
    # Compute histogram
    # ------------------------------------------------------------------------
    # Load image:
    data = nb.load(infile).get_fdata().ravel()

    # Threshold image:
    if threshold > 0:
        data = data / data.max()
        data = data[data >= threshold]

    # Compute histogram:
    histogram_values, bin_edges = np.histogram(data, bins=nbins)

    # plot(list(range(len(histogram_values))), histogram_values, '-')
    ##a,b,c = hist(data, bins=nbins)

    return histogram_values
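
# Usage sketch with a small synthetic volume (the doctest above requires
# fetched data); the temporary file name is illustrative only:
#
#     >>> import os, tempfile
#     >>> import numpy as np
#     >>> import nibabel as nb
#     >>> from mindboggle.guts.compute import compute_image_histogram
#     >>> data = np.zeros((4, 4, 4)); data[0, 0, 0] = 1.0
#     >>> tmp = os.path.join(tempfile.mkdtemp(), 'demo.nii.gz')
#     >>> nb.save(nb.Nifti1Image(data, np.eye(4)), tmp)
#     >>> compute_image_histogram(tmp, nbins=2, threshold=0.0).tolist()
#     [63, 1]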


# ============================================================================
# Doctests
# ============================================================================
if __name__ == "__main__":
    import doctest
    doctest.testmod(verbose=True)  # py.test --doctest-modules