-
Notifications
You must be signed in to change notification settings - Fork 4
/
outfind.py
174 lines (151 loc) · 5.85 KB
/
outfind.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
""" Module with routines for finding outliers
"""
from pathlib import Path
import numpy as np
import nibabel as nib
from skimage.filters import threshold_otsu
from .metrics import dvars,dvars_voxel
from .detectors import iqr_detector,mad_voxel_detector,mad_time_detector
def segment_brain(img):
""" Segments brain region from background and returns only brain voxels
Parameters
----------
img : array
2D array with voxels in rows and timepoints in columns
Returns
-------
thresholded_img : array
2D array containing only brain voxels in rows and timepoints in columns
"""
# calculate the mean of each voxel over time
mean_img = np.mean(img, axis=-1)
# calculate the threshold for segmenting brain from background
threshold = threshold_otsu(mean_img)
mask = mean_img > threshold
# filter only brain voxels
brain_voxels = img[mask]
return brain_voxels
def detect_outliers_mad_median_absolute_deviation_mask(fname):
""" Detect outliers given image file path 'filename'
Parameters
----------
fname : str or Path
Filename of 4D image, as string or Path object
Returns
-------
outliers : array
Indices of outlier volumes.
"""
# A mask is used to first segment the brain regions from the background, then median absolute deviation is used to detect outliers
img = nib.load(fname)
img_data = img.get_fdata()
# reshape from 4D to 2D
img_data_2D = np.reshape(img_data, (-1,img_data.shape[-1]))
# segment brain from background
brain_voxels = segment_brain(img_data_2D)
# find the outlying voxels
outliers_voxel = mad_voxel_detector(brain_voxels)
# calculate the number of outlying voxels for each time point
voxel_outliers_per_time = np.nansum(outliers_voxel,axis=0)
# find the outliers in the time-series
outliers_time = mad_time_detector(voxel_outliers_per_time, lower_bound=False)
# Return indices of True values from Boolean array.
return np.nonzero(outliers_time)
def detect_outliers_mad_dvars_mask(fname):
""" Detect outliers given image file path 'filename'
Parameters
----------
fname : str or Path
Filename of 4D image, as string or Path object
Returns
-------
outliers : array
Indices of outlier volumes.
"""
# A mask is used to first segment the brain regions from the background, dvars is calculated and then median absolute deviation is used to detect outliers
img = nib.load(fname)
img_data = img.get_fdata()
# reshape from 4D to 2D
img_data_2D = np.reshape(img_data, (-1,img_data.shape[-1]))
# segment brain from background
brain_voxels = segment_brain(img_data_2D)
# calculate dvars
dvs = dvars_voxel(brain_voxels)
# detect outliers
is_outlier = mad_time_detector(dvs, lower_bound=True)
return np.nonzero(is_outlier)
def detect_outliers_mad_sliding_window_mask(fname):
""" Detect outliers given image file path 'filename'
Parameters
----------
fname : str or Path
Filename of 4D image, as string or Path object
Returns
-------
outliers : array
Indices of outlier volumes.
"""
# A mask is used to first segment the brain regions from the background, sliding window approach is used to detect outliers in each window using median absolute deviation
img = nib.load(fname)
img_data = img.get_fdata()
# reshape from 4D to 2D
img_data_2D = np.reshape(img_data, (-1,img_data.shape[-1]))
# segment brain from background
brain_voxels = segment_brain(img_data_2D)
# apply sliding window
overlap = 10
window_length = 20
outliers = []
for i in range(0, brain_voxels.shape[-1],overlap):
if i+window_length>=brain_voxels.shape[-1]:
elements = np.mean(brain_voxels[:,i:],axis=0)
outliers_1 = np.nonzero(mad_time_detector(elements, lower_bound=True))[0] + i
outliers.extend(outliers_1)
break
else:
elements = np.mean(brain_voxels[:,i:i+window_length],axis=0)
outliers_1 = np.nonzero(mad_time_detector(elements, lower_bound=True))[0] + i
outliers.extend(outliers_1)
return np.unique(outliers)
def detect_outliers(fname):
""" Detect outliers given image file path `filename`
Parameters
----------
fname : str or Path
Filename of 4D image, as string or Path object
Returns
-------
outliers : array
Indices of outlier volumes.
"""
# This is a very simple function, using dvars and iqroutliers
img = nib.load(fname)
dvs = dvars(img)
is_outlier = iqr_detector(dvs, iqr_proportion=2)
# Return indices of True values from Boolean array.
return np.nonzero(is_outlier)
def find_outliers(data_directory):
""" Return filenames and outlier indices for images in `data_directory`.
Parameters
----------
data_directory : str
Directory containing containing images.
Returns
-------
outlier_dict : dict
Dictionary with keys being filenames and values being lists of outliers
for filename.
"""
image_fnames = Path(data_directory).glob("**/sub-*.nii.gz")
outlier_dict = {}
for fname in image_fnames:
# detect outliers using mad over voxels and mad over time points
outliers_mad = detect_outliers_mad_median_absolute_deviation_mask(fname)
# detect outliers using dvars and mad over time points
outliers_dvars = detect_outliers_mad_dvars_mask(fname)
# detect outliers using sliding window and mad over time points
outliers_sliding_window = np.array(detect_outliers_mad_sliding_window_mask(fname))
outliers = np.intersect1d(outliers_sliding_window,np.union1d(outliers_mad, outliers_dvars))
#outliers = detect_outliers(fname)
outlier_dict[fname] = outliers
return outlier_dict