-
Notifications
You must be signed in to change notification settings - Fork 0
/
jsmNormalise.m
186 lines (155 loc) · 5.25 KB
/
jsmNormalise.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
function [y,scaleFac] = jsmNormalise(x,method,thresh,rescale,grouping,normRef)
% x - sparse matrix of data
% method - one of the named methods
% thresh - a threshold below which data don't count
% rescale - currently not used
% grouping - necessary for the slide/section methods, differential
% scaling factors according to the
%tt = tic;
% Determine the original mean intensity of the raw data, which will be used
% for re-scaling purposes
if rescale
rescaleValue = nanmax(nanmean(x,1))
end
% Check that is of the correct size (i.e. number of variables)
if nargin > 5
if numel(normRef) ~= size(x,2)
error('normRef is incorrectly sized');
%else
%normRef = [];
%error('Not a good normRef');
end
else
normRef = [];
end
% Some methods run by groupings, e.g. s-tic, s-med. Check that we have them
% if needed, and then calculate the unique groups within.
switch lower(method)
case {'s-tic','s-med'}
if isempty(grouping)
grouping = ones(size(x,1),1);
warning('stic method won''t work!');
end
% Determine unique groupings and indices
[unq,~,unqI] = unique(grouping);
otherwise
% Grouping not required
end
% A threshold?
if isempty(thresh)
thresh = 0;
end
% First we need to determine the scaling factor
switch lower(method)
case 'tic'
% Find the sum of each pixel / spectrum
scaleFac = nansum(x,2);
case 's-tic'
% This creates a normalisation factor based on the sum of the
% entire tissue section rather than individual pixels. So we need
% to tell it which pixels belong to which files...
scaleFac = NaN(size(x,1),1);
% Run through each level to get the total sum over the grouping,
% which is typically the tissue section...
for n = 1:numel(unq)
fx = unqI == n;
tmp = full(nansum(nansum(x(fx,:))));
scaleFac(fx,1) = tmp;
end
case {'pqn-median','pqn-med','pqn-mean','pqn-mean-all'}
% A slightly different implementation of the PQN policy
tmpRef = lower(method(5:end));
% Find values above the threshold
if issparse(x)
tmp = sparse(NaN(size(x)));
else
tmp = NaN(size(x));
end
tmp(x > thresh) = x(x > thresh);
% Determine the reference spectrum, unless it is provided
if isempty(normRef)
switch tmpRef
case 'mean'
normRef = nanmean(tmp,1);
otherwise
normRef = nanmedian(tmp,1);
end
end
% Now determine the fold changes
fldchn = bsxfun(@rdivide,tmp,normRef);
% Now calculate the median of these fold changes for each sample
scaleFac = nanmedian(fldchn,2);
case 'uve'
% Find the variables with the largest mean intensity
mnn = nanmedian(x,1);
[~,bpk] = max(mnn);
scaleFac = x(:,bpk);
case 'med'
% Find the values above the threshold
if issparse(x)
tmp = sparse(NaN(size(x)));
else
tmp = NaN(size(x));
end
tmp(x > thresh) = x(x > thresh);
scaleFac = nanmedian(tmp,2);
case 's-med'
% This calculates a median value as per the grouping variable
% provided, akin to finding the sum over the whole section in stic
scaleFac = NaN(size(x,1),1);
% Run through each level to get the total sum over the grouping,
% which is typically the tissue section...
for n = 1:numel(unq)
fx = unqI == n;
tmp = x(fx,:);
tmp = median(tmp(tmp > thresh));
scaleFac(fx,1) = full(tmp);
end
case 'med-fc'
% Calculate the median over the entire set of spectra
if issparse(x)
tmp = sparse(NaN(size(x)));
else
tmp = NaN(size(x));
end
tmp(x > thresh) = x(x > thresh);
scaleFac = nanmedian(tmp,1);
case 'mean-fc'
% Calculate the mean over the entire set of spectra
if issparse(x)
tmp = sparse(NaN(size(x)));
else
tmp = NaN(size(x));
end
tmp(x > thresh) = x(x > thresh);
scaleFac = nanmean(tmp,1);
case 'raw'
% Do nothing...
y = x;
scaleFac = [];
disp('no norm');
return
otherwise
y = x;
scaleFac = [];
disp('no norm');
return
end
% Set all Inf, Nan, 0 to values of 1
scaleFac(isinf(scaleFac)) = 1;
scaleFac(isnan(scaleFac)) = 1;
scaleFac(scaleFac == 0) = 1;
% Are there any variables that sum to 0, i.e. fully empty?
zzz = sum(x,1) == 0;
% Now that we have an appropriate scaling factor, we use it!
y = bsxfun(@rdivide,x,scaleFac);
% Set empty variables to zero
y(:,zzz) = 0;
% Consider performing rescaling in order to put the spectra back to an
% intensity-appropriate level, rather than being stuck around 0-1
if rescale
y = y * rescaleValue;
end
%toc(tt);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%