/
summarizeSamplingResults.m
227 lines (177 loc) · 8.14 KB
/
summarizeSamplingResults.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
function [stats, statsR] = summarizeSamplingResults(modelA,modelB,path,nFiles,pointsPerFile,starting_model,dataGenes,show_rxns,fonts,hist_per_page,bin,fileNameA,fileNameB)
% The function summarizes the sampling results of the two models `modelA` and
% `modelB`. Subsequently, it returnes medians from the two models' sampling,
% FVA results for the respective reaction, simple test results (`stats`).
% Finally, histograms are produced, one for each reactions containing the
% distributions of the two models. The set of analyzed reactions can be
% limited (`show_rxns`). Reactions associated with genes of special interest,
% e.g. differentially expressed genes, can be marked to facilitate the
% analysis, and simplify the identification of interesting histograms.
%
% USAGE:
%
% [stats, statsR] = summarizeSamplingResults(modelA, modelB, path, nFiles, pointsPerFile, starting_model, dataGenes, show_rxns, fonts, hist_per_page, bin, fileNameA, fileNameB)
%
% INPUTS:
% modelA: Sampled modelA (condition 1)
% modelB: Sampled modelB (condition 2)
% path: Path to sampling output files
% nFiles: Number of files saved, e.g., 20;
% pointsPerFilePoints: Points saved per file, e.g., 5000;
% starting_model: Original metabolic model (Recon)
% dataGenes: Gene set, whose associated reactions should be emphasized by color, e.g., alternatively spliced or differentially expressed genes
%
% OPTIONAL INPUTS:
% show_rxns: Vector of reactions that should be displayed, e.g., certain pathways or non-loop reactions
% fonts: Font size (default = 9)
% hist_per_page: Defines the number of histogramms that are printed per page, e.g., 4, 9, or 25 (default = 9)
% bin: Binning for histogramm (default = 30)
% fileNameA: Name of files the sampling points are stored in
% fileNameB: Name of files the sampling points are stored in
%
% OUTPUTS:
% stats: Statistics from the sampling (Columns: Median modelA, Median modelB, minFlux_A, maxFlux_A, minFlux_B, maxFlux_B )
% statsR: Reaction vector for stats output
%
% Depends on COBRA functions: `loadsamples`, `findRxnsFromGenes`, flux
% variability analysis / `fastFVA`
%
% .. Author: - Maike K. Aurich 22/08/15
if ~exist('hist_per_page','var') %% default settings
hist_per_page=9;
end
if ~exist('fonts','var')
fonts=9;
end
if ~exist('bin','var')
bin = 30;
end
if ~exist('fileNameA','var')
fileNameA = modelA;
end
if ~exist('fileNameB','var')
fileNameB = modelB;
end
%% Summarize sampling results
samples_A = loadSamples([path filesep fileNameA], nFiles, pointsPerFile);
samples_B = loadSamples([path filesep fileNameB], nFiles, pointsPerFile);
%% Find the reactions associated, e.g., with the alternatively spliced or differentially expressed genes
Transcript = starting_model.genes;
cnt = 1;
for i = 1 : length(Transcript); % 1905
a=regexp(Transcript{i,1},'\.','split');
if ~isempty(char(a));
if ~isempty(find(dataGenes== str2num(char(a(1)))));
j = find(dataGenes == str2num(char(a(1))));
Genes2Transcripts(j,cnt) = 1;
ExpressionData.Transcript{cnt,1} = Transcript{i,1};
ExpressionData.Locus(cnt,1) = dataGenes(j(1));
cnt = cnt +1;
end
end
end
if ~exist('ExpressionData')
Rxns_List_reg={};
else
[results ListResults] = findRxnsFromGenes(starting_model, ExpressionData.Transcript,1,1);
Rxns_List_reg = unique(ListResults(:,1));
end
%% find reactions that overlap
%Reactions = find(ismember(modelA.rxns, modelB.rxns)); %find indices
ReactionsName = modelA.rxns(find(ismember(modelA.rxns, modelB.rxns))); % find reaction names for figure titles
%%%
Reg_modelA_rxns= (find(ismember(modelA.rxns,Rxns_List_reg)));
Reg_modelA_rxnsNames= modelA.rxns(find(ismember(modelA.rxns, Rxns_List_reg)));
%%%
Reg_modelB_rxns= (find(ismember(modelB.rxns,Rxns_List_reg)));
Reg_modelB_rxnsNames= modelB.rxns(find(ismember(modelB.rxns, Rxns_List_reg)));
%% run FVAs on reduced models
[minFlux_modelB,maxFlux_modelB] = fluxVariability(modelB,0);
[minFlux_modelA,maxFlux_modelA] = fluxVariability(modelA,0);
FVA_results.minFlux_modelA = minFlux_modelA;
FVA_results.maxFlux_modelA = maxFlux_modelA;
FVA_results.minFlux_modelB = minFlux_modelB;
FVA_results.maxFlux_modelB = maxFlux_modelB;
%% For all common reations of the subset of NL
title_modelA = regexprep(modelA.rxns,'_','- ');
if ~exist('show_rxns'); % if a subset of reactions is not defined, use all reactions found in both models
show_rxns = ReactionsName;
end
%reactions to illustration
Reactions_show_rxns= ReactionsName(find(ismember(ReactionsName,show_rxns)));
clear Reactions
%find indices in model A and B with reaction names
Reactions= find(ismember(modelA.rxns,Reactions_show_rxns));
Reactions_modelA_Names= modelA.rxns(find(ismember(modelA.rxns,Reactions_show_rxns)));
Reactions_modelB = (find(ismember(modelB.rxns,Reactions_show_rxns )));
%Reactions_modelB_Names = modelB.rxns(find(ismember(modelB.rxns,Reactions_show_rxns )));
%% make figure (automatically saved to pdf)
pages = length(show_rxns)/hist_per_page;
v=0;
for g=1:pages % see if pages works
figure;
for i = 1:hist_per_page
% save the last page even if page is not full
if i+v>length(Reactions_modelA_Names);
fprintf('Saving results to %s.png.\n', [path filesep 'sampling_page' num2str(g)]);
saveas(gcf, [path filesep 'sampling_page' num2str(g)], 'pdf');
break
end
%define size of subplot
if hist_per_page==4
subplot (2,2,i)
elseif hist_per_page==9
subplot (3,3,i)
elseif hist_per_page==5
subplot (5,5,i);
end
% position of rxns in modelA
%j=find(ismember(modelA.rxns,(Reactions_modelA_Names(i+v))));
j=Reactions(i+v);
MC = median(samples_A(j,:));
%k= find(ismember(modelB.rxns,(Reactions_modelB_Names(i+v))));
k= Reactions_modelB(i+v);
MM = median(samples_B(k,:));
[N,X] = hist(samples_A(j,:),bin);
plot(X,N, 'b');set(gca,'FontSize',fonts)
hold on
[N,X] = hist(samples_B(k,:),bin);
plot(X,N, 'r');set(gca,'FontSize',fonts)
minFlux_A = FVA_results.minFlux_modelA(j,1);
maxFlux_A = FVA_results.maxFlux_modelA(j,1);
minFlux_B = FVA_results.minFlux_modelB(k,1);
maxFlux_B = FVA_results.maxFlux_modelB(k,1);
% collect statistics for output and illustrations
stats(i+v,1)= MC;
stats(i+v,2)= MM;
stats(i+v,3)= minFlux_A;
stats(i+v,4)= maxFlux_A;
stats(i+v,5)= minFlux_B;
stats(i+v,6)= maxFlux_B;
statsR(i+v,1) = modelA.rxns(j);
%% color if is Data-gene associated reaction
if ismember(modelA.rxns{j},Reg_modelA_rxnsNames)
titleString = [title_modelA{j}];
titleString2 =['FVA(modelA): ' num2str(minFlux_A) ',' num2str(maxFlux_A)];
titleString3 =['FVA(modelB): ' num2str(minFlux_B) ',' num2str(maxFlux_B)];
titleString4 = ['Median(modelA): ' num2str(MC)];
titleString5 = ['Median(modelB): ' num2str(MM) ];
title(titleString, 'Color','r');
h= title(strvcat(titleString , titleString2 ,titleString3, titleString4 ,titleString5),'Color','r' );
set(h,'fontsize',fonts)
else
titleString = [title_modelA{j}];
titleString2 =['FVA(modelA): ' num2str(minFlux_A) ',' num2str(maxFlux_A)];
titleString3 =['FVA(modelB): ' num2str(minFlux_B) ',' num2str(maxFlux_B)];
titleString4 =['Median(modelA): ' num2str(MC)];
titleString5 =['Median(modelB): ' num2str(MM)];
h= title(strvcat(titleString , titleString2,titleString3, titleString4 ,titleString5));
set(h,'fontsize',fonts);
end
end
fprintf('Saving results to %s.png.\n', [path filesep 'sampling_page' num2str(g)]);
saveas(gcf, [path filesep 'sampling_page' num2str(g)], 'pdf');
close
v=v+hist_per_page;
end
end