-
Notifications
You must be signed in to change notification settings - Fork 8
/
team_training_code.m
176 lines (151 loc) · 5.01 KB
/
team_training_code.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
function model = team_training_code(input_directory,output_directory) % train_ECG_leads_classifier
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Purpose: Train one logistic regression classifier per ECG lead set
%          (12-, 6-, 4-, 3- and 2-lead sets) and save each model.
% Inputs:
% 1. input_directory  : folder holding the recordings (<name>.mat) and
%                       their header files (<name>.hea)
% 2. output_directory : folder the trained models are written to
%
% Outputs:
% model: the model returned by mnrfit for the LAST lead set trained
%        (every model is also saved through save_ECGleads_model)
%
% Errors:
% team_training_code:noInputFiles when input_directory holds no .mat file
%
% Author: Erick Andres Perez Alday, PhD, <perezald@ohsu.edu>
% Version 1.0 Aug-2020
% Revision History
% By: Nadi Sadr, PhD, <nadi.sadr@dbmi.emory.edu>
% Version 2.0 1-Dec-2020
% Version 2.2 25-Jan-2021
% Version 2.3 26-April-2021
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Define lead sets (e.g 12, 6, 4, 3 and 2 lead ECG sets)
twelve_leads = {'I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6'};
six_leads = {'I', 'II', 'III', 'aVR', 'aVL', 'aVF'};
four_leads = {'I', 'II', 'III', 'V2'};
three_leads = {'I', 'II', 'V2'};
two_leads = {'I', 'II'};
lead_sets = {twelve_leads, six_leads, four_leads, three_leads, two_leads};

disp('Loading data...')

% Find recording files: regular, non-hidden entries with a ".mat" extension.
% endsWith is safe for names of any length, unlike indexing the last 3 chars.
input_files = {};
for f = dir(input_directory)'
    if ~f.isdir && f.name(1) ~= '.' && endsWith(f.name, '.mat')
        input_files{end + 1} = f.name;
    end
end
if isempty(input_files)
    % Without this check the training loop below would fail later on an
    % undefined header_data, which hides the real cause.
    error('team_training_code:noInputFiles', ...
        'No .mat recordings found in %s', input_directory);
end

% Extract the unique classes from the dataset.
classes = get_classes(input_directory,input_files);
num_classes = length(classes); % number of classes
num_files = length(input_files);

%% Load data recordings and header files
% Iterate over files.
disp('Training model..')
label = zeros(num_files,num_classes); % one row per recording, one column per class
features = [];
for i = 1:num_files
    disp([' ', num2str(i), '/', num2str(num_files), '...'])

    % Load data. fileparts drops only the final extension, so record names
    % that themselves contain dots are kept intact.
    [~, record_name] = fileparts(input_files{i});
    tmp_input_file = fullfile(input_directory, record_name);
    [data,header_data] = load_challenge_data(tmp_input_file);

    %% Check the number of available ECG leads
    % Second space-separated token of the first header line.
    tmp_hea = strsplit(header_data{1},' ');
    % str2double parses the token without evaluating it as MATLAB code.
    num_leads = str2double(tmp_hea{2});
    [~, leads_idx] = get_leads(header_data,num_leads);

    %% Extract features
    tmp_features = get_features(data,header_data,leads_idx);
    if i == 1
        % Feature count is only known after the first recording.
        features = zeros(num_files, numel(tmp_features));
    end
    features(i,:) = tmp_features(:);

    %% Extract labels
    % Only the first '#Dx' line is used; it may carry several
    % comma-separated labels.
    dx_line = find(startsWith(header_data,'#Dx'), 1);
    if ~isempty(dx_line)
        tmp = strsplit(header_data{dx_line},': ');
        if numel(tmp) >= 2
            tmp_c = strsplit(tmp{2},',');
            label(i, ismember(classes,tmp_c)) = 1;
        end
    end
end

%% train logistic regression models for the lead sets
for i = 1:length(lead_sets)
    num_leads = length(lead_sets{i});
    % Train ECG model
    disp(['Training ',num2str(num_leads),'-lead ECG model...'])
    % NOTE(review): header_data here is the header of the last recording
    % loaded above; this presumably assumes every recording lists its leads
    % in the same order - confirm against get_leads.
    [~, leads_idx] = get_leads(header_data,num_leads);
    % Features = [1:12] features from 12 ECG leads + Age + Sex
    Features_leads_idx = [leads_idx,13,14];
    Features_leads = features(:,Features_leads_idx);
    model = mnrfit(Features_leads,label,'model','hierarchical');
    save_ECGleads_model(model,output_directory,classes,num_leads);
end
end
function save_ECGleads_model(model,output_directory,classes,num_leads) %save_ECG_model
% Store a trained model together with its class list.
%
% Inputs:
%   model            : trained model to store
%   output_directory : folder the file is written to
%   classes          : class labels saved alongside the model
%   num_leads        : number of leads; used as the file name prefix
%
% The file is named "<num_leads>_lead_ecg_model.mat", is written in the
% -v7.3 MAT format and holds the variables 'model' and 'classes'.
target_path = fullfile(output_directory, [num2str(num_leads) '_lead_ecg_model.mat']);
% save looks the variables up by name, so the parameter names must stay.
save(target_path, 'model', 'classes', '-v7.3');
disp('Done.')
end
function save_ECGleads_features(features,output_directory) % save_ECG_features
% Store the feature matrix as "features.mat" inside output_directory.
%
% Inputs:
%   features         : feature data to store (saved as variable 'features')
%   output_directory : folder the file is written to
% save looks the variable up by name, so the parameter name must stay.
save(fullfile(output_directory, 'features.mat'), 'features');
end
% Collect the sorted set of unique diagnosis labels used in the dataset
function classes = get_classes(input_directory,files)
% Scan the header file of every recording and gather its '#Dx' labels.
%
% Inputs:
%   input_directory : folder holding the header files
%   files           : cell array of recording file names ('<name>.mat');
%                     the header is looked up as '<name>.hea'
%
% Output:
%   classes : 1-by-N cell array of unique label strings in sorted order
%             ({} when no label is found)
%
% Errors:
%   get_classes:headerNotFound when a header file cannot be opened
%
% Only the first line starting with '#Dx' is read from each header; its
% labels are the comma-separated values after ': '. Headers without a
% '#Dx' line contribute no labels.
collected = {};
for i = 1:length(files)
    header_path = fullfile(input_directory, strrep(files{i},'.mat','.hea'));
    fid = fopen(header_path);
    if fid < 0
        error('get_classes:headerNotFound', ...
            'error in opening file %s', header_path);
    end
    % Close the file even if parsing below throws.
    closer = onCleanup(@() fclose(fid));

    tline = fgetl(fid);
    % fgetl returns -1 (not char) at end of file, so test ischar before
    % any string operation on the line.
    while ischar(tline)
        if startsWith(tline,'#Dx')
            tmp = strsplit(tline,': ');
            if numel(tmp) >= 2
                collected = [collected, strsplit(tmp{2},',')];
            end
            break
        end
        tline = fgetl(fid);
    end

    clear closer % triggers fclose for this header before the next one opens
end

if isempty(collected)
    classes = {};
    return
end
% unique also sorts; reshape keeps the row orientation callers index into.
classes = reshape(unique(collected), 1, []);
end
function [data,tlines] = load_challenge_data(filename)
% Load one recording and its header.
%
% Input:
%   filename : path of the recording WITHOUT extension; '<filename>.hea'
%              and '<filename>.mat' are read
%
% Outputs:
%   data   : the variable 'val' stored in the .mat file
%   tlines : N-by-1 cell array with the lines of the header file
%
% Errors:
%   load_challenge_data:headerNotFound when the header cannot be opened;
%   errors from load, or from a missing 'val' variable, propagate as is.

% Opening header file
fid = fopen([filename '.hea']);
if fid < 0
    % fopen signals failure with -1; stop here instead of passing an
    % invalid identifier on to fgetl.
    error('load_challenge_data:headerNotFound', ...
        'error in opening file %s', filename);
end
closer = onCleanup(@() fclose(fid)); % closes the header on any exit path

tlines = cell(0,1);
tline = fgetl(fid);
while ischar(tline) % fgetl returns -1 at end of file
    tlines{end+1,1} = tline;
    tline = fgetl(fid);
end
clear closer % close the header before reading the signal file

f = load([filename '.mat']);
data = f.val;
end