-
Notifications
You must be signed in to change notification settings - Fork 4
/
mean.py
executable file
·49 lines (39 loc) · 1.91 KB
/
mean.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env python
"""
datasink: A Pipeline for Large-Scale Heterogeneous Ensemble Learning
Copyright (C) 2013 Sean Whalen
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
from os import mkdir
from os.path import abspath, exists
from sys import argv
from pandas import DataFrame, concat
from sklearn.externals.joblib import Parallel, delayed
import common
def mean_aggregation(fold):
    """
    Build the mean-ensemble prediction table for a single fold.

    Loads the fold through common.read_fold using the module-level `path`,
    keeps only the test frame, and averages its columns row by row
    (presumably one column per base predictor -- confirm against
    common.read_fold).

    fold -- fold identifier handed to common.read_fold and stored in the
            'fold' column of the result.

    Returns a DataFrame with the columns 'id', 'label', 'fold',
    'prediction' and 'diversity'.
    """
    # read_fold yields four values; only the third (the test frame) is used here.
    _, _, fold_test = common.read_fold(path, fold)[:3]
    fold_index = fold_test.index
    columns = {
        # 'id' and 'label' are named levels of the test frame's index.
        'id': fold_index.get_level_values('id'),
        'label': fold_index.get_level_values('label'),
        'fold': fold,
        # Row-wise mean over all columns of the test frame.
        'prediction': fold_test.mean(axis = 1),
        # Computed by common.diversity_score from the raw value matrix.
        'diversity': common.diversity_score(fold_test.values),
    }
    return DataFrame(columns)
# Script body: run mean aggregation on every fold, write the combined
# predictions to <path>/analysis/mean.csv and print the mean per-fold score.

# The project directory is the single required command-line argument.
if len(argv) < 2:
    raise SystemExit('usage: mean.py <path>')
path = abspath(argv[1])
# Explicit check rather than `assert`, which is skipped when Python runs with -O.
if not exists(path):
    raise SystemExit('path does not exist: %s' % path)

analysis_dir = '%s/analysis' % path
if not exists(analysis_dir):
    mkdir(analysis_dir)

p = common.load_properties(path)
fold_count = int(p['foldCount'])

# One joblib task per fold; n_jobs = -1 requests all available CPUs.
predictions = Parallel(n_jobs = -1, verbose = 0)(delayed(mean_aggregation)(fold) for fold in range(fold_count))
predictions_df = concat(predictions)
predictions_df['method'] = 'mean'
predictions_df.to_csv('%s/analysis/mean.csv' % path, index = False)

# Score each fold separately with common.score, then report the average.
# The parenthesized single-argument form works as a Python 2 print statement
# and as a Python 3 function call.
print('%.3f' % predictions_df.groupby('fold').apply(lambda x: common.score(x.label, x.prediction)).mean())