-
Notifications
You must be signed in to change notification settings - Fork 1
/
analyse_regions_summary.py
92 lines (59 loc) · 2.72 KB
/
analyse_regions_summary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import numpy as np
import pandas as pd
from collections import OrderedDict
from merge import RESULTS_PATH
from rpy2.robjects import pandas2ri, r
from rpy2.robjects.packages import importr
# Resampling variants compared: plain CCR plus RB-CCR restricted to the
# L/E/H regions (the trailing letter is extracted by the table-building code
# below). Values must match the 'Resampler' column of results_final.csv.
RESAMPLERS = ['CCR', 'RB-CCR-L', 'RB-CCR-E', 'RB-CCR-H']
# Classifier names as they appear in the 'Classifier' column of results_final.csv.
CLASSIFIERS = ['CART', 'KNN', 'L-SVM', 'R-SVM', 'P-SVM', 'LR', 'NB', 'R-MLP', 'L-MLP']
# Evaluation metrics as they appear in the 'Metric' column of results_final.csv.
METRICS = ['Precision', 'Recall', 'Specificity', 'AUC', 'F-measure', 'G-mean']
# Significance threshold applied to the Shaffer-corrected post-hoc p-values.
P_VALUE = 0.10
def test_friedman_shaffer(dictionary):
    """Run a Friedman test with Shaffer-corrected post-hoc comparisons.

    Delegates to R's ``scmamp`` package (via rpy2): ``friedmanTest`` for the
    omnibus test and ``postHocTest`` for pairwise rank-based comparisons.

    :param dictionary: mapping of method name -> list of per-dataset scores
        (column-wise convertible to a pandas DataFrame).
    :return: tuple of
        - dict mapping method name -> mean rank,
        - the Friedman test p-value,
        - nested dict mapping (method, method) -> Shaffer-corrected p-value.
    """
    frame = pd.DataFrame(dictionary)
    labels = frame.columns
    # Enable automatic pandas <-> R data.frame conversion before calling into R.
    pandas2ri.activate()
    importr('scmamp')
    friedman_test = r['friedmanTest']
    post_hoc_test = r['postHocTest']
    omnibus = friedman_test(frame)
    post_hoc = post_hoc_test(frame, test='friedman', correct='shaffer', use_rank=True)
    # Unpack the R result objects by position: [0] -> rank summary,
    # [2] -> p-value (omnibus) / corrected p-value matrix (post hoc).
    mean_ranks = np.array(post_hoc[0])[0]
    p_value = omnibus[2][0]
    corrected_matrix = np.array(post_hoc[2])
    ranks_dict = dict(zip(labels, mean_ranks))
    corrected_p_values_dict = {
        row_label: dict(zip(labels, row))
        for row_label, row in zip(labels, corrected_matrix)
    }
    return ranks_dict, p_value, corrected_p_values_dict
def load_final_dict(classifier, metric):
    """Load per-dataset mean scores for every resampler.

    Reads ``results_final.csv`` from ``RESULTS_PATH``, restricts it to the
    given classifier and metric, and averages the fold scores per
    (resampler, dataset) pair.

    :param classifier: value matched against the 'Classifier' column.
    :param metric: value matched against the 'Metric' column.
    :return: OrderedDict mapping resampler name -> list of mean scores,
        one per dataset, in the dataset order of the filtered frame.
    :raises ValueError: if any (resampler, dataset) pair does not have
        exactly 10 scores (the expected number of CV folds).
    """
    df = pd.read_csv(RESULTS_PATH / 'results_final.csv')
    df = df[(df['Classifier'] == classifier) & (df['Metric'] == metric)]
    measurements = OrderedDict()
    datasets = df['Dataset'].unique()
    for resampler in RESAMPLERS:
        measurements[resampler] = []
        for dataset in datasets:
            scores = df[(df['Resampler'] == resampler) & (df['Dataset'] == dataset)]['Score']
            # Explicit check instead of `assert`: asserts are stripped under
            # `python -O`, which would let incomplete results pass silently.
            if len(scores) != 10:
                raise ValueError(
                    'Expected 10 scores for resampler %s on dataset %s, got %d.'
                    % (resampler, dataset, len(scores))
                )
            measurements[resampler].append(np.mean(scores))
    return measurements
if __name__ == '__main__':
    # Emit one LaTeX table row per classifier. Each cell names the CCR region
    # variant with the best (lowest) mean rank for that metric, annotated with
    # '++' when it is significantly better than plain CCR, or '+' when it is
    # significantly better than at least one other resampler.
    for classifier_name in CLASSIFIERS:
        line = [classifier_name]
        for metric in METRICS:
            d = load_final_dict(classifier_name, metric)
            # The omnibus p-value is not used for the table, only the ranks
            # and the pairwise corrected p-values.
            ranks_dict, _, corrected_p_values_dict = test_friedman_shaffer(d)
            min_key = min(ranks_dict, key=ranks_dict.get)
            if min_key == 'CCR':
                region = 'LEH'
            else:
                # 'RB-CCR-X' -> trailing region letter ('L', 'E' or 'H').
                region = min_key[7]
            if corrected_p_values_dict[min_key]['CCR'] <= P_VALUE:
                region = '%s \\textsubscript{++}' % region
            elif any(v <= P_VALUE for v in corrected_p_values_dict[min_key].values()):
                region = '%s \\textsubscript{+}' % region
            line.append(region)
        print(' & '.join(line) + '\\\\')