/
analise_results.py
51 lines (47 loc) · 2.45 KB
/
analise_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import pandas as pd
# Result CSVs to summarise. Entries that are commented out are excluded from
# the run on purpose.
results_file_paths = [
    # 'results_reduced/result_dataset_organism_resistance_manualRemove.csv',
    'results_reduced/result_dataset_organism_resistance_manualRemove_IG.csv',
    # 'results_reduced/result_dataset_organism_resistance_manualRemove_noUseless.csv',
    # 'results_reduced/result_dataset_organism_resistance_manualRemove_noUseless_wrapper.csv',
    'results_reduced/result_dataset_organism_resistance_noUseless.csv',
    'results_reduced/result_dataset_organism_resistance_noUseless_wrapper.csv',
    'results_reduced/result_dataset_organism_resistance.csv',
    'results_reduced/result_dataset_organism_resistance_IG.csv',
    # 'results_reduced/result_dataset_organism_resistance_manual.csv'
]

# Row layout every results file is expected to follow: N_FOLDS consecutive
# groups of ROWS_PER_FOLD rows each (60 rows in total).
N_FOLDS = 10
ROWS_PER_FOLD = 6


def fold_labels(n_rows, n_folds=N_FOLDS, rows_per_fold=ROWS_PER_FOLD):
    """Return the fold number of every row of a results file.

    Rows are labelled in consecutive runs: the first ``rows_per_fold`` rows
    get 0, the next run gets 1, and so on up to ``n_folds - 1``.

    Args:
        n_rows: Number of rows actually present in the results file.
        n_folds: Number of folds the file is expected to contain.
        rows_per_fold: Number of consecutive rows belonging to each fold.

    Returns:
        A list of ``n_folds * rows_per_fold`` integer fold labels.

    Raises:
        ValueError: If ``n_rows`` does not equal ``n_folds * rows_per_fold``.
    """
    expected = n_folds * rows_per_fold
    if n_rows != expected:
        raise ValueError(
            'expected {} rows ({} folds x {} rows per fold), got {}'.format(
                expected, n_folds, rows_per_fold, n_rows))
    return [fold for fold in range(n_folds) for _ in range(rows_per_fold)]


def summarise_results(df):
    """Summarise one results table per classifier.

    Args:
        df: DataFrame with at least the columns ``classifier``, ``fname``,
            ``kappa`` and ``fold``. Every other numeric column is averaged
            as well.

    Returns:
        A ``(mean_rows, classifier_rows)`` tuple of lists of dicts, with one
        entry per classifier in order of first appearance:

        * ``mean_rows`` holds the mean of every numeric column plus the
          ``classifier`` name and the ``fname`` of the group's first row.
        * ``classifier_rows`` describes the fold whose kappa is closest to
          the classifier's mean kappa, including the name of the matching
          ``.pkl`` file.
    """
    mean_rows = []
    classifier_rows = []
    # sort=False keeps classifiers in the order they first appear in the file.
    for classifier, group in df.groupby('classifier', sort=False):
        first_fname = group['fname'].iloc[0]

        # numeric_only=True skips the string columns, which mean() refuses
        # to average on pandas >= 2.0.
        means = group.mean(numeric_only=True).to_dict()
        means['classifier'] = classifier
        means['fname'] = first_fname
        mean_rows.append(means)

        # Pick the run whose kappa lies nearest to the mean; ties go to the
        # earliest row. If no kappa is comparable (all NaN), use the first row.
        distance = (group['kappa'] - means['kappa']).abs()
        position = 0 if distance.isna().all() else int(distance.argmin())
        closest = group.iloc[position]
        fold = int(closest['fold'])

        classifier_rows.append({
            'classifier': classifier,
            'fname': first_fname,
            # Only the text before the first '.' of fname is kept as the stem.
            'classifier_fname': '{}_{}_fold{}.pkl'.format(
                closest['fname'].split('.')[0], closest['classifier'], fold),
            'kappa': means['kappa'],
            'fold': fold,
        })
    return mean_rows, classifier_rows


def main():
    """Summarise every file in ``results_file_paths`` and write two CSVs.

    Writes the per-classifier means to
    ``results_reduced/media_resultados.csv`` and the near-mean classifier
    files to ``results_reduced/classifiers.csv``.
    """
    mean_rows = []
    classifier_rows = []
    for path in results_file_paths:
        print(path)
        df = pd.read_csv(path)
        df['fold'] = fold_labels(len(df))
        file_means, file_classifiers = summarise_results(df)
        mean_rows.extend(file_means)
        classifier_rows.extend(file_classifiers)

    # Build each frame once from the collected rows; DataFrame.append was
    # removed in pandas 2.0.
    # The averaged fold number carries no meaning, so it is dropped.
    mean_results = pd.DataFrame(mean_rows).set_index('fname').drop(columns=['fold'])
    mean_results.to_csv('results_reduced/media_resultados.csv')

    classifiers_files = pd.DataFrame(classifier_rows).set_index('fname')
    classifiers_files.to_csv('results_reduced/classifiers.csv')


if __name__ == '__main__':
    main()