# Definições

In [1]:
dir_results = '/media/rafael/DadosCompartilhados/experimentos/experimentos_marcio/Novos/LLGC'
dir_output = '/media/rafael/DadosCompartilhados/experimentos/experimentos_marcio/Resultados'
base_name = 'GP_LLGC'
medidas = ['Accuracy', 'Macro_F1', 'Micro_F1'] 

# Importações de bibliotecas

In [2]:
import pandas as pd 
import numpy as np 
import os
from tqdm import tqdm

# Funções

In [3]:
def list_files_results(dir_results):
    list_files = []
    for root, dirs, files in os.walk(dir_results, topdown=True): 
        for file in files: 
            if file == 'resultadoFinal.csv': 
                list_files.append(os.path.join(root,file))
    return list_files

In [4]:
def create_dfs(medidas): 
    dfs = {}
    for medida in medidas:
        dfs[medida] = pd.DataFrame()
    return dfs

In [5]:
def create_metrics(df): 
    metrics = {}
    metrics['Accuracy'] = df.iloc[0]['Accuracy']
    metrics['Macro Precision'] = df.iloc[0]['Macro Precision']
    metrics['Macro Recall'] = df.iloc[0]['Macro Recall']
    metrics[' Micro Precision'] = df.iloc[0][' Micro Precision']
    metrics[' Micro Recall'] = df.iloc[0][' Micro Recall']
    metrics['Macro_F1'] = (2 * metrics['Macro Precision'] * metrics['Macro Recall']) / (metrics['Macro Precision'] + metrics['Macro Recall'])
    metrics['Micro_F1'] = (2 * metrics[' Micro Precision'] * metrics[' Micro Recall']) / (metrics[' Micro Precision'] + metrics[' Micro Recall'])
    return metrics

In [6]:
def create_column(df): 
    column = [np.nan for i in range(len(df))]
    return column 

In [7]:
def create_new_row_with_series(parametro, metricas, medida, dataset): 
    new_row = pd.Series({parametro: metricas[medida]}, name=dataset)
    return new_row

In [8]:
def create_new_row_with_loc(parametro, metricas, medida, series): 
    new_row = series
    new_row[parametro] = metricas[medida]
    return new_row

In [9]:
def statistics(series): 
    series['Máximo'] = series.max()
    series['Mínimo'] = series.min()
    series['Média'] = series.mean()
    series['Desvio Padrão'] = series.std()
    return series    

# Processamento

In [10]:
dfs = create_dfs(medidas)
for file in tqdm(list_files_results(dir_results)): 
    df = pd.read_csv(file)
    metrics = create_metrics(df)
    partes = file.split('/')
    dataset = partes[-2]
    parametro = partes[-3]
    for medida in medidas: 
        metricas = create_metrics(df)
        if parametro not in dfs[medida]: 
            dfs[medida][parametro] = create_column(dfs[medida])
        if dataset not in dfs[medida].index: 
            new_row = create_new_row_with_series(parametro, metricas, medida, dataset)
            dfs[medida] = dfs[medida].append(new_row)
        else: 
            new_row = create_new_row_with_loc(parametro, metricas, medida, dfs[medida].loc[dataset])
            df.loc[dataset] = new_row
print('\nDone!')

100%|██████████| 2722/2722 [00:20<00:00, 133.33it/s]
Done!



# Gerando Estatísticas

In [11]:
for medida in medidas: 
    dfs[medida] = dfs[medida].fillna(0) 
    #dfs[medida] = dfs[medida].dropna
    dfs[medida] = dfs[medida].apply(statistics, axis=1)
print('Done!')

Done!


# Salvando os Resultados

In [12]:
for medida in medidas: 

    df = dfs[medida]
    df[df.columns[1:]] = df[df.columns[1:]].replace(to_replace=0, value=np.nan)
    df = df.dropna()
    df.to_csv(os.path.join(dir_output, f'{medida}_{base_name}.csv'))
print('Done!')

Done!


# Área de testes

In [13]:
df_test = pd.read_csv('/media/rafael/DadosCompartilhados/experimentos/experimentos_marcio/Resultados/Accuracy_GP_LLGC.csv')

In [14]:
df_test

Unnamed: 0.1,Unnamed: 0,param1,param10,param11,param12,param13,param14,param15,param16,param17,...,param4,param5,param6,param7,param8,param9,Máximo,Mínimo,Média,Desvio Padrão
0,1005_glass,65.883721,71.232558,65.395349,64.023256,70.953488,70.186047,70.953488,66.813953,64.720930,...,68.325581,67.069767,70.069767,71.604651,71.395349,69.906977,71.604651,61.348837,67.502114,3.374809e+00
1,1011_ecoli,99.837037,99.851852,99.925926,99.733333,99.777778,99.600000,99.777778,99.733333,99.703704,...,100.000000,99.851852,99.955556,99.614815,99.644444,99.718519,100.000000,99.600000,99.789226,1.210942e-01
2,1045_kc1-top5,94.153846,92.444444,92.102564,89.504274,93.333333,93.025641,90.358974,91.589744,92.923077,...,94.017094,94.017094,93.435897,92.581197,93.641026,90.495726,94.153846,89.504274,92.335664,1.572130e+00
3,1048_jEdit_4.2_4.3,65.824324,66.824324,66.824324,66.324324,68.297297,66.878378,66.418919,65.864865,64.608108,...,68.459459,64.783784,66.391892,68.756757,68.243243,68.243243,68.756757,64.608108,66.649263,1.332164e+00
4,1059_ar1,87.102041,90.938776,90.571429,91.061224,88.530612,89.714286,90.775510,91.346939,90.000000,...,81.673469,80.653061,91.306122,89.918367,91.346939,91.387755,91.387755,78.040816,88.248609,4.382940e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
134,1524_vertebra-column,99.290323,99.129032,99.677419,99.677419,99.677419,98.967742,99.677419,97.870968,99.177419,...,99.290323,98.935484,99.161290,99.435484,99.387097,99.290323,99.774194,97.548387,99.133431,6.434762e-01
135,1559_breast-tissue,79.862069,89.609195,77.057471,77.011494,77.011494,77.011494,76.827586,60.827586,58.988506,...,77.839080,81.149425,90.298851,89.379310,89.793103,89.241379,90.298851,58.988506,76.735632,1.117501e+01
136,1565_heart-h,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,...,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,1.429137e-14
137,1600_SPECTF,78.485981,88.934579,92.411215,92.897196,92.616822,90.635514,89.364486,79.495327,80.728972,...,82.112150,79.046729,90.859813,91.551402,88.579439,91.663551,92.897196,77.850467,85.745964,5.725589e+00


In [15]:
df_test[df_test.columns[1:]].replace(to_replace=0, value=np.nan).dropna()

Unnamed: 0,param1,param10,param11,param12,param13,param14,param15,param16,param17,param18,...,param4,param5,param6,param7,param8,param9,Máximo,Mínimo,Média,Desvio Padrão
0,65.883721,71.232558,65.395349,64.023256,70.953488,70.186047,70.953488,66.813953,64.720930,62.790698,...,68.325581,67.069767,70.069767,71.604651,71.395349,69.906977,71.604651,61.348837,67.502114,3.374809e+00
1,99.837037,99.851852,99.925926,99.733333,99.777778,99.600000,99.777778,99.733333,99.703704,99.822222,...,100.000000,99.851852,99.955556,99.614815,99.644444,99.718519,100.000000,99.600000,99.789226,1.210942e-01
2,94.153846,92.444444,92.102564,89.504274,93.333333,93.025641,90.358974,91.589744,92.923077,92.615385,...,94.017094,94.017094,93.435897,92.581197,93.641026,90.495726,94.153846,89.504274,92.335664,1.572130e+00
3,65.824324,66.824324,66.824324,66.324324,68.297297,66.878378,66.418919,65.864865,64.608108,65.500000,...,68.459459,64.783784,66.391892,68.756757,68.243243,68.243243,68.756757,64.608108,66.649263,1.332164e+00
4,87.102041,90.938776,90.571429,91.061224,88.530612,89.714286,90.775510,91.346939,90.000000,91.183673,...,81.673469,80.653061,91.306122,89.918367,91.346939,91.387755,91.387755,78.040816,88.248609,4.382940e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133,94.032258,91.903226,93.854839,94.500000,94.887097,93.935484,93.483871,92.435484,90.516129,92.145161,...,93.032258,85.564516,93.548387,91.983871,95.919355,93.000000,95.919355,85.564516,92.515396,2.577470e+00
134,99.290323,99.129032,99.677419,99.677419,99.677419,98.967742,99.677419,97.870968,99.177419,97.548387,...,99.290323,98.935484,99.161290,99.435484,99.387097,99.290323,99.774194,97.548387,99.133431,6.434762e-01
135,79.862069,89.609195,77.057471,77.011494,77.011494,77.011494,76.827586,60.827586,58.988506,61.057471,...,77.839080,81.149425,90.298851,89.379310,89.793103,89.241379,90.298851,58.988506,76.735632,1.117501e+01
136,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,...,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,1.429137e-14


In [21]:
df_test[df_test.columns[1:]] = df_test[df_test.columns[1:]].replace(to_replace=0, value=np.nan)
df_test = df_test.dropna()

In [22]:
df_test

Unnamed: 0.1,Unnamed: 0,param1,param10,param11,param12,param13,param14,param15,param16,param17,...,param4,param5,param6,param7,param8,param9,Máximo,Mínimo,Média,Desvio Padrão
0,1005_glass,65.883721,71.232558,65.395349,64.023256,70.953488,70.186047,70.953488,66.813953,64.720930,...,68.325581,67.069767,70.069767,71.604651,71.395349,69.906977,71.604651,61.348837,67.502114,3.374809e+00
1,1011_ecoli,99.837037,99.851852,99.925926,99.733333,99.777778,99.600000,99.777778,99.733333,99.703704,...,100.000000,99.851852,99.955556,99.614815,99.644444,99.718519,100.000000,99.600000,99.789226,1.210942e-01
2,1045_kc1-top5,94.153846,92.444444,92.102564,89.504274,93.333333,93.025641,90.358974,91.589744,92.923077,...,94.017094,94.017094,93.435897,92.581197,93.641026,90.495726,94.153846,89.504274,92.335664,1.572130e+00
3,1048_jEdit_4.2_4.3,65.824324,66.824324,66.824324,66.324324,68.297297,66.878378,66.418919,65.864865,64.608108,...,68.459459,64.783784,66.391892,68.756757,68.243243,68.243243,68.756757,64.608108,66.649263,1.332164e+00
4,1059_ar1,87.102041,90.938776,90.571429,91.061224,88.530612,89.714286,90.775510,91.346939,90.000000,...,81.673469,80.653061,91.306122,89.918367,91.346939,91.387755,91.387755,78.040816,88.248609,4.382940e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133,1523_vertebra-column,94.032258,91.903226,93.854839,94.500000,94.887097,93.935484,93.483871,92.435484,90.516129,...,93.032258,85.564516,93.548387,91.983871,95.919355,93.000000,95.919355,85.564516,92.515396,2.577470e+00
134,1524_vertebra-column,99.290323,99.129032,99.677419,99.677419,99.677419,98.967742,99.677419,97.870968,99.177419,...,99.290323,98.935484,99.161290,99.435484,99.387097,99.290323,99.774194,97.548387,99.133431,6.434762e-01
135,1559_breast-tissue,79.862069,89.609195,77.057471,77.011494,77.011494,77.011494,76.827586,60.827586,58.988506,...,77.839080,81.149425,90.298851,89.379310,89.793103,89.241379,90.298851,58.988506,76.735632,1.117501e+01
136,1565_heart-h,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,...,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,63.713080,1.429137e-14
