In [31]:
import os
import pandas as pd

def consolidate_files(directory: str, search_string: str, output_file: str, 
                      sort_by: str = None, ascending: bool = True):
    """
    Consolida arquivos de um diretório cujo nome contém a string especificada em um único arquivo de saída.
    
    Parâmetros:
    - directory (str): Caminho do diretório a ser percorrido.
    - search_string (str): Parte do nome dos arquivos a serem consolidados.
    - output_file (str): Caminho do arquivo final consolidado.
    - sort_by (str, opcional): Nome da coluna para ordenação (aplicável para CSV e JSON).
    - ascending (bool, opcional): Se True, ordena em ordem crescente; se False, ordem decrescente.
    
    Retorna:
    - DataFrame consolidado (se arquivos forem CSV ou JSON), caso contrário retorna None.
    """
    # Criar lista de arquivos que contêm a string no nome
    file_list = [os.path.join(directory, f) for f in os.listdir(directory) if search_string in f]

    if not file_list:
        print("Nenhum arquivo encontrado.")
        return None

    # Verificar a extensão do primeiro arquivo para decidir o método de consolidação
    ext = os.path.splitext(file_list[0])[-1].lower()

    df_consolidated = None  # Inicializa a variável que conterá o DataFrame final
    
    if ext in ['.csv']:
        # Consolida arquivos CSV
        df_list = [pd.read_csv(file) for file in file_list]
        df_consolidated = pd.concat(df_list, ignore_index=True)

    elif ext in ['.json']:
        # Consolida arquivos JSON
        df_list = [pd.read_json(file) for file in file_list]
        df_consolidated = pd.concat(df_list, ignore_index=True)

    elif ext in ['.txt']:
        # Consolida arquivos TXT
        with open(output_file, 'w', encoding='utf-8') as outfile:
            for file in file_list:
                with open(file, 'r', encoding='utf-8') as infile:
                    outfile.write(infile.read() + '\n')
        print(f"Consolidação concluída! Arquivo salvo em: {output_file}")
        return None  # Retorna None para arquivos TXT

    else:
        print(f"Extensão {ext} não suportada para consolidação.")
        return None

    # Se houver um campo de ordenação, ordena antes de salvar
    if sort_by and sort_by in df_consolidated.columns:
        df_consolidated = df_consolidated.sort_values(by=sort_by, ascending=ascending)

    # Salvar no CSV final
    df_consolidated.to_csv(output_file, index=False)
    
    print(f"Consolidação concluída! Arquivo salvo em: {output_file}")
    
    return df_consolidated  # Retorna o DataFrame consolidado

df_consolidated = consolidate_files("individual_ensemble_candidates", "metrics", "consolidate.csv", "accuracy", False )
df_consolidated

Consolidação concluída! Arquivo salvo em: consolidate.csv


Unnamed: 0,ensemble_name,ensemble_length,models_types,model_files,features,ind_accuracy,ensemble_method,accuracy,f1,precision,recall
1299,ensemble_0000002934,4,"['CNN-3', 'CNN-3', 'CNN-3', 'DNN-6']",['/teamspace/studios/this_studio/CNN_MODEL_TRA...,"[['CNN_MFCC_with_Deltas', 'CNN_Spectral_Contra...","[0.7037037037037037, 0.6898148148148148, 0.703...",Soft Voting,0.810185,0.809684,0.821723,0.810185
893,ensemble_0000002931,4,"['CNN-3', 'CNN-3', 'CNN-3', 'DNN-6']",['/teamspace/studios/this_studio/CNN_MODEL_TRA...,"[['CNN_MFCC_with_Deltas', 'CNN_Spectral_Contra...","[0.7037037037037037, 0.6898148148148148, 0.703...",Soft Voting,0.810185,0.809684,0.821723,0.810185
734,ensemble_0000002932,4,"['CNN-3', 'CNN-3', 'CNN-3', 'DNN-6']",['/teamspace/studios/this_studio/CNN_MODEL_TRA...,"[['CNN_MFCC_with_Deltas', 'CNN_Spectral_Contra...","[0.7037037037037037, 0.6898148148148148, 0.703...",Soft Voting,0.810185,0.810074,0.823559,0.810185
2824,ensemble_0000002936,4,"['CNN-3', 'CNN-3', 'CNN-3', 'DNN-6']",['/teamspace/studios/this_studio/CNN_MODEL_TRA...,"[['CNN_MFCC_with_Deltas', 'CNN_Spectral_Contra...","[0.7037037037037037, 0.6898148148148148, 0.703...",Soft Voting,0.810185,0.808889,0.820429,0.810185
529,ensemble_0000002930,4,"['CNN-3', 'CNN-3', 'CNN-3', 'DNN-6']",['/teamspace/studios/this_studio/CNN_MODEL_TRA...,"[['CNN_MFCC_with_Deltas', 'CNN_Spectral_Contra...","[0.7037037037037037, 0.6898148148148148, 0.703...",Soft Voting,0.810185,0.810074,0.823559,0.810185
...,...,...,...,...,...,...,...,...,...,...,...
58,ensemble_0000003521,3,"['CNN-3', 'CNN-3', 'CNN-3']",['/teamspace/studios/this_studio/CNN_MODEL_TRA...,"[['CNN_CQT_Spectrogram', 'CNN_Chromagram', 'CN...","[0.6805555555555556, 0.5787037037037037, 0.560...",Soft Voting,0.601852,0.596232,0.660281,0.601852
5959,ensemble_0000005177,4,"['CNN-3', 'DNN-6', 'DNN-6', 'DNN-6']",['/teamspace/studios/this_studio/CNN_MODEL_TRA...,"[['CNN_Chromagram', 'CNN_MFCC_with_Deltas', 'C...","[0.5925925925925926, 0.6620370370370371, 0.643...",Soft Voting,0.601852,0.605919,0.680158,0.601852
219,ensemble_0000000841,3,"['CNN-3', 'DNN-6', 'DNN-6']",['/teamspace/studios/this_studio/CNN_MODEL_TRA...,"[['CNN_Chromagram', 'CNN_MFCC_with_Deltas', 'C...","[0.5925925925925926, 0.6990740740740741, 0.666...",Soft Voting,0.601852,0.606246,0.679624,0.601852
2701,ensemble_0000000812,3,"['CNN-3', 'DNN-6', 'DNN-6']",['/teamspace/studios/this_studio/CNN_MODEL_TRA...,"[['CNN_Chromagram', 'CNN_MFCC_with_Deltas', 'C...","[0.5925925925925926, 0.6620370370370371, 0.699...",Soft Voting,0.597222,0.601902,0.679645,0.597222


In [32]:
df_consolidated.to_csv("best_models.csv")

In [19]:
files = df_consolidated.iloc[0]["features"]
files

"[['CNN_MFCC_with_Deltas', 'CNN_Spectral_Contrast'], ['CNN_MFCC_with_Deltas'], ['CNN_CQT_Spectrogram', 'CNN_Chromagram', 'CNN_Spectral_Contrast'], ['mfcc20', 'spectral_centroid', 'shimmer', 'spectral_contrast']]"

In [27]:
filtro = "[['CNN_CQT_Spectrogram', 'CNN_MFCC_with_Deltas'], ['rms', 'mfcc13', 'mfcc40', 'speech_rate'], ['rms', 'mfcc13', 'mfcc20', 'spectral_contrast'], ['rms', 'mfcc20', 'shimmer', 'spectral_contrast']]"
df_filtrado = df_consolidated[df_consolidated["features"] == filtro]
df_filtrado


Unnamed: 0,ensemble_name,ensemble_length,models_types,model_files,features,ind_accuracy,ensemble_method,accuracy,f1,precision,recall
1502,ensemble_0000001997,4,"['CNN-3', 'DNN-6', 'DNN-6', 'DNN-6']",['/teamspace/studios/this_studio/CNN_MODEL_TRA...,"[['CNN_CQT_Spectrogram', 'CNN_MFCC_with_Deltas...","[0.7037037037037037, 0.6620370370370371, 0.648...",Soft Voting,0.694444,0.690529,0.707065,0.694444
