In [1]:
# Extend the import search path before importing the project-local modules
# below (presumably runEnsembles / DNNPredict / CNNPredict live in these
# directories -- confirm against the studio layout).
import sys
sys.path.append("/teamspace/studios/this_studio/UTIL")
sys.path.append("/teamspace/studios/this_studio/")
import os
import concurrent.futures
import pandas as pd
from runEnsembles import run_ensemble
# Root directory that the data files and the model paths read from the CSVs
# are joined onto.
WORKSPACE = "/teamspace/studios/this_studio/"
from DNNPredict import DNNPredict
from CNNPredict import CNNPredict
# Dataset files handed to run_ensemble for every ensemble. The names suggest
# test and second-validation splits for the DNN and CNN models -- confirm.
DNN_test = os.path.join(WORKSPACE, "DNN_test.joblib")
CNN_test = os.path.join(WORKSPACE, "CNN_test.h5")
DNN_val2 = os.path.join(WORKSPACE, "DNN_val2.joblib")
CNN_val2 = os.path.join(WORKSPACE, "CNN_val2.h5")
import re
from IPython.display import clear_output



def process_csv(file_path):
    """
    Process one ensemble CSV file and write the matching metrics CSV.

    The first row of ``file_path`` supplies the ensemble name, the ensemble
    length, the model types and the model file paths (each joined onto
    ``WORKSPACE``). The ensemble is evaluated with ``run_ensemble`` and one
    output row per returned ensemble result is written to
    ``<path without extension>_metrics.csv``. When that metrics file already
    exists, the input is skipped and nothing is recomputed.

    Args:
        file_path: Path of the ensemble CSV file to process.

    Returns:
        None. The result is the metrics CSV written to disk.

    Raises:
        Any exception raised while reading the CSV, evaluating its cells or
        running the ensemble propagates to the caller.
    """
    clear_output(wait=True)  # Clear the notebook cell output

    # Swap only the trailing extension. str.replace('.csv', ...) would also
    # rewrite a ".csv" that appears earlier in the path (e.g. in a directory
    # name) and point the output at the wrong location.
    metrics_file = os.path.splitext(file_path)[0] + '_metrics.csv'

    if os.path.exists(metrics_file):
        print(f"[SKIPPED] Métricas já existem para: {file_path}")
        return

    print(f"[PROCESSING] {file_path}")

    # Only the first row of the input file is used.
    row = pd.read_csv(file_path).iloc[0]

    ensemble_name = row["ensemble_name"]
    ensemble_length = int(row["ensemble_length"])

    # NOTE(review): eval() executes whatever expression is stored in the CSV
    # cell. If these cells are always plain list literals, ast.literal_eval
    # would be the safe equivalent -- confirm the cell format before switching.
    models_types = eval(row["models_type"])
    model_files = [
        os.path.join(WORKSPACE, file) for file in eval(row["val2_model_path"])
    ]

    # Run the ensemble
    ens_result, features, ind_accuracy = run_ensemble(
        model_files, DNN_test, CNN_test, DNN_val2, CNN_val2
    )

    # Build one output row per ensemble result; the key order below is the
    # column order of the metrics CSV.
    data = [
        {
            "ensemble_name": ensemble_name,
            "ensemble_length": ensemble_length,
            "models_types": models_types,
            "model_files": model_files,
            "features": features,
            "ind_accuracy": ind_accuracy,
            "ensemble_method": result["ensemble_method"],
            "accuracy": result["ensemble_metrics"]["accuracy"],
            "f1": result["ensemble_metrics"]["f1"],
            "precision": result["ensemble_metrics"]["precision"],
            "recall": result["ensemble_metrics"]["recall"],
        }
        for result in ens_result
    ]

    pd.DataFrame(data).to_csv(metrics_file, index=False)

    print(f"[COMPLETED] Saved metrics in {metrics_file}")

def process_directory(directory, WORKERS=10):
    """
    Run ``process_csv`` on every ensemble CSV file found in ``directory``.

    Only entries named ``ensemble_<10 digits>.csv`` are considered. They are
    submitted to a thread pool in ascending order of that number, and a
    failure in one file is printed without stopping the others.

    Args:
        directory: Directory to scan for ensemble CSV files.
        WORKERS: Maximum number of worker threads in the pool.

    Returns:
        None.
    """
    name_re = re.compile(r"ensemble_\d{10}\.csv$")

    def sequence_number(path):
        # "dir/ensemble_0000000042.csv" -> 42
        stem, _ext = os.path.splitext(os.path.basename(path))
        return int(stem.split("_")[1])

    # Collect the matching files, then order them by their numeric suffix.
    candidates = []
    for entry in os.listdir(directory):
        if name_re.match(entry):
            candidates.append(os.path.join(directory, entry))
    candidates.sort(key=sequence_number)

    with concurrent.futures.ThreadPoolExecutor(max_workers=WORKERS) as pool:
        # Remember which file each future belongs to for error reporting.
        pending = {}
        for path in candidates:
            pending[pool.submit(process_csv, path)] = path

        for finished in concurrent.futures.as_completed(pending):
            try:
                finished.result()  # Re-raises any exception from the worker
            except Exception as exc:
                print(f"Erro ao processar {pending[finished]}: {exc}")


# Process every matching ensemble CSV in "individual_ensemble_candidates"
# (a relative path, resolved against the current working directory) with up
# to 10 worker threads.
process_directory("individual_ensemble_candidates", WORKERS=10)

[PROCESSING] individual_ensemble_candidates/ensemble_0000005984.csv
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[COMPLETED] Saved metrics in individual_ensemble_candidates/ensemble_0000005975_metrics.csv
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/7[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 56ms/step[COMPLETED] Saved metrics in individual_ensemble_candidates/ensemble_0000005977_metrics.csv
[COMPLETED] Saved metrics in individual_ensemble_candidates/ensemble_0000005976_metrics.csv
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m7/7[