# Rodar todos os códigos do diretório

In [2]:
import os
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
from concurrent.futures import ThreadPoolExecutor, as_completed

def run_notebook(notebook_path, output_path=None):
    """
    Execute a notebook and optionally write the executed copy to disk.

    Args:
        notebook_path: Path of the ``.ipynb`` file to read and execute.
        output_path: Where to write the executed notebook. When falsy
            (the default), the executed notebook is not saved.

    Returns:
        None. Progress and failures are reported with ``print``; any
        exception raised while reading, executing or writing is caught
        here and is not propagated to the caller.
    """
    try:
        with open(notebook_path, 'r', encoding='utf-8') as source:
            nb = nbformat.read(source, as_version=4)

        # The 'path' entry is handed to nbconvert as the execution directory;
        # here it is the current working directory, not the notebook's folder.
        resources = {'metadata': {'path': './'}}
        runner = ExecutePreprocessor(timeout=None, kernel_name='python3')
        runner.preprocess(nb, resources)

        if output_path:
            with open(output_path, 'w', encoding='utf-8') as target:
                nbformat.write(nb, target)
        print(f"Executado: {notebook_path}")
    except Exception as exc:
        print(f"Erro ao executar {notebook_path}: {exc}")

def save_notebook_outputs_to_txt(notebook_path, output_txt_path):
    """
    Dump the textual outputs of an executed notebook into a TXT file.

    For every code cell that has an ``outputs`` entry, a ``# Saída`` header
    is written, followed by each output's ``text`` field or, failing that,
    its ``text/plain`` data. Outputs with neither are skipped.

    Args:
        notebook_path: Path of the (already executed) notebook to read.
        output_txt_path: Path of the text file to create or overwrite.

    Returns:
        None. Success and failure are reported with ``print``; exceptions
        are caught here and are not propagated.
    """
    try:
        with open(notebook_path, 'r', encoding='utf-8') as nb_file:
            nb = nbformat.read(nb_file, as_version=4)

        with open(output_txt_path, 'w', encoding='utf-8') as sink:
            for cell in nb['cells']:
                # Only code cells carry outputs.
                if cell['cell_type'] != 'code' or 'outputs' not in cell:
                    continue

                sink.write("\n# Saída\n")
                for item in cell['outputs']:
                    if 'text' in item:
                        payload = item['text']
                    elif 'data' in item and 'text/plain' in item['data']:
                        payload = item['data']['text/plain']
                    else:
                        # No plain-text representation: nothing to write.
                        continue
                    sink.write(payload + '\n')
        print(f"Saída salva: {output_txt_path}")
    except Exception as exc:
        print(f"Erro ao salvar saída de {notebook_path}: {exc}")

def process_notebook(notebook_path, output_notebook_path, output_txt_path):
    """
    Run one notebook, then export the outputs of its executed copy to text.

    Args:
        notebook_path: Notebook to execute.
        output_notebook_path: Where the executed notebook is written; this
            same file is then read back for the text export.
        output_txt_path: Destination of the plain-text outputs.

    Note:
        ``run_notebook`` prints errors instead of raising, so the export
        step is attempted even when execution failed.
    """
    run_notebook(notebook_path, output_path=output_notebook_path)
    save_notebook_outputs_to_txt(
        notebook_path=output_notebook_path,
        output_txt_path=output_txt_path,
    )

def parallel_execute_notebooks(notebook_dir, output_dir, max_threads=4):
    """
    Execute every notebook found directly in a directory using a thread pool.

    Each ``*.ipynb`` file in ``notebook_dir`` (sub-directories are not
    searched) is handed to ``process_notebook``. The executed notebook is
    written to ``output_dir`` under the same file name, and its outputs to a
    ``.txt`` file with the same base name.

    Args:
        notebook_dir: Directory to scan for ``.ipynb`` files.
        output_dir: Directory for results; created if it does not exist.
        max_threads: Maximum number of worker threads.

    Returns:
        None. Exceptions raised by a task are printed, not propagated.
    """
    suffix = '.ipynb'
    os.makedirs(output_dir, exist_ok=True)

    notebook_files = [f for f in os.listdir(notebook_dir) if f.endswith(suffix)]
    tasks = []

    with ThreadPoolExecutor(max_workers=max_threads) as executor:
        for notebook_file in notebook_files:
            notebook_path = os.path.join(notebook_dir, notebook_file)
            output_notebook_path = os.path.join(output_dir, notebook_file)
            # Strip only the trailing extension: str.replace would also rewrite
            # any other '.ipynb' occurring earlier in the file name.
            txt_name = notebook_file[:-len(suffix)] + '.txt'
            output_txt_path = os.path.join(output_dir, txt_name)

            tasks.append(executor.submit(
                process_notebook, notebook_path, output_notebook_path, output_txt_path
            ))

        # Report failures as tasks finish rather than in submission order.
        for future in as_completed(tasks):
            try:
                future.result()
            except Exception as e:
                print(f"Erro durante execução paralela: {e}")

# Source directory holding the notebooks, and the sub-directory that receives
# the executed notebooks and their text dumps
input_notebook_dir = r"C:\Users\CALEO\OneDrive - Hexagon\Documents\GitHub\Software_effort_estimation\proposal\algorithms\processamento"
output_dir = r"C:\Users\CALEO\OneDrive - Hexagon\Documents\GitHub\Software_effort_estimation\proposal\algorithms\processamento\output"

# Run every notebook of the source directory on up to four worker threads
parallel_execute_notebooks(
    notebook_dir=input_notebook_dir,
    output_dir=output_dir,
    max_threads=4,
)


Executado: C:\Users\CALEO\OneDrive - Hexagon\Documents\GitHub\Software_effort_estimation\proposal\algorithms\processamento\processamento.ipynb
Saída salva: C:\Users\CALEO\OneDrive - Hexagon\Documents\GitHub\Software_effort_estimation\proposal\algorithms\processamento\output\processamento.txt


In [4]:
import pandas as pd
import re

# Path of the text dump produced from the executed notebook
processamento_output_txt = r"C:\Users\CALEO\OneDrive - Hexagon\Documents\GitHub\Software_effort_estimation\proposal\algorithms\processamento\output\processamento.txt"

# (output column, label searched for in a line). Order matters: a label that
# contains another one must come first, otherwise "Root Mean Squared Error"
# lines are captured by the "Mean Squared Error" entry and RMSE is never set.
_METRIC_LABELS = (
    ("RMSE", "Root Mean Squared Error"),
    ("MSE", "Mean Squared Error"),
    ("MAPE", "Mean Absolute Percentage Error"),
    ("MAE", "Mean Absolute Error"),
    ("R²", "R² Score"),
    ("Explained Variance", "Explained Variance Score"),
    ("Max Error", "Max Error"),
)

# Column order of the resulting table
_METRIC_COLUMNS = ("MSE", "RMSE", "MAE", "MAPE", "R²", "Explained Variance", "Max Error")

_HEADER_PREFIX = "Arquivo:"
_BLOCK_SEPARATOR = "--------------------------------------------------"

# Signed integer or decimal, with an optional exponent (e.g. -3, 0.25, 1.2e-05)
_NUMBER_RE = re.compile(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?")


def _extract_number(text):
    """Return the first number found in *text* as a float, or None if absent."""
    match = _NUMBER_RE.search(text)
    return float(match.group()) if match else None


def _parse_metric_blocks(lines):
    """
    Turn the lines of the text dump into one dict per result block.

    A block starts at an ``Arquivo: <dataset> - <model>`` line and is emitted
    when a separator line (a run of dashes) is reached. Metrics not seen in
    the block stay ``None``.

    NOTE(review): the recorded run of this cell shows every metric as None,
    which suggests the metric lines come after the separator or use labels
    other than the ones above - confirm against the actual file layout.
    """
    rows = []
    dataset = None
    model = None
    metrics = dict.fromkeys(_METRIC_COLUMNS)

    for raw_line in lines:
        line = raw_line.strip()

        if line.startswith(_HEADER_PREFIX):
            # Cut after the prefix instead of splitting on ':' so that a
            # dataset name containing a colon is not truncated.
            header = line[len(_HEADER_PREFIX):]
            dataset_part, dash, model_part = header.partition(" - ")
            dataset = dataset_part.strip()
            model = model_part if dash else None
            metrics = dict.fromkeys(_METRIC_COLUMNS)
            continue

        matched_metric = False
        for column, label in _METRIC_LABELS:
            if label in line:
                # Prefer the number that follows the label; fall back to the
                # whole line for layouts where the value comes first.
                value = _extract_number(line.split(label, 1)[1])
                if value is None:
                    value = _extract_number(line)
                if value is not None:
                    metrics[column] = value
                matched_metric = True
                break
        if matched_metric:
            continue

        # A separator seen before any header has no block to close.
        if line.startswith(_BLOCK_SEPARATOR) and dataset is not None:
            rows.append({"Dataset": dataset, "Model": model, **metrics})

    return rows


# Parse the file into a list of result rows
with open(processamento_output_txt, 'r', encoding='utf-8') as file:
    results = _parse_metric_blocks(file)

# Build the DataFrame; explicit columns keep the CSV header even with no rows
df_results = pd.DataFrame(results, columns=["Dataset", "Model", *_METRIC_COLUMNS])

# Save the results to a CSV file
df_results.to_csv('tabela_resultados_processamento.csv', index=False)

# Show the first results
print(df_results.head(10))


                                  Dataset  \
0                    0-saida/cocomo81.txt   
1               0-saida/encoded_china.txt   
2          0-saida/encoded_desharnais.txt   
3             0-saida/encoded_maxwell.txt   
4            1-saida/2-saida_cocomo81.txt   
5       1-saida/2-saida_encoded_china.txt   
6  1-saida/2-saida_encoded_desharnais.txt   
7     1-saida/2-saida_encoded_maxwell.txt   
8                   10-saida/cocomo81.txt   
9              10-saida/encoded_china.txt   

                                             Model   MSE  RMSE   MAE  MAPE  \
0  Regressão ElasticNet com KFold (100 repetições)  None  None  None  None   
1  Regressão ElasticNet com KFold (100 repetições)  None  None  None  None   
2  Regressão ElasticNet com KFold (100 repetições)  None  None  None  None   
3  Regressão ElasticNet com KFold (100 repetições)  None  None  None  None   
4  Regressão ElasticNet com KFold (100 repetições)  None  None  None  None   
5  Regressão ElasticNet com KFold (1