In [1]:
import os
import pandas as pd

def transform_categorical_and_boolean(data):
    """
    Encode boolean and object-dtype (categorical) columns as integers.

    Boolean columns are cast to 0/1. Each object-dtype column is converted
    to a pandas categorical and replaced by its integer category codes, so
    a column with k distinct values yields codes 0..k-1 (not only 0 and 1);
    pandas assigns the code -1 to missing values.

    The input DataFrame is not modified: the encoding is applied to a copy.

    Args:
        data (pd.DataFrame): DataFrame to transform.

    Returns:
        pd.DataFrame: New DataFrame with the encoded columns; columns of
            any other dtype are carried over unchanged.
    """
    # Work on a copy so the caller's DataFrame is left untouched.
    encoded = data.copy()

    # Booleans -> 0/1
    boolean_columns = encoded.select_dtypes(include=['bool']).columns
    for col in boolean_columns:
        encoded[col] = encoded[col].astype(int)

    # Object-dtype columns -> integer category codes
    categorical_columns = encoded.select_dtypes(include=['object']).columns
    for col in categorical_columns:
        encoded[col] = encoded[col].astype('category').cat.codes

    return encoded

def process_datasets(input_directory, output_directory):
    """
    Encode every ``.txt`` dataset in a directory and save the results.

    Each entry of ``input_directory`` whose name ends in ``.txt`` is read as
    comma-separated data, passed through
    ``transform_categorical_and_boolean`` and written under the same file
    name into ``output_directory`` (without the index column). A failure on
    one file is reported on stdout and does not stop the remaining files.

    Args:
        input_directory (str): Directory containing the input .txt files.
        output_directory (str): Directory where the transformed files are
            written; it is created if it does not exist yet.
    """
    # Make sure the destination folder is there before writing anything.
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    for entry in os.listdir(input_directory):
        # Only .txt files are treated as datasets.
        if not entry.endswith('.txt'):
            continue

        source_path = os.path.join(input_directory, entry)
        try:
            frame = pd.read_csv(source_path, delimiter=',')
            encoded = transform_categorical_and_boolean(frame)

            target_path = os.path.join(output_directory, entry)
            encoded.to_csv(target_path, index=False, sep=',')

            print(f"Arquivo transformado salvo em: {target_path}")
        except Exception as error:
            # Best effort: report the problem and carry on with the next file.
            print(f"Erro ao processar o arquivo {entry}: {error}")

# Input and output locations. Both point at the same folder, so every
# processed file overwrites its own source file.
input_directory = r"C:\Users\CALEO\OneDrive - Hexagon\Documents\GitHub\Software_effort_estimation\proposal\algorithms\abordagens\output\0-saida"
output_directory = input_directory

# Run the transformation over every dataset in the folder.
process_datasets(
    input_directory=input_directory,
    output_directory=output_directory,
)


Arquivo transformado salvo em: C:\Users\CALEO\OneDrive - Hexagon\Documents\GitHub\Software_effort_estimation\proposal\algorithms\abordagens\output\0-saida\cocomo81.txt
Arquivo transformado salvo em: C:\Users\CALEO\OneDrive - Hexagon\Documents\GitHub\Software_effort_estimation\proposal\algorithms\abordagens\output\0-saida\encoded_china.txt
Arquivo transformado salvo em: C:\Users\CALEO\OneDrive - Hexagon\Documents\GitHub\Software_effort_estimation\proposal\algorithms\abordagens\output\0-saida\encoded_desharnais.txt
Arquivo transformado salvo em: C:\Users\CALEO\OneDrive - Hexagon\Documents\GitHub\Software_effort_estimation\proposal\algorithms\abordagens\output\0-saida\encoded_maxwell.txt
