# Instalando o Toolkit



## Verificar a versão do **Python**


In [1]:
# Print the interpreter's version string so the Python runtime used for this
# notebook run is recorded in the cell output.
import sys
print(sys.version)


3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]


## Instalar o ThreeWToolkit a partir do *GitHub*



In [5]:
!pip install --quiet --upgrade pip
!pip install --quiet git+https://github.com/petrobras/3W.git


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


## Testar instalação

In [6]:
# Smoke test: import the top-level package and print its version attribute.
# Only ImportError is handled; it is reported with print() instead of being
# re-raised, so the cell finishes even when the package is not installed.
try:
    import ThreeWToolkit
    print("3WToolkit importado com sucesso")
    print("Versão:", ThreeWToolkit.__version__)
except ImportError as e:
    print("Falha ao importar ThreeWToolkit:", e)


3WToolkit importado com sucesso
Versão: 2.0.0


In [7]:
# Smoke test for the submodules used in this notebook: try to import one or
# more names from each of them. An ImportError is printed rather than raised;
# the success message is printed only when every import went through.
try:
    from ThreeWToolkit.dataset import ParquetDataset, ParquetDatasetConfig
    from ThreeWToolkit.preprocessing import ImputeMissing, Normalize
    from ThreeWToolkit.feature_extraction import ExtractStatisticalFeatures
    from ThreeWToolkit.models import SklearnModels
except ImportError as import_error:
    print("Falha ao importar módulos:", import_error)
else:
    print("Módulos principais importados com sucesso")


Módulos principais importados com sucesso


# Baixando o dataset

In [8]:
from ThreeWToolkit.dataset import ParquetDataset, ParquetDatasetConfig

# Build a dataset configuration pointing at a local folder and instantiate the
# dataset from it.
# NOTE(review): the captured log shows an existing copy being found and
# validated on construction; presumably the data is downloaded when the folder
# is missing — confirm against the toolkit documentation.
config = ParquetDatasetConfig(path="./dataset")  # folder where it will be stored
dataset = ParquetDataset(config)


[ParquetDataset] Dataset found at ./dataset
[ParquetDataset] Validating dataset integrity...
[ParquetDataset] Dataset integrity check passed!


# Criando a pipeline

In [9]:
# Imports for the pipeline section: matplotlib's pyplot, the Pipeline class and
# the configuration classes passed to it as steps.
import matplotlib.pyplot as plt

from ThreeWToolkit.pipeline import Pipeline
# NOTE(review): ParquetDatasetConfig was imported from ThreeWToolkit.dataset in
# an earlier cell; this line rebinds the name from a different module path —
# confirm both paths expose the same class.
from ThreeWToolkit.core.base_dataset import ParquetDatasetConfig
from ThreeWToolkit.core.base_preprocessing import (
    ImputeMissingConfig,
    NormalizeConfig,
    WindowingConfig,
)
from ThreeWToolkit.core.base_assessment import ModelAssessmentConfig
from ThreeWToolkit.core.enums import TaskType
from ThreeWToolkit.models.mlp import MLPConfig
from ThreeWToolkit.trainer.trainer import TrainerConfig
# Define dataset path (same relative folder used when the dataset was
# instantiated in the download section)
dataset_path = "./dataset"

In [17]:
# Define the model configuration
config_model = MLPConfig(
    hidden_sizes=(64, 32),
    output_size=2,
    random_seed=42,
    activation_function="relu",
    regularization=None,
)

# Create the pipeline with sequential steps
pipeline = Pipeline(
    [
        # Step 1: Load Dataset
        ParquetDatasetConfig(
            path=dataset_path,
            split=None,
            force_download=False,
            columns=["T-JUS-CKP", "T-MON-CKP"],
            target_column="class",
            target_class=[0, 1],
        ),
        # Step 2: Preprocessing
        ImputeMissingConfig(strategy="median", columns=["T-JUS-CKP"]),
        NormalizeConfig(norm="l2"),
        WindowingConfig(window_size=100),
        # Step 3: Model Training
        TrainerConfig(
            optimizer="adam",
            criterion="cross_entropy",
            batch_size=32,
            epochs=10,
            seed=42,
            config_model=config_model,
            learning_rate=0.001,
            cross_validation=False,
            shuffle_train=True,
        ),
        # Step 4: Model Assessment
        ModelAssessmentConfig(
            metrics=["balanced_accuracy", "precision", "recall", "f1"],
            task_type=TaskType.CLASSIFICATION,
            export_results=True,
            generate_report=True,           # Important option to enable report generation through the Pipeline class
        ),
    ]
)

[ParquetDataset] Dataset found at ./dataset
[ParquetDataset] Validating dataset integrity...
[ParquetDataset] Dataset integrity check passed!
>> ['T-JUS-CKP', 'T-MON-CKP']


In [None]:
import time

# Execute the entire pipeline and report the wall-clock time it took.
# The report sits in a `finally` clause so the elapsed time is still printed
# when the run raises or is interrupted (e.g. KeyboardInterrupt on a long run);
# any exception still propagates after the message is printed.
start_time = time.perf_counter()
try:
    pipeline.run()
finally:
    end_time = time.perf_counter()
    elapsed_seconds = end_time - start_time
    print(f"Tempo total da pipeline: {elapsed_seconds:.2f} s ({elapsed_seconds/60:.2f} min)")


[Pipeline] Processing batches:   0%|          | 0/722 [00:00<?, ?file/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]

[Pipeline] Preprocessing steps:   0%|          | 0/3 [00:00<?, ?step/s]