In [3]:
import sagemaker
from sagemaker.session import Session
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.triggers import PipelineSchedule

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [4]:
# Shared context for the following cells: target S3 bucket, SageMaker
# session, the execution role, and the region the session is bound to.
bucket = "experimento-lucas-barbosa"
sess = Session()
role = sagemaker.get_execution_role()
region = sess.boto_region_name

# Echo the resolved settings, one per line.
print(
    f"Role: {role}",
    f"Region: {region}",
    f"Bucket: {bucket}",
    sep="\n",
)

Role: arn:aws:iam::657444906686:role/service-role/AmazonSageMaker-ExecutionRole-20250708T150740
Region: sa-east-1
Bucket: experimento-lucas-barbosa


In [5]:
# ScriptProcessor for the notebook steps: one ml.t3.medium instance that
# launches scripts with python3 inside the container image below.
processing_image_uri = f"763104351884.dkr.ecr.{region}.amazonaws.com/pytorch-training:1.12.0-cpu-py38-ubuntu20.04"
processor = ScriptProcessor(
    role=role,
    image_uri=processing_image_uri,
    command=["python3"],
    instance_type="ml.t3.medium",
    instance_count=1,
    sagemaker_session=sess,
)

In [6]:
# Factory that builds one ProcessingStep per notebook.
# The wrapper script run_<notebook>.py is used as the step's code.
def make_step(notebook, depends_on=None):
    """Build the ProcessingStep that runs one notebook through its wrapper script.

    Args:
        notebook: Notebook file name ending in ``.ipynb`` (e.g.
            ``'00_config.ipynb'``). The name without the extension becomes
            the step name and selects the ``run_<name>.py`` wrapper.
        depends_on: Optional list of step names (or step objects) that must
            complete before this step starts. ``None`` (the default) adds no
            explicit dependency, matching the previous behaviour.

    Returns:
        A ``ProcessingStep`` that uses the shared ``processor``, receives the
        notebook and its wrapper from ``s3://<bucket>/notebooks/`` as inputs,
        and uploads ``/opt/ml/processing/output/`` to
        ``s3://<bucket>/logs/execucoes/<step name>/``.
    """
    # Strip only the trailing extension; str.replace would also remove any
    # '.ipynb' occurring in the middle of the name.
    extension = '.ipynb'
    if notebook.endswith(extension):
        step_name = notebook[:-len(extension)]
    else:
        step_name = notebook
    wrapper = f"run_{step_name}.py"

    # NOTE(review): ProcessingInput.destination is presumably treated as a
    # directory inside the container, so each file would land at
    # <destination>/<file name> — confirm the wrapper reads from that path.
    inputs = [
        ProcessingInput(
            source=f's3://{bucket}/notebooks/{notebook}',
            destination=f'/opt/ml/processing/input/{notebook}'
        ),
        ProcessingInput(
            source=f's3://{bucket}/notebooks/{wrapper}',
            destination=f'/opt/ml/processing/input/{wrapper}'
        )
    ]
    outputs = [
        ProcessingOutput(
            source='/opt/ml/processing/output/',
            destination=f's3://{bucket}/logs/execucoes/{step_name}/'
        )
    ]

    return ProcessingStep(
        name=step_name,
        processor=processor,
        inputs=inputs,
        outputs=outputs,
        # NOTE(review): a bare file name is presumably resolved as a local
        # path relative to the notebook's working directory — confirm that
        # run_<name>.py exists locally as well as in S3.
        code=wrapper,
        depends_on=depends_on
    )

# Create the steps. They exchange no step properties, so without an explicit
# depends_on the pipeline is free to start all four at once; chain them so
# they execute in their numbered order.
step_config   = make_step('00_config.ipynb')
step_validate = make_step('01_validar_dados.ipynb', depends_on=[step_config.name])
step_metrics  = make_step('02_calcular_metricas.ipynb', depends_on=[step_validate.name])
step_save     = make_step('03_salvar_resultados.ipynb', depends_on=[step_metrics.name])

In [7]:
# Assemble the pipeline from the four notebook steps, then create or update
# its definition using the execution role.
pipeline_steps = [step_config, step_validate, step_metrics, step_save]
pipeline = Pipeline(
    name='Experimentos_Notebooks_Pipeline',
    steps=pipeline_steps,
    sagemaker_session=sess,
)
pipeline.upsert(role_arn=role)
print(f"✅ Pipeline registrado: {pipeline.name}")