In [12]:
import sagemaker
from sagemaker.session import Session
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.triggers import PipelineSchedule

In [13]:
sess = Session()
role = sagemaker.get_execution_role()
region = sess.boto_region_name
bucket = "experimento-lucas-barbosa"

print(f"Role: {role}")
print(f"Region: {region}")
print(f"Bucket: {bucket}")

Role: arn:aws:iam::657444906686:role/service-role/AmazonSageMaker-ExecutionRole-20250708T150740
Region: sa-east-1
Bucket: experimento-lucas-barbosa


In [14]:
# Definir ScriptProcessor para notebooks
processor = ScriptProcessor(
    image_uri=f"763104351884.dkr.ecr.{region}.amazonaws.com/pytorch-training:1.12.0-cpu-py38-ubuntu20.04",
    command=["python3"],
    instance_count=1,
    instance_type="ml.t3.medium",
    role=role,
    sagemaker_session=sess
)

In [19]:
# Célula 2: ScriptProcessor (image com Python e Jupyter)
processor = ScriptProcessor(
    image_uri=f"763104351884.dkr.ecr.{region}.amazonaws.com/pytorch-training:1.12.0-cpu-py38-ubuntu20.04",
    command=["python3"],
    instance_type='ml.t3.medium',
    instance_count=1,
    role=role,
    sagemaker_session=sess,
    base_job_name='experimentos-notebooks'
)

# Para cada notebook, usamos um wrapper Python simples que chama papermill:
# (assume run_<step>.py já criado ao lado de cada notebook)

def make_step(name, notebook):
    return ProcessingStep(
        name=name,
        processor=processor,
        inputs=[
            ProcessingInput(source=f's3://{bucket}/notebooks/{notebook}',
                            destination=f'/opt/ml/processing/input/{notebook}'),
        ],
        outputs=[
            ProcessingOutput(source='/opt/ml/processing/output/',
                             destination=f's3://{bucket}/logs/execucoes/{name}/')
        ],
        code=f'run_{notebook.replace(".ipynb","")}.py'
    )

step_config    = make_step('ConfigStep','00_config.ipynb')
step_validate  = make_step('ValidateStep','01_validar_dados.ipynb')
step_metrics   = make_step('MetricsStep','02_calcular_metricas.ipynb')
step_save      = make_step('SaveStep','03_salvar_resultados.ipynb')

In [20]:
# Montar Pipeline
pipeline = Pipeline(
    name='Experimentos_Notebooks_Pipeline',
    steps=[step_config, step_validate, step_metrics, step_save],
    sagemaker_session=sess
)
pipeline.upsert(role_arn=role)
print(f"✅ Pipeline registrado: {pipeline.name}")


ValueError: code run_00_config.py wasn't found. Please make sure that the file exists.
                    