In [9]:
import sagemaker
from sagemaker.session import Session
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.triggers import PipelineSchedule

In [10]:
# ─── 1) Session and role configuration ──────────────────────────────────────
# S3 bucket used for the pipeline's input and output prefixes
# (change this to your own bucket).
bucket = "experimento-lucas-barbosa"

# SageMaker session; the region is read from its underlying boto session.
sess = Session()
region = sess.boto_region_name

# Execution role; passed to the processor, pipeline.upsert() and put_triggers().
role = sagemaker.get_execution_role()

In [11]:
# ─── 2) Define the ScriptProcessor ───────────────────────────────────────────
#    This container runs processar_experimentos.py.
PROCESSING_INSTANCE_TYPE = "ml.t3.medium"

# Resolve the managed scikit-learn image through the SDK instead of building
# the URI by hand. An ECR registry host has the form
# <account-id>.dkr.ecr.<region>.amazonaws.com; the previous f-string put the
# region where the account id belongs, producing a URI that cannot be pulled.
# NOTE(review): the previous tag asked for "2.0-1-cpu-py38", which does not
# appear to be a published scikit-learn container version; "1.2-1" is used
# here — confirm this is the version the script expects.
sklearn_image_uri = sagemaker.image_uris.retrieve(
    framework="sklearn",
    region=region,
    version="1.2-1",
    py_version="py3",
    instance_type=PROCESSING_INSTANCE_TYPE,
)

processor = ScriptProcessor(
    image_uri=sklearn_image_uri,
    command=["python3"],
    instance_count=1,
    instance_type=PROCESSING_INSTANCE_TYPE,
    role=role,
    sagemaker_session=sess
)


In [12]:
# ─── 3) Build the ProcessingStep that runs the script ───────────────────────
# Input: the bucket's raw/ prefix, mapped to a directory inside the container.
raw_input = ProcessingInput(
    source=f"s3://{bucket}/raw/",
    destination="/opt/ml/processing/input/raw/",
)

# Outputs: two container directories, each mapped to its own S3 prefix.
processed_output = ProcessingOutput(
    source="/opt/ml/processing/output/processed/",
    destination=f"s3://{bucket}/processed/",
)
archive_output = ProcessingOutput(
    source="/opt/ml/processing/output/archive/",
    destination=f"s3://{bucket}/archive/",
)

step_processar = ProcessingStep(
    name="ProcessarExperimentosStep",
    processor=processor,
    inputs=[raw_input],
    outputs=[processed_output, archive_output],
    code="processar_experimentos.py",
)


In [13]:
# ─── 4) Assemble the Pipeline ────────────────────────────────────────────────
# No role is passed to the constructor; it is supplied later to upsert().
pipeline_name = "PipelineProcessarExperimentos"
pipeline_steps = [step_processar]

pipeline = Pipeline(
    name=pipeline_name,
    steps=pipeline_steps,
    sagemaker_session=sess,
)

In [14]:
# ─── 5) Create or update the pipeline, passing the role to upsert() ─────────
pipeline.upsert(role_arn=role)

# Confirmation message, emitted only after upsert() has returned.
upsert_message = f"✅ Pipeline '{pipeline.name}' criado/atualizado com role {role}"
print(upsert_message)



✅ Pipeline 'PipelineProcessarExperimentos' criado/atualizado com role arn:aws:iam::657444906686:role/service-role/AmazonSageMaker-ExecutionRole-20250708T150740


In [16]:
# ─── 6) Schedule the pipeline with PipelineSchedule ─────────────────────────
# Cron fields: minutes hours day-of-month month day-of-week year.
# "0 6 * * ? *" fires once a day at 06:00, matching the schedule's "Daily…"
# name. The cell previously left an every-minute test expression
# ("0/1 * * * ? *") active, which would start a new execution every minute.
# NOTE(review): the time zone is presumably UTC — confirm against the SDK docs.
cron_body = "0 6 * * ? *"
schedule = PipelineSchedule(
    name="DailyProcessarExperimentosSchedule",
    cron=cron_body
)

# Attach the schedule to the pipeline. The previous call passed triggers=[],
# so no schedule was attached even though the message below reported success.
# NOTE(review): role_arn is the role assumed to start executions; its trust
# policy likely needs to allow scheduler.amazonaws.com — confirm before running.
pipeline.put_triggers(triggers=[schedule], role_arn=role)
print(f"✅ Agendamento '{schedule.name}' definido: cron({cron_body})")