In [1]:
# Par√¢metros do pipeline
bucket_name = "experimento-lucas-barbosa"  # Nome do bucket S3
pipeline_name = "ExperimentoPipeline"  # Nome do pipeline
instance_type = "ml.m5.large"  # Tipo de inst√¢ncia para processamento
wait_for_completion = True  # Se deve aguardar conclus√£o ou executar ass√≠ncrono
# Notebooks a serem executados
config_notebook = "00_config.ipynb"
validacao_notebook = "01_validar_dados.ipynb"
metricas_notebook = "02_calcular_metricas.ipynb"
salvar_notebook = "03_salvar_resultados.ipynb"

In [2]:
import boto3
import sagemaker
from sagemaker.session import Session
from sagemaker import get_execution_role
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.notebook_job_step import NotebookJobStep
from sagemaker.processing import ScriptProcessor
from datetime import datetime
import time

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [3]:
# Configura√ß√£o inicial
sess = Session()
role = get_execution_role()
region = sess.boto_region_name
bucket = bucket_name

print(f"üîß Configura√ß√£o do ambiente:")
print(f"  Role: {role}")
print(f"  Region: {region}")
print(f"  Bucket: {bucket}")
print(f"  Pipeline: {pipeline_name}")
print(f"  Instance type: {instance_type}")

üîß Configura√ß√£o do ambiente:
  Role: arn:aws:iam::657444906686:role/service-role/AmazonSageMaker-ExecutionRole-20250708T150740
  Region: sa-east-1
  Bucket: experimento-lucas-barbosa
  Pipeline: ExperimentoPipeline
  Instance type: ml.m5.large


In [4]:
# Para sa-east-1 - SageMaker Distribution
image_uri = "567556641782.dkr.ecr.sa-east-1.amazonaws.com/sagemaker-distribution-prod:1-cpu"

In [5]:
# Etapa 1: Configura√ß√£o
# Gera o arquivo config.json com as configura√ß√µes do experimento
step_config = NotebookJobStep(
    name="ConfigStep",
    notebook_job_name=f"config-job-{int(time.time())}",
    input_notebook=config_notebook,
    image_uri=image_uri,
    role=role,
    instance_type=instance_type,
    kernel_name="python3",
    parameters={
        "BUCKET_NAME": bucket_name,
        "PASTA_RAW": "raw/",
        "PASTA_PROCESSED": "processed/",
        "PASTA_ARCHIVE": "archive/"
    }
)

print("‚úì Etapa 1 (Config) definida")
print(f"  Notebook: {config_notebook}")
print(f"  Par√¢metros: bucket={bucket_name}")

‚úì Etapa 1 (Config) definida
  Notebook: 00_config.ipynb
  Par√¢metros: bucket=experimento-lucas-barbosa


In [6]:
# Etapa 2: Valida√ß√£o de dados
# Valida arquivos CSV na pasta raw/ e prepara para processamento
step_validacao = NotebookJobStep(
    name="ValidacaoStep",
    notebook_job_name=f"validacao-job-{int(time.time())}",
    input_notebook=validacao_notebook,
    image_uri=image_uri,
    role=role,
    instance_type=instance_type,
    kernel_name="python3",
    parameters={
        "input_config_path": "config.json",
        "bucket_name": bucket_name,
        "pasta_raw": "raw/",
        "output_validation_file": "validacao_resultado.json"
    }
)

print("‚úì Etapa 2 (Valida√ß√£o) definida")
print(f"  Notebook: {validacao_notebook}")
print(f"  Depende de: ConfigStep")

‚úì Etapa 2 (Valida√ß√£o) definida
  Notebook: 01_validar_dados.ipynb
  Depende de: ConfigStep


In [7]:
# Etapa 3: C√°lculo de m√©tricas
# Calcula m√©tricas b√°sicas e lift para cada grupo do experimento
step_metricas = NotebookJobStep(
    name="MetricasStep",
    notebook_job_name=f"metricas-job-{int(time.time())}",
    input_notebook=metricas_notebook,
    image_uri=image_uri,
    role=role,
    instance_type=instance_type,
    kernel_name="python3",
    parameters={
        "input_config_path": "config.json",
        "input_validacao_path": "validacao_resultado.json",
        "output_metricas_path": "metricas_completas.csv"
    }
)

print("‚úì Etapa 3 (M√©tricas) definida")
print(f"  Notebook: {metricas_notebook}")
print(f"  Depende de: ValidacaoStep")

‚úì Etapa 3 (M√©tricas) definida
  Notebook: 02_calcular_metricas.ipynb
  Depende de: ValidacaoStep


In [8]:
# Etapa 4: Salvar resultados
# Salva m√©tricas finais no S3 e gera logs de execu√ß√£o
step_salvar = NotebookJobStep(
    name="SalvarStep",
    notebook_job_name=f"salvar-job-{int(time.time())}",
    input_notebook=salvar_notebook,
    image_uri=image_uri,
    role=role,
    instance_type=instance_type,
    kernel_name="python3",
    parameters={
        "input_config_path": "config.json",
        "input_metricas_path": "metricas_completas.csv",
        "bucket_name": bucket_name,
        "pasta_processed": "processed/",
        "pasta_logs": "logs/"
    }
)

print("‚úì Etapa 4 (Salvar) definida")
print(f"  Notebook: {salvar_notebook}")
print(f"  Depende de: MetricasStep")

‚úì Etapa 4 (Salvar) definida
  Notebook: 03_salvar_resultados.ipynb
  Depende de: MetricasStep


In [9]:
# Definir depend√™ncias entre etapas
step_validacao.add_depends_on([step_config])
step_metricas.add_depends_on([step_validacao])
step_salvar.add_depends_on([step_metricas])

print("‚úì Depend√™ncias configuradas:")
print("  ConfigStep ‚Üí ValidacaoStep ‚Üí MetricasStep ‚Üí SalvarStep")

‚úì Depend√™ncias configuradas:
  ConfigStep ‚Üí ValidacaoStep ‚Üí MetricasStep ‚Üí SalvarStep


In [10]:
# Criar o pipeline
pipeline = Pipeline(
    name=pipeline_name,
    steps=[
        step_config,
        step_validacao,
        step_metricas,
        step_salvar
    ],
    sagemaker_session=sess
)

print("‚úÖ Pipeline criado com sucesso")
print(f"  Nome: {pipeline.name}")
print(f"  Etapas: {len(pipeline.steps)}")
print(f"  Sess√£o: {sess.boto_region_name}")

# Mostrar resumo das etapas
print("\nüìã Resumo das etapas:")
for i, step in enumerate(pipeline.steps, 1):
    print(f"  {i}. {step.name}")

‚úÖ Pipeline criado com sucesso
  Nome: ExperimentoPipeline
  Etapas: 4
  Sess√£o: sa-east-1

üìã Resumo das etapas:
  1. ConfigStep
  2. ValidacaoStep
  3. MetricasStep
  4. SalvarStep


In [11]:
# Executar o pipeline
try:
    print("üöÄ Iniciando execu√ß√£o do pipeline...")
    
    # Primeiro, criar/atualizar a defini√ß√£o do pipeline
    pipeline.upsert(role_arn=role)
    print("‚úì Pipeline registrado no SageMaker")
    
    # Iniciar execu√ß√£o
    execution = pipeline.start()
    print(f"‚úì Execu√ß√£o iniciada: {execution.arn}")
    
    # Aguardar conclus√£o (opcional)
    if wait_for_completion:
        print("‚è≥ Aguardando conclus√£o do pipeline...")
        print("   (Isso pode levar alguns minutos)")
        
        # Monitorar progresso
        start_time = time.time()
        while True:
            try:
                status = execution.describe()['PipelineExecutionStatus']
                elapsed = int(time.time() - start_time)
                print(f"   Status: {status} (tempo: {elapsed}s)")
                
                if status in ['Succeeded', 'Failed', 'Stopped']:
                    break
                    
                time.sleep(30)  # Aguardar 30 segundos antes de verificar novamente
                
            except Exception as e:
                print(f"   Erro ao verificar status: {str(e)}")
                break
        
        final_status = execution.describe()['PipelineExecutionStatus']
        if final_status == 'Succeeded':
            print(f"‚úÖ Pipeline conclu√≠do com sucesso!")
        else:
            print(f"‚ùå Pipeline falhou com status: {final_status}")
            
    else:
        print("üîÑ Pipeline executando em modo ass√≠ncrono")
        print(f"   Acompanhe o progresso no SageMaker Studio")
        print(f"   ARN: {execution.arn}")
    
except Exception as e:
    print(f"‚ùå Erro na execu√ß√£o do pipeline: {str(e)}")
    print("\nüîç Detalhes do erro:")
    import traceback
    traceback.print_exc()
    
    print("\nüí° Dicas para debugging:")
    print("1. Verifique se todos os notebooks t√™m a c√©lula 'parameters' marcada")
    print("2. Confirme se o bucket S3 existe e tem as permiss√µes corretas")
    print("3. Verifique os logs no CloudWatch ou SageMaker Studio")
    print("4. Confirme se a role tem permiss√µes para SageMaker e S3")

üöÄ Iniciando execu√ß√£o do pipeline...
‚úì Pipeline registrado no SageMaker
‚úì Execu√ß√£o iniciada: arn:aws:sagemaker:sa-east-1:657444906686:pipeline/ExperimentoPipeline/execution/35wpot930fbw
‚è≥ Aguardando conclus√£o do pipeline...
   (Isso pode levar alguns minutos)
   Status: Executing (tempo: 0s)
   Status: Executing (tempo: 30s)
   Status: Executing (tempo: 60s)
   Status: Executing (tempo: 90s)
   Status: Executing (tempo: 120s)
   Status: Executing (tempo: 150s)
   Status: Executing (tempo: 180s)
   Status: Executing (tempo: 210s)
   Status: Executing (tempo: 240s)
   Status: Executing (tempo: 270s)
   Status: Executing (tempo: 300s)
   Status: Executing (tempo: 331s)
   Status: Executing (tempo: 361s)
   Status: Failed (tempo: 391s)
‚ùå Pipeline falhou com status: Failed
