In [None]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
import sagemaker
from sagemaker.experiments.run import Run, load_run
import time
from datetime import datetime

# Bare last expression: the notebook shows the installed SageMaker SDK version
# as this cell's output.
# NOTE(review): the `sagemaker.experiments.run` import above presumably needs a
# fairly recent SDK release — confirm the minimum supported version.
sagemaker.__version__

In [None]:
# --- SageMaker session and its low-level client ------------------------------
session = sagemaker.Session()
sm = session.sagemaker_client

# --- Experiment tracking ------------------------------------------------------
# Each execution of this cell yields a new, timestamped trial/run name
# (local time, second resolution).
experiment_name = "team1-index-predictor-data-collection"
trial_suffix = f"{datetime.now():%Y-%m-%d-%H-%M-%S}"
trial_name = f"data-collection-{trial_suffix}"

# --- Processing job sizing ----------------------------------------------------
processing_instance_count = 1
processing_instance_type = "ml.t3.medium"

# --- S3 layout ----------------------------------------------------------------
# Both URLs live under the same raw-data prefix of the team bucket.
bucket_name = "team1-index-predictor-bucket"
bucket_prefix = "data/raw"
s3_raw_root = f"s3://{bucket_name}/{bucket_prefix}"
data_s3_url = f"{s3_raw_root}/data"
data_inference_s3_url = f"{s3_raw_root}/inference"

In [None]:
# Open the experiment run for this data-collection execution. The context is
# entered only to read the run's experiment config, which is kept as a notebook
# variable so the processing job launched later can be attached to this run.
run_kwargs = {
    "experiment_name": experiment_name,
    "run_name": trial_name,
    "run_display_name": "Data Collection",
    "sagemaker_session": session,
}

with Run(**run_kwargs) as run:
    experiment_config = run.experiment_config

In [None]:
# Resolve the scikit-learn 1.0-1 CPU image for the region the session actually
# runs in, instead of hard-coding the eu-central-1 ECR registry. In eu-central-1
# this yields the same image as before; in any other region the hard-coded URI
# would point at a registry the job cannot use.
sklearn_image_uri = sagemaker.image_uris.retrieve(
    framework="sklearn",
    region=session.boto_region_name,
    version="1.0-1",
    py_version="py3",
    instance_type=processing_instance_type,
)

# Reuse the notebook's `session` for both the role lookup and the processor so
# the job, the experiment run, and the image lookup all share one
# region/credential context rather than creating implicit default sessions.
script_processor = ScriptProcessor(
    command=["python3"],
    image_uri=sklearn_image_uri,
    role=sagemaker.get_execution_role(sagemaker_session=session),
    instance_count=processing_instance_count,
    instance_type=processing_instance_type,
    sagemaker_session=session,
)

# Launch the collector script as a processing job with no inputs. Files the
# script leaves in /opt/ml/processing/output are uploaded to
# `data_inference_s3_url` when the job finishes.
# NOTE(review): the same S3 URL is also passed to the script as a CLI argument;
# how collector.py uses it is not visible here — confirm it does not duplicate
# the upload performed by the ProcessingOutput.
script_processor.run(
    code="../src/data/collector.py",
    inputs=[],
    outputs=[
        ProcessingOutput(
            source="/opt/ml/processing/output",
            destination=data_inference_s3_url,
            output_name="inference",
        )
    ],
    arguments=[
        "--data-inference-s3-url",
        data_inference_s3_url,
    ],
    # Attach the job to the experiment run created earlier in the notebook.
    experiment_config=experiment_config,
)