# In [None]:
import sagemaker
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput
from datetime import datetime

# S3 buckets referenced by the job: one holding the raw input, one receiving
# the curated output.
raw_bucket, cur_bucket = "ai-trading-copilot-raw", "ai-trading-copilot-curated"

# IAM role handed to the processor.
role = "arn:aws:iam::[ACCOUNT]:role/SagemakerAccess"

# SageMaker session; the region is read from the session's boto configuration.
session = sagemaker.Session()
region = session.boto_region_name

# Snapshot folders available for processing, named by ISO date (YYYY-MM-DD).
dates_to_process = [
    "2025-10-06",
    "2025-10-08",
    "2025-10-09",
    "2025-10-10",
    "2025-10-13",
    "2025-10-14",
    "2025-10-15",
]

# ISO dates sort lexicographically in chronological order, so the string
# maximum is the latest folder.
most_recent_date = max(dates_to_process)
print(f"Using data from the most recent folder: {most_recent_date}")

# Single input channel: the most recent raw snapshot prefix, made available
# inside the processing container under the raw-data input directory.
raw_data_input = ProcessingInput(
    source=f"s3://{raw_bucket}/stooq/daily/{most_recent_date}/",
    destination="/opt/ml/processing/input/raw-data/",
)
processing_input = [raw_data_input]

# Environment variables passed to the processing container.
processor_env = {
    "AWS_REGION": region,
    "RAW_BUCKET": raw_bucket,
    "CURATED_BUCKET": cur_bucket,
}

# scikit-learn processor running on a single ml.t3.medium instance.
processor = SKLearnProcessor(
    framework_version="1.2-1",
    role=role,
    instance_type="ml.t3.medium",
    instance_count=1,
    base_job_name="feature-engg-yearly",
    env=processor_env,
)

# Second-resolution timestamp (local clock) used to make the job name unique
# per launch.
launched_at = datetime.now()
timestamp = launched_at.strftime("%Y-%m-%d-%H-%M-%S")
unique_job_name = f"fe-yearly-run-{timestamp}"

# Output channel: the container's output directory is mapped to a per-job
# prefix in the curated bucket.
features_output = ProcessingOutput(
    source="/opt/ml/processing/output",
    destination=f"s3://{cur_bucket}/features/{unique_job_name}/",
)

# Launch the processing job under the explicit, timestamped job name.
processor.run(
    code="processing/feature_builder.py",
    job_name=unique_job_name,
    arguments=["--symbols", "AAPL,MSFT,AMZN"],
    inputs=processing_input,
    outputs=[features_output],
)