In [None]:
import sagemaker
# IAM execution role ARN handed to the processors/estimators defined in later cells
role = "arn:aws:iam::282698011778:role/service-role/AmazonSageMaker-ExecutionRole-20250701T225193"
# SageMaker session object shared by the notebook
sagemaker_session = sagemaker.Session()

In [None]:
from sagemaker.workflow.parameters import ParameterString, ParameterInteger

# Pipeline parameters and their default values.
# NOTE(review): other cells point at "s3://mlops-testing-bucket-kulsin/sample-data.csv";
# the default below looks like a truncated bucket name — confirm the intended URI.
input_data = ParameterString(name="InputDataUrl", default_value="s3://mlops-testing-buck")
processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
training_instance_count = ParameterInteger(name="TrainingInstanceCount", default_value=1)

In [None]:
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.sklearn.processing import SKLearnProcessor

# Scikit-learn processor that will execute the preprocessing script
sklearn_processor = SKLearnProcessor(
    base_job_name='sklearn-preprocessing',  # optional, helps identify the processing job
    instance_count=1,  # optional, defaults to 1
    instance_type="ml.m5.large",
    role=role,
    framework_version="0.23-1"
)

# Processing step: one input (the `input_data` pipeline parameter) mounted under
# /opt/ml/processing/input, and two named outputs ("train" and "validation")
# collected from /opt/ml/processing/output/.
step_process = ProcessingStep(
    name="SampleProcessing",
    processor=sklearn_processor,
    code="processing_script.py",  # This script should exist in your directory
    inputs=[
        ProcessingInput(destination="/opt/ml/processing/input", source=input_data)
    ],
    outputs=[
        ProcessingOutput(source="/opt/ml/processing/output/train", output_name="train"),
        ProcessingOutput(source="/opt/ml/processing/output/validation", output_name="validation")
    ]
)


In [None]:
import sagemaker
from sagemaker.estimator import Estimator
from sagemaker.workflow.steps import TrainingStep
from sagemaker.inputs import TrainingInput
from sagemaker.session import Session

# This cell reuses `sagemaker_session` and `role` from the first cell and the
# `training_instance_count` ParameterInteger from the parameters cell.
# It deliberately does NOT rebind `training_instance_count` to a plain int:
# doing so discards the TrainingInstanceCount pipeline parameter.
# NOTE(review): a Pipeline that includes this step has to list
# `training_instance_count` in its `parameters` — confirm when wiring it in.

# S3 path of the training data
train_data_path = "s3://mlops-testing-bucket-kulsin/sample-data.csv"

# XGBoost estimator; the container image is looked up for the session's region
# and pinned to version 1.7-1.
xgboost_estimator = Estimator(
    image_uri=sagemaker.image_uris.retrieve(
        "xgboost",
        sagemaker_session.boto_region_name,
        version="1.7-1"
    ),
    role=role,
    instance_count=training_instance_count,
    instance_type='ml.m5.xlarge',
    volume_size=30,
    max_run=3600,
    input_mode='File',
    output_path=f"s3://{sagemaker_session.default_bucket()}/output"
)

# Training step fed by a single "train" channel (CSV)
step_train = TrainingStep(
    name="SampleTraining",
    estimator=xgboost_estimator,
    inputs={
        "train": TrainingInput(
            s3_data=train_data_path,
            content_type="csv"
        )
    }
)

In [None]:
import sagemaker
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import ProcessingStep, TrainingStep
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.inputs import TrainingInput
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.sklearn import SKLearn
from sagemaker.workflow.parameters import ParameterString, ParameterInteger
from sklearn.model_selection import train_test_split
from sagemaker.workflow.triggers import PipelineSchedule
import os
import boto3

# Execution role and SageMaker session used to register and run the pipeline
role = "arn:aws:iam::282698011778:role/service-role/AmazonSageMaker-ExecutionRole-20250701T225193"
sagemaker_session = sagemaker.Session()

# Pipeline parameter holding the S3 URI of the input data
input_data = ParameterString(name="InputData", default_value="s3://mlops-testing-bucket-kulsin/sample-data.csv")

# Both scripts are expected next to the notebook (current working directory)
base_dir = os.getcwd()
training_script_path = os.path.join(base_dir, "train.py")
processing_script_path = os.path.join(base_dir, "processing_script.py")

# --- Processing -------------------------------------------------------------
sklearn_processor = SKLearnProcessor(
    base_job_name='sklearn-processing',
    instance_count=1,
    instance_type="ml.m5.large",
    role=role,
    framework_version="0.23-1"
)

# The step reads the `input_data` parameter and publishes "train" and "test" outputs
step_process = ProcessingStep(
    name="PreprocessData",
    processor=sklearn_processor,
    code=processing_script_path,
    inputs=[
        ProcessingInput(destination="/opt/ml/processing/input", source=input_data.to_string())
    ],
    outputs=[
        ProcessingOutput(source="/opt/ml/processing/train", output_name="train"),
        ProcessingOutput(source="/opt/ml/processing/test", output_name="test")
    ]
)

# --- Training ---------------------------------------------------------------
sklearn_estimator = SKLearn(
    base_job_name='sklearn-train',
    instance_count=1,
    role=role,
    instance_type="ml.m5.large",
    framework_version="0.23-1",
    entry_point=training_script_path
)

# The training channels are wired to the processing step's outputs, which also
# makes the training step consume what the processing step produced.
step_train = TrainingStep(
    name="TrainModel",
    estimator=sklearn_estimator,
    inputs={
        "training": TrainingInput(
            s3_data=step_process.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri
        ),
        "testing": TrainingInput(
            s3_data=step_process.properties.ProcessingOutputConfig.Outputs["test"].S3Output.S3Uri
        )
    }
)

# --- Pipeline ---------------------------------------------------------------
pipeline = Pipeline(
    name="SampleDataAnalysisPipeline",
    steps=[step_process, step_train],
    parameters=[input_data],
    sagemaker_session=sagemaker_session
)

# Register (create or update) the pipeline definition, then kick off one execution
pipeline.upsert(role_arn=role)
execution = pipeline.start()

# Report the ARN of the execution that was just started
print(f"Pipeline execution started with ARN: {execution.arn}")

# Create schedule (disabled: the commented-out scheduling attempts below are kept for reference only)
# cron_schedule = PipelineSchedule(
#     name="DailyPipelineSchedule",  # Added name parameter
#     cron="0 0 * * ? *",  # Daily at midnight UTC
#     kms_key_arn="arn:aws:kms:us-east-1:282698011778:key/3f2f4b4e-7ea0-482d-99f7-b35c84bba8e0"
# )

# # Apply the schedule
# pipeline.put_triggers(
#     triggers=[cron_schedule],
#     role_arn=role
# )

# print(f"Pipeline schedule created successfully")
# Create schedule with encryption

# eventbridge = boto3.client('scheduler')

# # Get AWS account ID using boto3
# sts_client = boto3.client('sts')
# account_id = sts_client.get_caller_identity()['Account']

# # Construct pipeline ARN correctly
# pipeline_arn = f"arn:aws:sagemaker:{sagemaker_session.boto_region_name}:{account_id}:pipeline/SampleDataAnalysisPipeline"

# # Create EventBridge schedule with encryption
# eventbridge = boto3.client('scheduler')

# try:
#     response = eventbridge.create_schedule(
#         Name="DailyPipelineSchedule",
#         FlexibleTimeWindow={
#             'Mode': 'OFF'
#         },
#         ScheduleExpression="cron(0 0 * * ? *)",  # Runs daily at midnight UTC
#         KmsKeyArn="arn:aws:kms:us-east-1:282698011778:key/3f2f4b4e-7ea0-482d-99f7-b35c84bba8e0",
#         Target={
#             'RoleArn': role,
#             'Arn': pipeline_arn,
#             'Input': '{}',
#             'SageMakerPipelineParameters': {
#                 'PipelineParameterList': []
#             }
#         },
#         State='ENABLED'
#     )
#     print("Pipeline schedule created successfully with encryption enabled")
#     print(f"Schedule ARN: {response.get('ScheduleArn')}")
# except Exception as e:
#     print(f"Error creating pipeline schedule: {str(e)}")

############# Working code (currently commented out) ###############################

# # Create EventBridge scheduler client
# eventbridge = boto3.client('scheduler')

# # Get AWS account ID using boto3
# sts_client = boto3.client('sts')
# account_id = sts_client.get_caller_identity()['Account']

# # Construct pipeline ARN correctly
# pipeline_arn = f"arn:aws:sagemaker:{sagemaker_session.boto_region_name}:{account_id}:pipeline/SampleDataAnalysisPipeline"

# # Define the KMS key ARN
# kms_key_arn = "arn:aws:kms:us-east-1:282698011778:key/3f2f4b4e-7ea0-482d-99f7-b35c84bba8e0"

# try:
#     # First check if schedule exists
#     try:
#         existing_schedule = eventbridge.get_schedule(
#             Name="DailyPipelineSchedule"
#         )
#         print("Existing schedule found.")
        
#         # Check if it's already encrypted with our KMS key
#         if 'KmsKeyArn' in existing_schedule:
#             if existing_schedule['KmsKeyArn'] == kms_key_arn:
#                 print(f"Schedule is already encrypted with the compliance CMK key: {kms_key_arn}")
#                 exit()
#             else:
#                 print("Schedule is encrypted with a different key. Updating to compliance CMK...")
#                 # Update existing schedule with the correct KMS key
#                 response = eventbridge.update_schedule(
#                     Name="DailyPipelineSchedule",
#                     FlexibleTimeWindow=existing_schedule['FlexibleTimeWindow'],
#                     ScheduleExpression=existing_schedule['ScheduleExpression'],
#                     Target=existing_schedule['Target'],
#                     KmsKeyArn=kms_key_arn,
#                     State='ENABLED'
#                 )
#                 print(f"Schedule updated successfully with compliance CMK: {kms_key_arn}")
#         else:
#             print("Schedule exists but is not encrypted. Updating with CMK...")
#             # Update existing schedule with KMS key
#             response = eventbridge.update_schedule(
#                 Name="DailyPipelineSchedule",
#                 FlexibleTimeWindow=existing_schedule['FlexibleTimeWindow'],
#                 ScheduleExpression=existing_schedule['ScheduleExpression'],
#                 Target=existing_schedule['Target'],
#                 KmsKeyArn=kms_key_arn,
#                 State='ENABLED'
#             )
#             print(f"Schedule updated successfully with compliance CMK: {kms_key_arn}")
        
#     except eventbridge.exceptions.ResourceNotFoundException:
#         # If schedule doesn't exist, create new one
#         print("Schedule not found. Creating new schedule with encryption...")
#         response = eventbridge.create_schedule(
#             Name="DailyPipelineSchedule",
#             FlexibleTimeWindow={
#                 'Mode': 'OFF'
#             },
#             ScheduleExpression="cron(0 0 * * ? *)",
#             KmsKeyArn=kms_key_arn,
#             Target={
#                 'RoleArn': role,
#                 'Arn': pipeline_arn,
#                 'Input': '{}',
#                 'SageMakerPipelineParameters': {
#                     'PipelineParameterList': []
#                 }
#             },
#             State='ENABLED'
#         )
#         print(f"New schedule created and encrypted with compliance CMK: {kms_key_arn}")

# except Exception as e:
#     print(f"Error managing schedule: {str(e)}")
# ///
############# Working code end ###############################




In [None]:
import boto3
from typing import Dict, List, Optional
# Execution role ARN; passed as `role_arn` to configure_sagemaker_schedule() at the
# end of this cell, where it becomes the schedule target's RoleArn.
role = "arn:aws:iam::282698011778:role/service-role/AmazonSageMaker-ExecutionRole-20250701T225193"
def configure_sagemaker_schedule(
    schedule_name: str,
    pipeline_name: str,
    role_arn: str,
    kms_key_arn: str,
    region_name: str,
    schedule_expression: str = "cron(0 0 * * ? *)",
    pipeline_parameters: Optional[List[Dict]] = None
) -> Dict:
    """
    Create or update a KMS-encrypted EventBridge Scheduler schedule that targets
    a SageMaker pipeline.

    Behaviour:
        * schedule does not exist -> it is created with the full configuration;
        * schedule exists but its ``KmsKeyArn`` is missing or differs from
          ``kms_key_arn`` -> it is updated with the full configuration;
        * schedule exists with the same ``KmsKeyArn`` -> it is left untouched and
          the ``get_schedule`` response is returned (other differences, such as
          the schedule expression or target, are not reconciled).

    Args:
        schedule_name (str): Name of the schedule
        pipeline_name (str): Name of the SageMaker pipeline to target
        role_arn (str): IAM role ARN set as the target's RoleArn
        kms_key_arn (str): KMS key ARN used to encrypt the schedule
        region_name (str): AWS region used both for the boto3 clients and for
            building the pipeline ARN
        schedule_expression (str): Schedule expression (defaults to a daily cron)
        pipeline_parameters (List[Dict], optional): Entries for the target's
            PipelineParameterList; ``None`` is sent as an empty list

    Returns:
        Dict: Response of ``create_schedule`` / ``update_schedule``, or the
        existing ``get_schedule`` response when nothing had to change

    Raises:
        Exception: Any error raised by the boto3 calls is printed and re-raised.
    """
    # Pin both clients to `region_name`. Without this the clients use whatever
    # default region the environment provides, so the schedule could be created
    # in a different region than the one encoded in the pipeline ARN below.
    eventbridge = boto3.client('scheduler', region_name=region_name)
    sts = boto3.client('sts', region_name=region_name)

    # The pipeline ARN is assembled from the caller's account ID
    account_id = sts.get_caller_identity()['Account']
    pipeline_arn = f"arn:aws:sagemaker:{region_name}:{account_id}:pipeline/{pipeline_name}"

    # Full schedule configuration, shared by the create and update calls
    schedule_config = {
        'Name': schedule_name,
        'FlexibleTimeWindow': {
            'Mode': 'OFF'
        },
        'ScheduleExpression': schedule_expression,
        'KmsKeyArn': kms_key_arn,
        'Target': {
            'RoleArn': role_arn,
            'Arn': pipeline_arn,
            'Input': '{}',
            'SageMakerPipelineParameters': {
                'PipelineParameterList': pipeline_parameters or []
            }
        },
        'State': 'ENABLED'
    }

    try:
        try:
            # Raises ResourceNotFoundException when the schedule is absent
            existing_schedule = eventbridge.get_schedule(Name=schedule_name)
            print(f"Found existing schedule: {schedule_name}")

            # Only a missing or different KMS key triggers an update
            if existing_schedule.get('KmsKeyArn') != kms_key_arn:
                print("Updating schedule with compliant KMS key...")
                response = eventbridge.update_schedule(**schedule_config)
                print(f"Schedule updated with KMS key: {kms_key_arn}")
            else:
                print("Schedule already compliant with specified KMS key")
                response = existing_schedule

        except eventbridge.exceptions.ResourceNotFoundException:
            print(f"Creating new schedule: {schedule_name}")
            response = eventbridge.create_schedule(**schedule_config)
            print(f"New schedule created with KMS key: {kms_key_arn}")

        return response

    except Exception as e:
        # Surface the failure in the notebook output, then let it propagate
        print(f"Error configuring schedule: {str(e)}")
        raise

# Basic schedule configuration.
# `pipeline_name` must be the name the pipeline is registered under
# ("SampleDataAnalysisPipeline" in the Pipeline(...) cell), not the schedule name;
# otherwise the schedule's target ARN refers to a pipeline this notebook never creates.
response = configure_sagemaker_schedule(
    schedule_name="DailyPipelineSchedule",
    pipeline_name="SampleDataAnalysisPipeline",
    role_arn=role,
    kms_key_arn="arn:aws:kms:us-east-1:282698011778:key/3f2f4b4e-7ea0-482d-99f7-b35c84bba8e0",
    region_name="us-east-1"
)