In [19]:
import boto3
import sagemaker
from sagemaker.workflow.pipeline_context import PipelineSession

# --- Sessions and execution role --------------------------------------------
# `sagemaker_session` is the regular session; `pipeline_session` is the one
# handed to the estimator and model in the pipeline cells below.
sagemaker_session = sagemaker.session.Session()
region = sagemaker_session.boto_region_name
role = sagemaker.get_execution_role()
pipeline_session = PipelineSession()
default_bucket = sagemaker_session.default_bucket()

# Initialize S3 resource
s3 = boto3.resource('s3')

# --- S3 locations ------------------------------------------------------------
# Single source of truth for the project bucket; every S3 URI in this notebook
# should be built from it so the bucket cannot drift between cells.
bucket_name = 'sagemaker-ml-28573'

# NOTE(review): 'pipeline' (singular) does not match the 'pipelines/' folder
# that holds the dataset below, and no visible cell reads `prefix` -- confirm
# which spelling is intended before relying on it.
prefix = 'pipeline'

# S3 path to the training dataset (same value as before, now derived from
# `bucket_name` instead of repeating the bucket literal).
input_data_uri = f"s3://{bucket_name}/pipelines/train/Employee.csv"



In [20]:
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep

# Define the XGBoost Estimator.
# The container image is looked up for `region` (defined in the session-setup
# cell) instead of creating a second boto3 session just to read the region.
xgb_train = Estimator(
    image_uri=sagemaker.image_uris.retrieve(
        framework="xgboost",
        region=region,
        version="1.2-1",
    ),
    instance_type="ml.m5.xlarge",
    instance_count=1,
    output_path=f"s3://{bucket_name}/model-output",
    role=role,
    sagemaker_session=pipeline_session,
)

# Set hyperparameters
xgb_train.set_hyperparameters(
    objective="binary:logistic",
    num_round=100,
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.7,
)

# Build the training-job arguments. Because the estimator was created with
# `pipeline_session`, `fit()` does not start a job here; it returns the
# arguments that the pipeline step will run later.
# The dataset location comes from `input_data_uri` (session-setup cell) rather
# than a second copy of the literal.
# NOTE(review): only a "train" channel is supplied, and the CSV is passed as-is.
# Confirm Employee.csv is already in the layout the XGBoost container expects
# for text/csv input (see the algorithm's input-format documentation).
train_args = xgb_train.fit(
    inputs={
        "train": TrainingInput(s3_data=input_data_uri, content_type="text/csv"),
    },
)

# Define the training step using `step_args`, matching how the registration
# step is built, instead of the deprecated `estimator=` form.
step_train = TrainingStep(
    name="EmployeeAttritionTrain",
    step_args=train_args,
)


INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [25]:
from sagemaker.model_metrics import MetricsSource, ModelMetrics
from sagemaker.workflow.model_step import ModelStep
from sagemaker.model import Model

# Wrap the artifact produced by the training step in a Model that reuses the
# training container image for inference.
model = Model(
    role=role,
    sagemaker_session=pipeline_session,
    image_uri=xgb_train.image_uri,
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
)

# Build the registration arguments for the model package group.
# No model metrics and no approval status are passed in this call.
register_args = model.register(
    model_package_group_name="EmployeeAttritionModelGroup23",
    content_types=["text/csv"],            # request payload format
    response_types=["text/csv"],           # response payload format
    inference_instances=["ml.t2.medium"],  # instance types listed for real-time inference
    transform_instances=["ml.m4.xlarge"],  # instance types listed for batch transform
)

# Pipeline step that performs the registration.
step_register = ModelStep(
    step_args=register_args,
    name="EmployeeAttritionRegisterModel",
)


In [26]:
from sagemaker.workflow.pipeline import Pipeline

# Assemble the pipeline from the two steps built above; `step_register`
# consumes the model artifact property of `step_train`.
pipeline = Pipeline(
    steps=[step_train, step_register],
    name="EmployeeAttritionPipeline-2",
)




In [23]:
import json


# Render the pipeline definition and parse the JSON text into a dict so the
# notebook displays it as a nested structure.
definition_json = pipeline.definition()
definition = json.loads(definition_json)
definition



{'Version': '2020-12-01',
 'Metadata': {},
 'Parameters': [],
 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},
  'TrialName': {'Get': 'Execution.PipelineExecutionId'}},
 'Steps': [{'Name': 'EmployeeAttritionTrain',
   'Type': 'Training',
   'Arguments': {'AlgorithmSpecification': {'TrainingInputMode': 'File',
     'TrainingImage': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.2-1'},
    'OutputDataConfig': {'S3OutputPath': 's3://sagemaker-ml-28573/model-output'},
    'StoppingCondition': {'MaxRuntimeInSeconds': 86400},
    'ResourceConfig': {'VolumeSizeInGB': 30,
     'InstanceCount': 1,
     'InstanceType': 'ml.m5.xlarge'},
    'RoleArn': 'arn:aws:iam::448049810900:role/service-role/AmazonSageMaker-ExecutionRole-20240823T111826',
    'InputDataConfig': [{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',
        'S3Uri': 's3://sagemaker-ml-28573/pipelines/train/Employee.csv',
        'S3DataDistributionType': 'FullyReplicate

In [27]:

# Submit the pipeline definition to SageMaker, using `role` as the pipeline's
# role ARN. The call's response is the last expression, so it is displayed as
# the cell output.
pipeline.upsert(role_arn=role)



{'PipelineArn': 'arn:aws:sagemaker:us-east-1:448049810900:pipeline/EmployeeAttritionPipeline-2',
 'ResponseMetadata': {'RequestId': 'e31e99c4-9c96-4b3b-81b8-63bd2ba3dae7',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'e31e99c4-9c96-4b3b-81b8-63bd2ba3dae7',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '95',
   'date': 'Wed, 04 Sep 2024 18:40:01 GMT'},
  'RetryAttempts': 0}}

In [28]:
# Start the pipeline execution and keep the handle in `execution` so later
# cells can refer to this run.
execution = pipeline.start()

# Wait for the pipeline execution to complete before the notebook moves on.
execution.wait()


In [None]:
# Cleanup: delete the pipeline.
# NOTE(review): this cell has no execution count, i.e. it was not run in the
# saved session. Under "Run All" it executes right after the wait above --
# confirm that removing the pipeline at that point is intended.
pipeline.delete()