In [None]:
import boto3
import sagemaker
from sagemaker.workflow.pipeline_context import PipelineSession

# --- Sessions and execution role -------------------------------------------
# A regular SageMaker session (used here for region / default bucket lookup)
# and a PipelineSession, which later cells pass to the estimator and model.
sagemaker_session = sagemaker.session.Session()
pipeline_session = PipelineSession()
role = sagemaker.get_execution_role()

# Values reported by the regular session
region = sagemaker_session.boto_region_name
default_bucket = sagemaker_session.default_bucket()

# --- S3 locations ----------------------------------------------------------
# Project bucket, key prefix, and the URI of the Employee dataset CSV
bucket_name = "kennys-testing-bucket"
prefix = "pipeline"
input_data_uri = f"s3://{bucket_name}/training-data/Employee.csv"

# boto3 resource handle for S3
s3 = boto3.resource("s3")



In [None]:
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep

# Resolve the built-in XGBoost container image. The region comes from the
# SageMaker session created in the setup cell rather than from a fresh
# boto3.Session(), whose region_name is None when no default region is
# configured.
xgb_image_uri = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.2-1",
)

# Estimator bound to the PipelineSession: calling .fit() on it does not start
# a training job, it only captures the arguments for the pipeline step.
xgb_train = Estimator(
    image_uri=xgb_image_uri,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    output_path=f"s3://{bucket_name}/model-output",
    role=role,
    sagemaker_session=pipeline_session,
)

# Hyperparameters forwarded to the XGBoost container
xgb_train.set_hyperparameters(
    objective="binary:logistic",
    num_round=100,
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.7,
)

# Training channel. The URI is built from bucket_name so the input location
# and the estimator's output_path cannot drift to different buckets.
train_input = TrainingInput(
    s3_data=f"s3://{bucket_name}/input-data/train/train.csv",
    content_type="text/csv",
)

# Define the training step through step_args (the output of .fit() under a
# PipelineSession). Passing estimator=/inputs= directly to TrainingStep is
# deprecated, and step_args matches how the ModelStep below is built.
train_args = xgb_train.fit(inputs={"train": train_input})
step_train = TrainingStep(
    name="EmployeeAttritionTrain",
    step_args=train_args,
)


In [None]:
from sagemaker.model_metrics import MetricsSource, ModelMetrics
from sagemaker.workflow.model_step import ModelStep
from sagemaker.model import Model

# Model built from the training step's output artifact. It runs on the same
# container image as the estimator used for training.
model = Model(
    role=role,
    sagemaker_session=pipeline_session,
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    image_uri=xgb_train.image_uri,
)

# Arguments for registering the model in the model package group.
# No approval_status and no model metrics are passed here.
register_args = model.register(
    model_package_group_name="EmployeeAttritionModelGroup23",
    content_types=["text/csv"],          # accepted request content type
    response_types=["text/csv"],         # returned response content type
    inference_instances=["ml.t2.medium"],   # instance types for real-time inference
    transform_instances=["ml.m4.xlarge"],   # instance types for batch transform
)

# Pipeline step that performs the registration
step_register = ModelStep(
    step_args=register_args,
    name="EmployeeAttritionRegisterModel",
)


In [None]:
from sagemaker.workflow.pipeline import Pipeline

# Assemble the pipeline from the training and registration steps
pipeline = Pipeline(
    steps=[step_train, step_register],
    name="EmployeeAttritionPipeline-2",
)




In [None]:
import json


# Render the pipeline definition and parse the JSON text so the notebook
# shows it as a structured object (the bare name is the cell's output).
definition_json = pipeline.definition()
definition = json.loads(definition_json)
definition

In [None]:

# Create the pipeline in SageMaker, or update it if it already exists,
# using the notebook's execution role.
pipeline.upsert(
    role_arn=role,
)

In [None]:
# Kick off a run of the pipeline and keep a handle to the execution
execution = pipeline.start()

# Block this cell until the execution has finished
execution.wait()


In [None]:
# Clean up: remove the pipeline definition from SageMaker
pipeline.delete()