In [4]:
import sagemaker
from sagemaker.session import Session
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep

In [5]:
def create_training_step(
    *,
    hyperparameters=None,
    output_path='s3://ktzouvan-trading-point-sagemaker-poc/models',
    train_uri="s3://ktzouvan-trading-point-sagemaker-poc/datasets/marketing/train.csv",
    validation_uri="s3://ktzouvan-trading-point-sagemaker-poc/datasets/marketing/validate.csv",
    content_type="csv",
    region='eu-west-1',
    xgboost_version="1.7-1",
    instance_type='ml.m5.large',
    instance_count=1,
    volume_size=5,
    role=None,
    step_name="training-step",
):
    """Build the pipeline ``TrainingStep`` that trains an XGBoost model.

    Called with no arguments, this builds the same step as before (same
    bucket, region, container version, instance settings and
    hyperparameters). Every argument is keyword-only and optional, so the
    step can be pointed at another dataset, region or role without editing
    the function body.

    Args:
        hyperparameters: Dict of hyperparameters handed to the estimator.
            When ``None``, the notebook's default binary-classification
            settings are used. A provided dict is copied, so the caller's
            object is not shared with the estimator.
        output_path: S3 URI passed to the estimator as ``output_path``.
        train_uri: S3 URI of the training dataset ("train" channel).
        validation_uri: S3 URI of the validation dataset ("validation" channel).
        content_type: Content type declared for both input channels.
        region: Region passed to ``sagemaker.image_uris.retrieve``.
        xgboost_version: XGBoost container version passed to
            ``sagemaker.image_uris.retrieve``.
        instance_type: Training instance type.
        instance_count: Number of training instances.
        volume_size: Value passed to the estimator as ``volume_size``
            (the original inline comment described it as GB).
        role: Role passed to the estimator. When ``None``, the result of
            ``sagemaker.get_execution_role()`` is used.
        step_name: Name given to the ``TrainingStep``.

    Returns:
        The configured ``TrainingStep`` with "train" and "validation" inputs.
    """
    # configure hyperparams; a fresh dict is built on every call so that no
    # state is shared between the steps returned by separate invocations
    if hyperparameters is None:
        hyperparameters = {
            'objective': 'binary:logistic',
            'max_depth': 5,
            'eta': 0.2,
            'gamma': 4,
            'min_child_weight': 6,
            'subsample': 0.7,
            'num_round': 50,
        }
    else:
        hyperparameters = dict(hyperparameters)

    # get the xgboost container for the requested region and version
    xgboost_container = sagemaker.image_uris.retrieve("xgboost", region, xgboost_version)

    # resolve the role lazily so an explicitly supplied role skips the lookup
    if role is None:
        role = sagemaker.get_execution_role()

    # create the estimator
    estimator = sagemaker.estimator.Estimator(
        image_uri=xgboost_container,
        hyperparameters=hyperparameters,
        role=role,
        instance_count=instance_count,
        instance_type=instance_type,
        volume_size=volume_size,
        output_path=output_path,
    )

    # define the data type and paths to the training and validation datasets
    train_input = TrainingInput(train_uri, content_type=content_type)
    validation_input = TrainingInput(validation_uri, content_type=content_type)

    training_step = TrainingStep(
        name=step_name,
        estimator=estimator,
        inputs={
            "train": train_input,
            "validation": validation_input,
        },
    )

    return training_step

In [6]:
#training_step = create_training_step()
#training_step