# Create Hyperparameter Tuning Job

Create an Amazon SageMaker hyperparameter tuning job for XGBoost using the Python boto3 SDK.

[https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost-tuning.html](https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost-tuning.html)

In [None]:
import boto3
import json
import time
from sagemaker import image_uris

# AWS region used by both the SageMaker client and the container image lookup.
# Defined once so the two cannot drift apart; change the region here only.
aws_region = "us-east-1"

# Initialize SageMaker client
sagemaker_client = boto3.client("sagemaker", region_name=aws_region)

# Define the S3 bucket and dataset paths (replace the placeholder with your bucket name)
s3_bucket = "INSERT_BUCKET"

s3_input_train = f"s3://{s3_bucket}/tuning-job-dataset/train.csv"
s3_input_validation = f"s3://{s3_bucket}/tuning-job-dataset/validate.csv"

s3_output_location = f"s3://{s3_bucket}/tuningjoboutput/xgboost-tuning-output/"

# Define SageMaker execution role (replace with your actual ARN)
sagemaker_role = "INSERT_ROLE"

# Look up the XGBoost container image URI for the pinned version in the chosen region
xgboost_container = image_uris.retrieve(
    framework="xgboost",
    region=aws_region,
    version="1.7-1",
)
print(f"XGBoost image uri: {xgboost_container}")

In [None]:
# Training job definition used by the tuning job.
# This name is passed as the DefinitionName field below.
training_job_definition_name = "XGBoostTrainingJobDef"

# Metric reference:
# https://docs.aws.amazon.com/sagemaker/latest/dg/autopilot-metrics-validation.html
#
# MetricName:
# To validate that the objective metric is actually emitted, inspect the
# container logs under Training Job > Monitor > View Logs > Log Stream.


def _csv_channel(channel_name, s3_uri):
    """Return an InputDataConfig entry for an uncompressed CSV channel read from S3."""
    return {
        "ChannelName": channel_name,
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": s3_uri,
                "S3DataDistributionType": "FullyReplicated",
            }
        },
        "ContentType": "csv",
        "CompressionType": "None",
    }


training_job_definition = {
    "DefinitionName": training_job_definition_name,
    # Objective the tuner optimises: minimise validation:rmse
    "TuningObjective": {
        "Type": "Minimize",
        "MetricName": "validation:rmse",
    },
    # Search space: values are passed as strings
    "HyperParameterRanges": {
        "IntegerParameterRanges": [
            {"Name": "max_depth", "MinValue": "3", "MaxValue": "10"},
        ],
        "ContinuousParameterRanges": [
            {
                "Name": "eta",
                "MaxValue": "0.5",
                "MinValue": "0",
                "ScalingType": "Auto",
            },
        ],
    },
    "AlgorithmSpecification": {
        "TrainingImage": xgboost_container,
        "TrainingInputMode": "File",
    },
    "RoleArn": sagemaker_role,
    "OutputDataConfig": {"S3OutputPath": s3_output_location},
    "ResourceConfig": {
        "InstanceType": "ml.m5.xlarge",
        "InstanceCount": 1,
        "VolumeSizeInGB": 10,
    },
    "StoppingCondition": {"MaxRuntimeInSeconds": 3600},
    # One channel for training data and one for validation data
    "InputDataConfig": [
        _csv_channel("train", s3_input_train),
        _csv_channel("validation", s3_input_validation),
    ],
    # Hyperparameters held fixed across every training job in the tuning run
    "StaticHyperParameters": {
        "objective": "binary:logistic",
        "num_round": "100",
    },
}


In [None]:
# Suffix the job name with the current Unix time in whole seconds so that
# runs started in different seconds get different names.
tuning_job_name = "xgboost-tuning-job-{}".format(int(time.time()))

# Request parameters for the tuning job, keyed by the API's field names.
tuning_job_config = dict(
    HyperParameterTuningJobName=tuning_job_name,
    HyperParameterTuningJobConfig={
        "Strategy": "Bayesian",
        "ResourceLimits": {
            "MaxNumberOfTrainingJobs": 25,
            "MaxParallelTrainingJobs": 5,
        },
    },
    # This field takes a list, so the single definition is wrapped in one
    TrainingJobDefinitions=[training_job_definition],
)

In [None]:
# Submit the tuning job; the config dict is expanded into keyword arguments.
response = sagemaker_client.create_hyper_parameter_tuning_job(**tuning_job_config)

# Report the job name and dump the API response as indented JSON.
print("Hyperparameter tuning job started!")
print("Tuning Job Name: {}".format(tuning_job_name))
print(json.dumps(response, indent=4))