In [8]:
import boto3
import sagemaker
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.tuner import HyperparameterTuner, IntegerParameter, ContinuousParameter

# Execution context: a default SageMaker session and the notebook's execution role.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

# S3 locations: the packaged training code (input) and the prefix for job output.
source_dir = "s3://diabetes-directory/diabetes_training/train.tar.gz"
output_path = "s3://diabetes-directory/model_output/"

# Scikit-learn estimator that runs train.py from the code package above.
sklearn_estimator = SKLearn(
    # What to run
    entry_point="train.py",
    source_dir=source_dir,
    # Container selection
    framework_version="0.23-1",
    py_version="py3",
    # Where and as whom to run
    role=role,
    instance_type="ml.m5.large",
    # Job naming and output location
    base_job_name="diabetes-xgb-training",
    output_path=output_path,
)

In [9]:
# Search space explored by the tuner. Each key is handed to train.py as a
# hyperparameter. NOTE(review): train.py is not visible here; confirm it
# accepts every one of these names.
hyperparameter_ranges = {
    "max_depth": IntegerParameter(3, 10),
    "eta": ContinuousParameter(0.01, 0.3),
    "gamma": ContinuousParameter(0, 5),
    "min_child_weight": IntegerParameter(1, 10),
    "subsample": ContinuousParameter(0.5, 1.0)
}

# Objective metric and the regex used to scrape it from the training logs.
# The capture group matches only a number (optionally signed, optionally in
# scientific notation) so that trailing text on the same log line cannot end up
# inside the captured value, which a greedy "(.*)" would allow.
# NOTE(review): assumes train.py prints lines of the form
# "validation:rmse=<number>" — verify against the script.
objective_metric_name = "validation:rmse"
metric_definitions = [
    {
        "Name": objective_metric_name,
        "Regex": r"validation:rmse=([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)",
    }
]

# Set up tuner: at most 10 training jobs in total, 2 running at a time,
# minimizing the objective metric.
tuner = HyperparameterTuner(
    estimator=sklearn_estimator,
    objective_metric_name=objective_metric_name,
    metric_definitions=metric_definitions,
    objective_type="Minimize",
    hyperparameter_ranges=hyperparameter_ranges,
    max_jobs=10,
    max_parallel_jobs=2,
    base_tuning_job_name="xgb-diabetes-tune"
)

In [10]:
# Launch the tuning job. wait=False is passed so this cell does not hold the
# kernel until the tuning job finishes.
# NOTE(review): no `inputs` channels are passed — presumably train.py locates
# its own data; confirm.
tuner.fit(wait=False)

In [11]:
# Display the name of the tuning job that was just launched.
tuner.latest_tuning_job.name  # rendered as the cell's output

'xgb-diabetes-tune-250602-1720'

In [5]:
import sagemaker
from sagemaker.sklearn.estimator import SKLearn
from sagemaker import get_execution_role

# --- Setup ---
# Bucket and prefix are set once. The earlier lookup of the session's default
# bucket was overwritten immediately, so it only cost AWS calls without
# affecting the result; it has been dropped.
bucket = "diabetes-directory"  # explicit bucket holding the code package and outputs
prefix = "diabetes_training"
role = get_execution_role()  # works inside SageMaker

# --- S3 locations (both derived from bucket/prefix so they cannot drift apart) ---
s3_source_dir = f"s3://{bucket}/{prefix}/train.tar.gz"  # packaged training code
s3_output_path = f"s3://{bucket}/{prefix}/output"       # where job output is written

# --- Create estimator ---
sklearn_estimator = SKLearn(
    entry_point="train.py",
    source_dir=s3_source_dir,
    role=role,
    instance_type="ml.m5.large",
    framework_version="0.23-1",
    py_version="py3",
    output_path=s3_output_path,
    base_job_name="diabetes-xgb-training"
)

# --- Launch training job ---
# Called with no arguments, so the cell waits for the job and streams its logs.
# NOTE(review): no `inputs` channels are passed — presumably train.py locates
# its own data; confirm.
sklearn_estimator.fit()


2025-06-02 16:51:30 Starting - Starting the training job...
2025-06-02 16:51:44 Starting - Preparing the instances for training...
2025-06-02 16:52:08 Downloading - Downloading input data...
2025-06-02 16:52:34 Downloading - Downloading the training image...
2025-06-02 16:53:14 Training - Training image download completed. Training in progress.[34m2025-06-02 16:53:19,064 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2025-06-02 16:53:19,067 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2025-06-02 16:53:19,106 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2025-06-02 16:53:19,418 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:[0m
[34m/miniconda3/bin/python -m pip install -r requirements.txt[0m
[34mCollecting pandas==1.3.5
  Downloading pandas-1.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.3 MB)
 

In [None]:
from sagemaker.tuner import HyperparameterTuner, IntegerParameter, ContinuousParameter

# Search space explored by the tuner. Each key is handed to train.py as a
# hyperparameter. NOTE(review): train.py is not visible here; confirm it
# accepts every one of these names.
hyperparameter_ranges = {
    "max_depth": IntegerParameter(3, 10),
    "eta": ContinuousParameter(0.01, 0.3),
    "gamma": ContinuousParameter(0, 5),
    "min_child_weight": IntegerParameter(1, 10),
    "subsample": ContinuousParameter(0.5, 1.0)
}

objective_metric_name = "validation:rmse"
objective_type = "Minimize"

# A script-mode (framework) estimator has no built-in metrics, so the tuner
# must be told how to scrape the objective metric from the training logs.
# Without metric_definitions SageMaker has no regex for "validation:rmse".
# The capture group matches only a number (optionally signed, optionally in
# scientific notation).
# NOTE(review): assumes train.py prints lines of the form
# "validation:rmse=<number>" — verify against the script.
metric_definitions = [
    {
        "Name": objective_metric_name,
        "Regex": r"validation:rmse=([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)",
    }
]

# At most 10 training jobs in total, 2 running at a time.
tuner = HyperparameterTuner(
    estimator=sklearn_estimator,
    objective_metric_name=objective_metric_name,
    metric_definitions=metric_definitions,
    hyperparameter_ranges=hyperparameter_ranges,
    max_jobs=10,
    max_parallel_jobs=2,
    objective_type=objective_type,
    base_tuning_job_name="xgb-diabetes-tune"
)

# Called with no arguments. NOTE(review): no `inputs` channels are passed —
# presumably train.py locates its own data; confirm.
tuner.fit()
